## 0. Setup

In [None]:
# git@github.com:tayyabmujahid/crnn-pytorch.git use this for future test this

In [None]:
!pip install --upgrade wandb

Collecting wandb
  Downloading wandb-0.18.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.7 kB)
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl.metadata (1.8 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl.metadata (13 kB)
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.14.0-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.9 kB)
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading gitdb-4.0.11-py3-none-any.whl.metadata (1.2 kB)
Collecting smmap<6,>=3.0.1 (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb)
  Downloading smmap-5.0.1-py3-none-any.whl.metadata (4.3 kB)
Downloading wandb-0.18.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_

In [None]:
from torch.utils.data import Dataset, DataLoader, Subset
import os
import cv2
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.nn.init as init
from torch.utils.data import DataLoader

In [None]:
from typing import List
from skimage import io, transform
from PIL import Image

class VocabularyEncoding:
    def __init__(self,root_dir) -> None:
        self.image_paths, self.image_texts = self.load_data(root_dir)
        self.char_list, self.max_label_len = self.create_vocabulary(self.image_texts)


    def load_data(self,root_dir:str):
        #loads images from folder and names of images as labels
        #removes corrupt images
        root_dir = root_dir
        image_paths = list()
        image_texts = list()
        for path in os.listdir(root_dir):
            image_paths.append(root_dir + "/" + path)
            image_texts.append(path.split("_")[1])
        corrupt_images = []
        for path in image_paths:
            try:
                img = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2GRAY)
            except:
                corrupt_images.append(path)
        print("Corrupt Images", corrupt_images)
        print("Number of Corrupt Images",len(corrupt_images))
        print("Deleting Corrupt Images and Paths")
        for path in corrupt_images:
            corrupt_index = image_paths.index(path)
            del image_paths[corrupt_index]
            del image_texts[corrupt_index]
        return image_paths, image_texts
    def create_vocabulary(self,image_texts: List) -> List:
        #get vocabulary for the current dataset
        vocab = set("".join(map(str, image_texts)))
        #max len of label
        max_label_len = max([len(str(text)) for text in image_texts])
        char_list = sorted(vocab)
        return char_list,max_label_len

    def encode_labels(self):
        return list(map(self._encode_labels,self.image_texts))
    def _encode_labels(self,txt):
        # encoding each output word into digits
        dig_lst = []

        for index, char in enumerate(txt):
            try:
                dig_lst.append(self.char_list.index(char))
            except:
                print(char)
        encoded_labels = pad_sequences([dig_lst], maxlen=self.max_label_len, padding='post', value=len(self.char_list))[0]
        return encoded_labels

class MJSynthDataset(Dataset):
    def __init__(self,encoding,transform = None):
        # encoding = VocabularyEncoding(root_dir)
        self.image_paths = encoding.image_paths
        self.image_texts = encoding.image_texts
        self.padded_image_texts = encoding.encode_labels()
        self.transform = transform
    def __len__(self):
        return len(self.image_texts)
    def __getitem__(self,idx):
        image_path = self.image_paths[idx]
        image = Image.open(image_path)
        label = self.padded_image_texts[idx]
        sample = {"image": image, "label": label}
        if self.transform:
            sample['image'] = self.transform(sample['image'])
        return sample


In [1]:
from google.colab import drive
drive.mount('/content/drive')

ModuleNotFoundError: No module named 'google'

In [None]:
!pwd

/content


In [None]:
!mkdir /mjsynth_sample
!unzip -qq /content/drive/My\ Drive/Datasets/mjsynth_sample.zip -d /content/crnn-pytorch/data
# !tar -xzf /content/drive/MyDrive/Datasets/mjsynth.tar.gz -C /content/crnn-pytorch/data
# !cp /content/drive/My\ Drive/Mariyah_Phd/weights/C_LSTM_best.hdf5 /content

mkdir: cannot create directory ‘/mjsynth_sample’: File exists
checkdir:  cannot create extraction directory: /content/crnn-pytorch/data
           No such file or directory


In [None]:
from torchvision.transforms import Grayscale, Compose,Resize, ToTensor
from torch.utils.data import random_split
transforms = Compose([Grayscale(),Resize(size = (32, 128)),ToTensor()])
vv = VocabularyEncoding("mjsynth_sample")
len_char_list = len(vv.char_list)
dataset = MJSynthDataset(encoding = vv, transform = transforms)


train_size = int(0.9 * len(dataset))
valid_size = len(dataset) - train_size

train_dataset, valid_dataset = random_split(dataset, [train_size, valid_size])

Corrupt Images []
Number of Corrupt Images 0
Deleting Corrupt Images and Paths


ValueError: max() arg is an empty sequence

In [None]:
batch_size = 256
train_dataloader = DataLoader(train_dataset,batch_size = batch_size,shuffle=True,num_workers=0)
valid_dataloader = DataLoader(valid_dataset,batch_size = batch_size,shuffle=True,num_workers=0)

In [None]:
class CRNN(nn.Module):
    def __init__(self):
       super(CRNN, self).__init__()
       self._conv = nn.Sequential(
       nn.Conv2d(in_channels=1,out_channels=32,kernel_size = (3,3),padding = 'same'),
       nn.SELU(),
       nn.MaxPool2d(kernel_size=(2,2)),
       nn.Conv2d(in_channels=32,out_channels=64,kernel_size=(3,3),padding='same'),
       nn.SELU(),
       nn.MaxPool2d(kernel_size=(2,2)),
       nn.Conv2d(in_channels=64,out_channels=128,kernel_size=(3,3),padding='same'),
       nn.SELU(),
       nn.Conv2d(in_channels=128,out_channels=128,kernel_size=(3,3),padding='same'),
       nn.SELU(),
       nn.MaxPool2d(kernel_size=(2,1)),
       nn.Conv2d(in_channels=128,out_channels=256,kernel_size=(3,3),padding='same'),
       nn.SELU(),
       nn.BatchNorm2d(256),

       nn.Conv2d(in_channels=256,out_channels=256,kernel_size=(3,3),padding='same'),
       nn.SELU(),
       nn.BatchNorm2d(256),

       nn.MaxPool2d(kernel_size=(2,1)),
       nn.Conv2d(in_channels=256,out_channels=64,kernel_size=(2,2)),
       nn.SELU(),
       )
       self._rnn = nn.Sequential(
       nn.LSTM(input_size = 64, hidden_size = 128, num_layers = 2, batch_first=True, bidirectional=True),


    )
    #    self.bilstm1 = nn.LSTM(bidirectional=True) ?
    #    self.bilstm2 = nn.LSTM(bidirectional=True) ?
       self.linear = nn.Linear(in_features = 256 ,out_features= 62+1)
    def forward(self,x):
        hidden = None
        out = self._conv(x)
        # out = torch.squeeze(out)
        print(out.shape)
        out = out.reshape(256,-1,64)
        print(out.shape)
        out, hidden = self._rnn(out)
        out = self.linear(out)
        return out,hidden
cnn_output_height = 4
gru_hidden_size = 256
gru_num_layers = 2
class CRNN2(nn.Module):
    def __init__(self):
        super(CRNN2, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=(3, 3))
        self.norm1 = nn.InstanceNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=(3, 3), stride=2)
        self.norm2 = nn.InstanceNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=(3, 3))
        self.norm3 = nn.InstanceNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=(3, 3), stride=2)
        self.norm4 = nn.InstanceNorm2d(64)
        self.gru_input_size = cnn_output_height * 64
        self.gru = nn.GRU(self.gru_input_size, gru_hidden_size, gru_num_layers,
                          batch_first=True, bidirectional=True)
        self.fc = nn.Linear(gru_hidden_size * 2, len_char_list)
    def forward(self,x):
        batch_size = x.shape[0]
        out = self.conv1(x)
        out = self.norm1(out)
        out = F.leaky_relu(out)
        out = self.conv2(out)
        out = self.norm2(out)
        out = F.leaky_relu(out)
        out = self.conv3(out)
        out = self.norm3(out)
        out = F.leaky_relu(out)
        out = self.conv4(out)
        out = self.norm4(out)
        out = F.leaky_relu(out)
        out = out.reshape(batch_size, -1, self.gru_input_size)
        out, _ = self.gru(out)
        out = torch.stack([F.log_softmax(self.fc(out[i]), dim=-1) for i in range(out.shape[0])])
        return out


In [None]:
#https://medium.com/swlh/multi-digit-sequence-recognition-with-crnn-and-ctc-loss-using-pytorch-framework-269a7aca2a6
#https://saturncloud.io/blog/understanding-pytorch-lstm-input-dimensions-for-data-scientists/
#https://theaisummer.com/simclr/


In [None]:
# crnn2 = CRNN2()

In [None]:
model = CRNN()
criterion = nn.CTCLoss(blank=blank_label, reduction='mean', zero_infinity=True)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [None]:
crnn = CRNN()

In [None]:
sample = next(iter(train_dataloader))


In [None]:
sample['image'].shape

In [None]:

with torch.no_grad():
    images = sample['image']
    output, hidden = crnn(images)
    print(output.shape)

In [None]:
output.shape

In [None]:
output[1][1].shape

In [None]:
ff = torch.rand(2, 3,4)





In [None]:
ff.reshape(-1,4,2).shape

In [None]:
import torch
input = torch.randn(1, 31, 64)
m = torch.nn.LSTM(64,128,bidirectional=True)
output,_ = m(input)
print(output.shape)

In [None]:
x.shape[0]

In [None]:
import torch
x = torch.randn(256, 1, 63)
out = torch.stack([i for i in range(x.shape[0])])
print(out.shape)

In [None]:

x.size()
torch.permute(x, (2, 0, 1)).size()

# CRNN - Pytorch

In [None]:
!git clone https://github.com/tayyabmujahid/crnn-pytorch.git
from google.colab import drive
drive.mount('/content/drive')



Cloning into 'crnn-pytorch'...
remote: Enumerating objects: 221, done.[K
remote: Counting objects: 100% (62/62), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 221 (delta 46), reused 32 (delta 32), pack-reused 159 (from 1)[K
Receiving objects: 100% (221/221), 28.13 MiB | 28.05 MiB/s, done.
Resolving deltas: 100% (132/132), done.
Mounted at /content/drive


### Copy MJSynth Dataset

In [6]:
!unzip -qq ~/Downloads/mjsynth_sample.zip -d /home/mujahid/PycharmProjects/crnn-pytorch/data

replace /home/mujahid/PycharmProjects/crnn-pytorch/data/mjsynth_sample/182_slinking_71711.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


/home/mujahid/PycharmProjects/crnn-pytorch/src


### Copy IAM HW Dataset

In [8]:
!mkdir /home/mujahid/PycharmProjects/crnn-pytorch/data/IAM_HW
!mkdir /home/mujahid/PycharmProjects/crnn-pytorch/data/IAM_HW/words
!mkdir /home/mujahid/PycharmProjects/crnn-pytorch/data/IAM_HW/xml

mkdir: cannot create directory ‘/home/mujahid/PycharmProjects/crnn-pytorch/data/IAM_HW’: File exists
mkdir: cannot create directory ‘/home/mujahid/PycharmProjects/crnn-pytorch/data/IAM_HW/words’: File exists
mkdir: cannot create directory ‘/home/mujahid/PycharmProjects/crnn-pytorch/data/IAM_HW/xml’: File exists


In [10]:
!unzip -qq /content/drive/My\ Drive/Datasets/IAM_HW/rules.zip -d /content/crnn-pytorch/data/IAM_HW/
!tar -xzf /content/drive/My\ Drive/Datasets/IAM_HW/words.tgz -C /content/crnn-pytorch/data/IAM_HW/words
!tar -xzf /content/drive/My\ Drive/Datasets/IAM_HW/xml.tgz -C /content/crnn-pytorch/data/IAM_HW/xml

unzip:  cannot find or open /content/drive/My Drive/Datasets/IAM_HW/rules.zip, /content/drive/My Drive/Datasets/IAM_HW/rules.zip.zip or /content/drive/My Drive/Datasets/IAM_HW/rules.zip.ZIP.
tar (child): /content/drive/My Drive/Datasets/IAM_HW/words.tgz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now
tar (child): /content/drive/My Drive/Datasets/IAM_HW/xml.tgz: Cannot open: No such file or directory
tar (child): Error is not recoverable: exiting now
tar: Child returned status 2
tar: Error is not recoverable: exiting now


In [13]:
%cd crnn-pytorch/src

[Errno 2] No such file or directory: 'crnn-pytorch/src'
/home/mujahid/PycharmProjects/crnn-pytorch/src


In [None]:
!pwd

/content/crnn-pytorch/src


In [None]:
from evaluate import evaluate

In [None]:
from config import evaluate_config as config
import torch
from dataset import Synth90kDataset, synth90k_collate_fn,Synth90kSample,IAMDataset2,IAMDataset3
from model import CRNN

In [None]:
eval_batch_size = config['eval_batch_size']
cpu_workers = config['cpu_workers']
reload_checkpoint = config['reload_checkpoint']

img_height = config['img_height']
img_width = config['img_width']
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'device: {device}')

device: cuda


In [None]:
num_class = len(Synth90kDataset.LABEL2CHAR) + 1

crnn = CRNN(1, img_height, img_width, num_class,
                map_to_seq_hidden=config['map_to_seq_hidden'],
                rnn_hidden=config['rnn_hidden'],
                leaky_relu=config['leaky_relu'])

In [None]:
print(num_class)

37


In [None]:
crnn.load_state_dict(torch.load('/content/crnn-pytorch/checkpoints/crnn_synth90k.pt',map_location=device))


<All keys matched successfully>

In [None]:
print(crnn)

CRNN(
  (cnn): Sequential(
    (conv0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu0): ReLU(inplace=True)
    (pooling0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu1): ReLU(inplace=True)
    (pooling1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu2): ReLU(inplace=True)
    (conv3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (relu3): ReLU(inplace=True)
    (pooling2): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    (conv4): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (batchnorm4): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu4): ReLU(inplace=True)
    (conv5): Conv2d(512, 512, 

In [None]:
children_counter = 0
for n,c in crnn.named_children():
    print("Children Counter: ",children_counter," Layer Name: ",n,)
    children_counter+=1

Children Counter:  0  Layer Name:  cnn
Children Counter:  1  Layer Name:  map_to_seq
Children Counter:  2  Layer Name:  rnn1
Children Counter:  3  Layer Name:  rnn2
Children Counter:  4  Layer Name:  dense


In [None]:

# class PooledModel(torch.nn.Module):
#     def __init__(self,crnn):
#         super(PooledModel, self).__init__()
#         self.intermediate_model = torch.nn.Sequential(*(list(crnn.children())[:-1]))
#     def forward(self,x):
#         x = self.intermediate_model(x)
#         x = nn.AvgPool2d(x.size(-1))(x)
#         x = x.squeeze(-1).squeeze(-1)
#         return self.intermediate_model(x)

intermediate_model = torch.nn.Sequential(*(list(crnn.children())[:-1]))

In [None]:
### Dataset loading and Dataloader for MJSynth
# data_path="/content/data/mnt/ramdisk/max/90kDICT32px"
# test_dataset = Synth90kDataset(root_dir=data_path, mode='test',
#                                    img_height=img_height, img_width=img_width)
sample_data_path = '/content/data/mjsynth_sample'
test_dataset = Synth90kSample(root_dir=sample_data_path, mode='validation',split=0.0,
                                   img_height=img_height, img_width=img_width)

test_dataloader = torch.utils.data.DataLoader(
    test_dataset, batch_size=512,
    shuffle=False,collate_fn=synth90k_collate_fn)

In [None]:

iam_hw_dataset = IAMDataset2(ttype='val',img_height=img_height, img_width=img_width)

test_dataloader = torch.utils.data.DataLoader(
    iam_hw_dataset, batch_size=512,
    shuffle=False,collate_fn=synth90k_collate_fn)

In [None]:
# iam_hw_dataset.word_strings

In [None]:
import numpy as np

In [None]:
iam_hw_dataset.CHARS

'0123456789abcdefghijklmnopqrstuvwxyz'

In [None]:
from tqdm import tqdm
import numpy as np
def evaluate_word_spotting(crnn, dataset,dataloader):
    preds_list = list()
    targets_list = list()
    crnn.eval()
    pbar_total = len(dataloader)
    pbar = tqdm(total=pbar_total, desc="Wordspotting Evaluate")

    with torch.no_grad():
        for i, data in enumerate(tqdm(dataloader)):
            device = 'cuda' if next(crnn.parameters()).is_cuda else 'cpu'

            images, targets, target_lengths = [d.to(device) for d in data]
            targets_list.append(targets)

            _, pred_vec = crnn(images)
            pred_vec = torch.squeeze(pred_vec)
            pred_vec = torch.mean(pred_vec, 2)
            pred_vec = pred_vec.cpu().numpy()
            preds_list.append(pred_vec)
            pbar.update(1)
    pbar.close()
    print("starting MAP calculations")
    preds_array = [list(i.T) for i in preds_list]
    lbl_array = dataset.word_strings

    preds_array = [item for sublist in preds_array for item in sublist]
    print(len(preds_array))
    preds_array = np.array(preds_array)
    print(preds_array.shape)
    mAP, avg_precs = map_from_feature_matrix(preds_array, lbl_array, 'euclidean', False)
    print(mAP)
    print(avg_precs)
    return preds_list, targets_list

a, b = evaluate_word_spotting(crnn,iam_hw_dataset, test_dataloader)

Wordspotting Evaluate:   0%|          | 0/12 [00:00<?, ?it/s]
Wordspotting Evaluate:   8%|▊         | 1/12 [00:16<03:03, 16.68s/it]
Wordspotting Evaluate:  17%|█▋        | 2/12 [00:31<02:36, 15.66s/it]
Wordspotting Evaluate:  25%|██▌       | 3/12 [00:46<02:16, 15.11s/it]
Wordspotting Evaluate:  33%|███▎      | 4/12 [01:00<01:58, 14.77s/it]
Wordspotting Evaluate:  42%|████▏     | 5/12 [01:14<01:42, 14.62s/it]
Wordspotting Evaluate:  50%|█████     | 6/12 [01:29<01:27, 14.62s/it]
Wordspotting Evaluate:  58%|█████▊    | 7/12 [01:43<01:12, 14.60s/it]
Wordspotting Evaluate:  67%|██████▋   | 8/12 [01:58<00:58, 14.56s/it]
Wordspotting Evaluate:  75%|███████▌  | 9/12 [02:12<00:43, 14.54s/it]
Wordspotting Evaluate:  83%|████████▎ | 10/12 [02:27<00:29, 14.50s/it]
Wordspotting Evaluate:  92%|█████████▏| 11/12 [02:43<00:14, 14.91s/it]
Wordspotting Evaluate: 100%|██████████| 12/12 [02:43<00:00, 10.46s/it]
100%|██████████| 12/12 [02:43<00:00, 13.61s/it]
Wordspotting Evaluate: 100%|██████████| 12/12 [

starting MAP calculations
5644
(5644, 24)
0.4146227072495958
[0.35261238 1.         0.05841179 ... 0.25165984 0.33631446 0.16482188]


In [None]:
print(len(a))
print(type(a[0]))
preds_array = [list(i.T) for i in a]


preds_array = [item for sublist in preds_array for item in sublist]
print(len(preds_array))
preds_array = np.array(preds_array)
print(preds_array.shape)

12
<class 'numpy.ndarray'>
5644
(5644, 24)


In [None]:
preds_array[0].shape

(24,)

In [None]:
import numpy as np
from datetime import datetime

from scipy.spatial.distance import pdist, squareform


def mean_average_precision(model, x_test, y_test, transcripts):
    """This module evaluates the partially trained model using Test Data
  Args:
    model: Instance of Sequential Class storing Neural Network
    x_test: Numpy storing the Test Images
    y_test: Numpy storing the PHOC Labels of Test Data
    transcripts: String storing the characters in the Image.
  Returns:
    map: Floating number storing the Mean Average Precision.
  """
    start = datetime.now()
    y_pred = model.predict(x_test)
    y_pred = np.where(y_pred < 0.5, 0, 1)
    print("Time taken to predict all data: ", datetime.now() - start)
    start = datetime.now()
    N = len(transcripts)
    precision = {}
    count = {}
    for i in range(N):
        if transcripts[i] not in precision.keys():
            precision[transcripts[i]] = 1
            count[transcripts[i]] = 0
        else:
            precision[transcripts[i]] += 1

    for i in range(N):
        pred = y_pred[i]
        acc = np.sum(abs(y_test - pred), axis=1)
        tmp = np.argmin(acc)
        if transcripts[tmp] == transcripts[i]:
            count[transcripts[tmp]] += 1

    mean_avg_prec = [0, 0]
    for i in range(N):
        if precision[transcripts[i]] <= 1:
            continue
        mean_avg_prec[0] += count[transcripts[i]] * 1.0 / precision[transcripts[i]]
        mean_avg_prec[1] += 1

    print("Time taken to calculate l2 dist: ", datetime.now() - start)
    print("The Mean Average Precision = ", mean_avg_prec[0] * 1. / mean_avg_prec[1])
    print("Total test cases = ", N)


# load data and corresponding transcripts
#


def map_from_feature_matrix(features, labels, metric, drop_first):
    '''
    Computes mAP and APs from a given matrix of feature vectors
    Each sample is used as a query once and all the other samples are
    used for testing. The user can specify whether he wants to include
    the query in the test results as well or not.

    Args:
        features (2d-ndarray): the feature representation from which to compute the mAP
        labels (1d-ndarray or list): the labels corresponding to the features (either numeric or characters)
        metric (string): the metric to be used in calculating the mAP
        drop_first (bool): whether to drop the first retrieval result or not
    '''
    # argument error checks
    if features.shape[0] != len(labels):
        raise ValueError('The number of feature vectors and number of labels must match')
    # compute the pairwise distances from the
    # features
    dists = pdist(X=features, metric=metric)
    dists = squareform(dists)
    inds = np.argsort(dists, axis=1)
    retr_mat = np.tile(labels, (features.shape[0], 1))

    # compute two matrices for selecting rows and columns
    # from the label matrix
    # -> advanced indexing
    row_selector = np.transpose(np.tile(np.arange(features.shape[0]), (features.shape[0], 1)))
    retr_mat = retr_mat[row_selector, inds]

    # create the relevance matrix
    rel_matrix = retr_mat == np.atleast_2d(labels).T
    if drop_first:
        rel_matrix = rel_matrix[:, 1:]

    # calculate mAP and APs
    map_calc = MeanAveragePrecision()
    avg_precs = np.array([map_calc.average_precision(row) for row in rel_matrix])
    mAP = np.mean(avg_precs)
    return mAP, avg_precs

class IterativeMean(object):
    '''
    Class for iteratively computing a mean. With every new value (@see: _add_value)
    the mean will be updated
    '''

    def __init__(self, mean_init=0.0):
        self.__mean = mean_init
        self.__N = 0.0

    def add_value(self, value):
        '''
        Updates the mean with respect to value

        Args:
            value (float): The value that will be incorporated in the mean
        '''
        self.__mean = (self.__N / (self.__N + 1)) * self.__mean + (1.0 / (self.__N + 1)) * value
        self.__N += 1

    def get_mean(self):
        return self.__mean

    def reset(self):
        self.__mean = 0.0
        self.__N = 0.0


class MeanAveragePrecision(IterativeMean):
    '''
    Computes average precision values and iteratively updates their mean
    '''
    def __init__(self):
        super(MeanAveragePrecision, self).__init__()

    def average_precision(self, ret_vec_relevance, gt_relevance_num=None):
        '''
        Computes the average precision and updates the mean average precision

        Args:
            ret_vec_relevance (1d-ndarray): array containing ground truth (gt) relevance values
            gt_relevance_num (int): The number of relevant samples in retrieval. If None the sum
                                    over the retrieval gt list is used.
        '''
        ret_vec_cumsum = np.cumsum(ret_vec_relevance, dtype=float)
        ret_vec_range = np.arange(1, ret_vec_relevance.size + 1)
        ret_vec_precision = ret_vec_cumsum / ret_vec_range

        if gt_relevance_num is None:
            n_relevance = ret_vec_relevance.sum()
        else:
            n_relevance = gt_relevance_num

        if n_relevance > 0:
            ret_vec_ap = (ret_vec_precision * ret_vec_relevance).sum() / n_relevance
        else:
            ret_vec_ap = 0.0

        super(MeanAveragePrecision, self).add_value(ret_vec_ap)

        return ret_vec_ap

In [None]:
# lbl_array = test_dataset.texts
lbl_array = iam_hw_dataset.word_strings

In [None]:
mAP, avg_precs = map_from_feature_matrix(preds_array, lbl_array, 'euclidean', False)
print(mAP, avg_precs)
print('==============================')

NameError: name 'map_from_feature_matrix' is not defined

In [None]:
#   dists = pdist(X=features, metric=metric)
#     dists = squareform(dists)
#     inds = np.argsort(dists, axis=1)
#     retr_mat = np.tile(labels, (features.shape[0], 1))

#     # compute two matrices for selecting rows and columns
#     # from the label matrix
#     # -> advanced indexing
#     row_selector = np.transpose(np.tile(np.arange(features.shape[0]), (features.shape[0], 1)))
#     retr_mat = retr_mat[row_selector, inds]

#     # create the relevance matrix
#     rel_matrix = retr_mat == np.atleast_2d(labels).T
#     if drop_first:
#         rel_matrix = rel_matrix[:, 1:]

#     # calculate mAP and APs
#     map_calc = MeanAveragePrecision()
#     avg_precs = np.array([map_calc.average_precision(row) for row in rel_matrix])
#     mAP = np.mean(avg_precs)

In [None]:
%cd ..
!pwd

/content/crnn-pytorch
/content/crnn-pytorch


In [None]:
!python src/train.py

device: cuda
Reloading Checkpoint
epoch: 1
  self.pid = os.fork()
train_batch_loss[ 500 ]:  1.1580798625946045
Evaluate: 100% 23/23 [00:11<00:00,  2.04it/s]
valid_evaluation: loss=3.0926920287947013, acc=0.5511686947386851
Wordspotting Evaluation:   0% 0/23 [00:00<?, ?it/s]
Wordspotting Evaluation:   4% 1/23 [00:01<00:30,  1.39s/it]
Wordspotting Evaluation:   9% 2/23 [00:01<00:16,  1.25it/s]
Wordspotting Evaluation:  13% 3/23 [00:02<00:12,  1.60it/s]
Wordspotting Evaluation:  17% 4/23 [00:02<00:10,  1.83it/s]
Wordspotting Evaluation:  22% 5/23 [00:02<00:08,  2.09it/s]
Wordspotting Evaluation:  26% 6/23 [00:03<00:07,  2.21it/s]
Wordspotting Evaluation:  30% 7/23 [00:03<00:07,  2.27it/s]
Wordspotting Evaluation:  35% 8/23 [00:04<00:06,  2.29it/s]
Wordspotting Evaluation:  39% 9/23 [00:04<00:06,  2.23it/s]
Wordspotting Evaluation:  43% 10/23 [00:05<00:06,  2.15it/s]
Wordspotting Evaluation:  48% 11/23 [00:05<00:05,  2.22it/s]
Wordspotting Evaluation:  52% 12/23 [00:06<00:04,  2.23it/s]
Wo

In [None]:
import tracemalloc

# code or function for which memory
# has to be monitored


# starting the monitoring
tracemalloc.start()

# function call
map_from_feature_matrix(x,['a','b','a','b','c','a','b','a','b','c'],'euclidean',False)

# displaying the memory
print(tracemalloc.get_traced_memory())

# stopping the library
tracemalloc.stop()

In [None]:
retr_mat = np.tile(['a','b','a','b','c','a','b','a','b','c'], (x.shape[0], 1))
retr_mat

In [None]:
retr_mat.shape

In [None]:
ws = iam_hw_dataset.word_strings
a = list()
for w in ws:
    for i in w:
        a.append(i)
a = set(a)
print(a)

{'H', 'e', 'c', 'B', 'L', 'l', 'D', 'z', '2', 'h', 'S', 'O', '6', 'i', 'K', 'd', 'T', 'g', '9', 'f', 'x', 's', 'k', "'", 'Y', 'b', 'w', 'C', ',', '5', '4', 'A', 'I', '.', 'm', 'q', 'J', '3', 'r', 'P', 'y', 'N', 'j', 'F', 'n', 'v', 'p', 'M', 'R', 'u', 'o', 't', '7', 'W', 'V', 'E', '1', 'G', '0', 'Q', 'U', 'a', '-', '8'}


In [None]:
p = 'absdc'
[*p]

['a', 'b', 's', 'd', 'c']

In [None]:
a = 'a,v'
a.replace(',','')
a = a.replace(',','')

In [None]:
a

'av'

In [None]:
from PIL import Image
import os

paths = os.listdir('/content/data/mjsynth_sample')
paths = [os.path.join('/content/data/mjsynth_sample',i) for i in paths]
print(paths[0])
pil_image = Image.open(paths[10]).convert('RGB')

root = "/content/crnn-pytorch/data/IAM_HW/words/a01/a01-000u"
paths1 = os.listdir(root)
paths1 = [os.path.join(root,i) for i in paths1]
print(paths1[10])
pil_image1 = Image.open(paths1[10]).convert('RGB')


/content/data/mjsynth_sample/211_CRAWDAD_17866.jpg
/content/crnn-pytorch/data/IAM_HW/words/a01/a01-000u/a01-000u-06-03.png


In [None]:
from dataset import Synth90kDataset, synth90k_collate_fn,Synth90kSample,IAMDataset2,IAMDataset3

In [None]:
print(pil_image.size)
print(pil_image1.size)

(140, 31)
(165, 73)


In [None]:
pil_image.size

(121, 31)

In [None]:
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
processor = TrOCRProcessor.from_pretrained('microsoft/trocr-base-printed')
model = VisionEncoderDecoderModel.from_pretrained('microsoft/trocr-base-printed')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/224 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/4.13k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.33G [00:00<?, ?B/s]

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-printed and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:


# Load the processor and the model


# Load and preprocess the image


# Preprocess the image and extract pixel values
pixel_values = processor(pil_image, return_tensors="pt").pixel_values
# Pass the pixel values to the encoder to extract features
encoder_outputs = model.encoder(pixel_values)

# The encoder outputs are the features extracted from the image
features = encoder_outputs.last_hidden_state
print(features.shape)


torch.Size([1, 577, 768])


In [None]:
pixel_values.shape

torch.Size([1, 3, 384, 384])

In [None]:
from tqdm import tqdm
import numpy as np
def evaluate_word_spotting_tr(dataset,dataloader):
    preds_list = list()
    targets_list = list()

    pbar_total = len(dataloader)
    pbar = tqdm(total=pbar_total, desc="Wordspotting Evaluate")
    model.eval()
    with torch.no_grad():
        for i, data in enumerate(tqdm(dataloader)):


            images, targets, target_lengths = [d.to(device) for d in data]
            targets_list.append(targets)

            encoder_outputs  = model.encoder(images)
            pred_vec = encoder_outputs.last_hidden_state
            pred_vec = torch.squeeze(pred_vec)
            pred_vec = torch.mean(pred_vec, 2)
            pred_vec = pred_vec.cpu().numpy()
            preds_list.append(pred_vec)
            pbar.update(1)
    pbar.close()
    print("starting MAP calculations")
    preds_array = [list(i.T) for i in preds_list]
    lbl_array = dataset.word_strings

    preds_array = [item for sublist in preds_array for item in sublist]
    print(len(preds_array))
    preds_array = np.array(preds_array)
    print(preds_array.shape)
    mAP, avg_precs = map_from_feature_matrix(preds_array, lbl_array, 'euclidean', False)
    print(mAP)
    print(avg_precs)
    return preds_list, targets_list
iam_hw_dataset = IAMDataset3(ttype='val',processor = processor)

test_dataloader = torch.utils.data.DataLoader(
    iam_hw_dataset, batch_size=8,
    shuffle=False)


In [None]:
a, b = evaluate_word_spotting_tr(iam_hw_dataset, test_dataloader)

Wordspotting Evaluate:   0%|          | 0/706 [00:00<?, ?it/s]
  0%|          | 0/706 [00:00<?, ?it/s]


RuntimeError: each element in list of batch should be of equal size

In [None]:
import requests
url = "https://fki.tic.heia-fr.ch/static/img/a01-122-02.jpg"

image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

In [None]:
image.size

(1024, 100)