In [89]:
import ast
from collections import Counter

import evaluate
import numpy as np
import pandas as pd
import sklearn
import sklearn.metrics
import torch
from PIL import Image
from torch import nn
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import tqdm
from transformers import TrOCRProcessor, VisionEncoderDecoderModel, AdamW
from transformers.modeling_outputs import SequenceClassifierOutput

In [2]:
# Dataset locations (absolute Windows paths) and run-wide settings.
DATA_ROOT = "D:\\Projects\\Priorbank\\Payment-logos\\For testing\\"
TRAIN_CSV = DATA_ROOT + "train_data.csv"  # CSV index of the images
IMAGE_DIR = DATA_ROOT + "Train\\"  # trailing separator kept: file names are appended directly
PROCESSOR_PATH = "microsoft/trocr-base-handwritten"  # checkpoint id used for processor and model
BATCH_SIZE, NUM_CLASSES = 10, 4

In [68]:
# The CSV was written together with its row index, which would otherwise be
# loaded as a stray "Unnamed: 0" data column; read it back as the index.
df = pd.read_csv(TRAIN_CSV, index_col=0)
df.head()

Unnamed: 0,file_name,text,labels
0,Belkart_0.jpg,Belkart,"[0,0,1,0]"
1,Belkart_1.jpg,Belkart,"[0,0,1,0]"
2,Belkart_1_0.png,Belkart,"[0,0,1,0]"
3,Belkart_1_1.png,Belkart,"[0,0,1,0]"
4,Belkart_1_10.png,Belkart,"[0,0,1,0]"


In [69]:
# Rows whose file name starts with "Validation" form the hold-out split;
# every other row is used for training.
is_validation = df.file_name.str.startswith("Validation")

# Build fresh, re-indexed frames rather than resetting the index of a filtered
# slice in place, so later column assignments operate on independent frames.
train_df = df[~is_validation].reset_index(drop=True)
val_df = df[is_validation].reset_index(drop=True)

print("Unique values for Train: " + str(dict(Counter(train_df.text))))
print("Unique values for Validation: " + str(dict(Counter(val_df.text))))

Unique values for Train: {'Belkart': 216, 'Mastercard': 213, 'Mir Belkart': 201, 'Mir': 202, 'Visa Mastercard Belkart': 101, 'Visa Mastercard Mir': 226, 'Visa': 213}
Unique values for Validation: {'Visa Mastercard Mir': 101, 'Visa Mastercard': 100, 'Visa Mastercard Belkart': 100}


In [70]:
def parse_labels(raw):
    """Convert one ``labels`` cell into a float32 NumPy vector.

    Args:
        raw: Either the string form stored in the CSV (e.g. ``"[0,0,1,0]"``)
            or an already-parsed sequence/array.

    Returns:
        A ``numpy.ndarray`` of dtype ``float32``.

    Accepting already-parsed values makes this cell safe to re-run; the
    string-only version raises on the second execution.
    """
    if isinstance(raw, str):
        raw = ast.literal_eval(raw)
    return np.asarray(raw, dtype=np.float32)


# Rebind to new frames instead of assigning through ``.loc`` on filtered copies.
train_df = train_df.assign(labels=train_df["labels"].apply(parse_labels))
val_df = val_df.assign(labels=val_df["labels"].apply(parse_labels))

In [4]:
# Disabled experiment kept as a bare string literal (it is not executed):
# an alternative random split of `df` with 20% of the rows held out.
# NOTE(review): `train_test_split` is not among the imports at the top of this
# notebook, so re-enabling this code also needs that import.
""" np.random.seed(11)

train_df, val_df = train_test_split(df, test_size=0.2, shuffle=True)
train_df.reset_index(drop=True,inplace=True)
val_df.reset_index(drop=True,inplace=True)

print("Unique values for Train: " + str({**Counter(train_df.text)}))
print("Unique values for Validation: " + str({**Counter(val_df.text)})) """

Unique values for Train: {'Visa Mastercard Mir': 240, 'Visa': 99, 'Mir': 79, 'Mastercard': 83}
Unique values for Validation: {'Visa Mastercard Mir': 61, 'Mir': 22, 'Mastercard': 19, 'Visa': 24}


In [73]:
class Loader(Dataset):
    """Dataset yielding a processed image, its tokenised text and its label vector.

    Args:
        data_dir: Directory prefix that is concatenated directly with each
            ``file_name`` (so it must end with a path separator).
        df: Frame with ``file_name``, ``text`` and ``labels`` columns.
        processor: Object called as ``processor(image, return_tensors="pt")``
            and exposing ``processor.tokenizer``.
        max_length: Fixed number of token ids every text is padded or
            truncated to.
    """

    def __init__(self, data_dir, df, processor, max_length=10):
        super().__init__()
        self.data_dir = data_dir
        self.df = df
        self.max_length = max_length
        self.processor = processor

    def __len__(self):
        """Return the number of rows in the backing frame."""
        return len(self.df)

    def _encode_text(self, text):
        """Tokenise ``text`` into exactly ``max_length`` ids, with padding set to -100."""
        tokenizer = self.processor.tokenizer
        # truncation=True guarantees a fixed length; without it, texts longer
        # than max_length yield longer id lists and samples cannot be stacked
        # into a batch.
        token_ids = tokenizer(text,
                              padding="max_length",
                              truncation=True,
                              max_length=self.max_length).input_ids
        pad_id = tokenizer.pad_token_id
        return torch.tensor([-100 if tok == pad_id else tok for tok in token_ids])

    def __getitem__(self, index):
        """Return a dict with ``pixel_values``, ``text`` and ``labels`` tensors for one row."""
        # Positional lookup: does not depend on the frame having a 0..n-1 index.
        row = self.df.iloc[index]

        # Close the file handle as soon as the RGB copy has been made.
        with Image.open(self.data_dir + row["file_name"]) as img:
            image = img.convert("RGB")
        pixel_values = self.processor(image, return_tensors="pt").pixel_values

        # squeeze(0) drops only the leading batch dimension added by the processor.
        return {"pixel_values": pixel_values.squeeze(0),
                "text": self._encode_text(row["text"]),
                "labels": torch.tensor(row["labels"])}

In [74]:
# One processor instance is shared by both dataset wrappers.
processor = TrOCRProcessor.from_pretrained(PROCESSOR_PATH)

# Wrap each split in a Loader that reads its images from IMAGE_DIR.
train_dataset, val_dataset = (
    Loader(data_dir=IMAGE_DIR, df=split_df, processor=processor)
    for split_df in (train_df, val_df)
)



In [75]:
# Training batches are reshuffled on every pass; evaluation keeps dataset order.
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=BATCH_SIZE)
eval_dataloader = DataLoader(dataset=val_dataset, shuffle=False, batch_size=BATCH_SIZE)

In [76]:
TrOCR_model = VisionEncoderDecoderModel.from_pretrained(PROCESSOR_PATH)

# Encoder config: stride and patch size are both set to 16.
for _encoder_field in ("encoder_stride", "patch_size"):
    setattr(TrOCR_model.config.encoder, _encoder_field, 16)

# Top-level config: special-token ids taken from the tokenizer, the decoder's
# vocabulary size, then the generation-related settings.
_top_level_settings = {
    "decoder_start_token_id": processor.tokenizer.cls_token_id,
    "pad_token_id": processor.tokenizer.pad_token_id,
    "vocab_size": TrOCR_model.config.decoder.vocab_size,
    "eos_token_id": processor.tokenizer.sep_token_id,
    "max_length": 64,
    "early_stopping": True,
    "no_repeat_ngram_size": 3,
    "length_penalty": 2.0,
    "num_beams": 4,
}
for _setting, _value in _top_level_settings.items():
    setattr(TrOCR_model.config, _setting, _value)

Some weights of VisionEncoderDecoderModel were not initialized from the model checkpoint at microsoft/trocr-base-handwritten and are newly initialized: ['encoder.pooler.dense.bias', 'encoder.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [37]:
# Disabled checkpoint restore kept as a bare string literal (it is not executed).
# NOTE(review): the code refers to `model`, which is only created in a later
# cell of this notebook, so it would have to run after that cell if re-enabled.
""" state = torch.load("D:\\Projects\\Priorbank\\Payment-logos\\For testing\\model_state.pt")
model.load_state_dict(state["state_dict"]) """

  state = torch.load("D:\\Projects\\Priorbank\\Payment-logos\\For testing\\model_state.pt")


<All keys matched successfully>

In [77]:
class CustomModel(nn.Module):
    """Multi-label classification head on top of an encoder-decoder model.

    The last decoder layer's hidden state at the first decoder position is
    passed through a linear layer that produces ``num_labels`` logits.

    Args:
        model: Wrapped encoder-decoder model. It must expose
            ``config.decoder.hidden_size`` and return ``decoder_hidden_states``
            when called with ``output_hidden_states=True``.
        num_labels: Number of independent binary labels.
    """

    def __init__(self, model, num_labels):
        super().__init__()
        self.num_labels = num_labels
        self.model = model
        self.classifier = nn.Linear(model.config.decoder.hidden_size, num_labels)

    def forward(self, pixel_values, text=None, labels=None):
        """Compute classification logits and, if ``labels`` is given, the BCE loss.

        Args:
            pixel_values: Image batch forwarded to the wrapped model.
            text: Optional token ids forwarded to the wrapped model as its
                ``labels`` argument. When omitted, a single start token per
                sample is supplied as ``decoder_input_ids`` so the decoder
                still has an input to run on.
            labels: Optional multi-hot targets, viewed as ``(-1, num_labels)``.

        Returns:
            ``SequenceClassifierOutput`` with ``loss`` (``None`` without
            ``labels``) and ``logits``.

        Raises:
            ValueError: If ``text`` is omitted and the wrapped model's config
                has no ``decoder_start_token_id``.
        """
        if text is not None:
            decoder_kwargs = {"labels": text}
        else:
            start_id = self.model.config.decoder_start_token_id
            if start_id is None:
                raise ValueError(
                    "config.decoder_start_token_id must be set to call the model without `text`"
                )
            # Only decoder position 0 is read below, so one start token per
            # sample is all the decoder needs as input.
            decoder_kwargs = {
                "decoder_input_ids": torch.full(
                    (pixel_values.shape[0], 1),
                    start_id,
                    dtype=torch.long,
                    device=pixel_values.device,
                )
            }

        outputs = self.model(pixel_values=pixel_values,
                             output_hidden_states=True,
                             **decoder_kwargs)
        hidden_states = outputs.decoder_hidden_states[-1]
        # Kept local: the previous `global logits` leaked this tensor into the
        # notebook namespace on every forward pass.
        logits = self.classifier(hidden_states[:, 0, :])

        loss = None
        if labels is not None:
            loss_fct = nn.BCEWithLogitsLoss()
            # BCEWithLogitsLoss needs targets of the same floating dtype as the logits.
            targets = labels.view(-1, self.num_labels).to(logits.dtype)
            loss = loss_fct(logits.view(-1, self.num_labels), targets)

        return SequenceClassifierOutput(loss=loss, logits=logits)

In [78]:
# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Attach the classification head to the TrOCR backbone and move it to the device.
model = CustomModel(model=TrOCR_model, num_labels=NUM_CLASSES)
model = model.to(device)

In [83]:
# F1 metric object: predictions are accumulated with add_batch() and scored
# with compute() in the training cell.
f1 = evaluate.load(
    "f1",
    trust_remote_code=True,
)

In [88]:
# NOTE(review): `AdamW` is the class imported from `transformers`; that
# implementation is deprecated upstream in favour of `torch.optim.AdamW`.
# Confirm against the installed version before switching.
optimizer = AdamW(model.parameters(), lr=1e-4)
#optimizer.load_state_dict(state["optimizer"])

NUM_EPOCHS = 2
# After the loop: CPU tensor with the last epoch's 0/1 predictions, one row per
# validation sample in dataset order (the eval loader does not shuffle).
preds = None

for epoch in range(NUM_EPOCHS):
    # ---- training pass ----
    model.train()
    for batch in tqdm(train_dataloader):
        batch = {key: value.to(device) for key, value in batch.items()}

        outputs = model(**batch)
        outputs.loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    # ---- evaluation pass ----
    model.eval()
    epoch_preds = []  # reset every epoch so passes are never mixed together
    with torch.no_grad():
        for batch in tqdm(eval_dataloader):
            # The decoder needs an input sequence to run at all, so the
            # tokenised text is passed just as in training.
            # NOTE(review): the classifier reads only decoder position 0; this
            # relies on the decoder being causal, so later target tokens cannot
            # influence the logits.
            outputs = model(batch["pixel_values"].to(device),
                            text=batch["text"].to(device))

            probabilities = torch.sigmoid(outputs.logits)
            predictions = (probabilities >= 0.5).float().cpu()
            epoch_preds.append(predictions)

            # Each label slot is scored as one binary decision; feed the whole
            # batch in a single call, as plain Python ints.
            f1.add_batch(predictions=predictions.int().flatten().tolist(),
                         references=batch["labels"].int().flatten().tolist())

    preds = torch.cat(epoch_preds)
    # compute() scores what was added since the previous compute(), so this is
    # the score of the current epoch only.
    print(f"epoch {epoch}: {f1.compute(average='binary')}")



  0%|          | 0/138 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [19]:
# Ground truth: stack the per-row label vectors into one
# (num_samples, NUM_CLASSES) indicator matrix.
y_true = np.stack(list(val_df["labels"])).astype(int)

# `preds` may be one tensor or a list of per-batch tensors, depending on how
# the training cell collected it.
y_pred = preds if torch.is_tensor(preds) else torch.cat(list(preds))
y_pred = y_pred.cpu().numpy().astype(int)
# If predictions from several evaluation passes were accumulated, keep only the
# most recent pass so the rows line up with `val_df`.
y_pred = y_pred[max(0, len(y_pred) - len(y_true)):]

print(sklearn.metrics.classification_report(
    y_true,
    y_pred,
    labels=list(range(NUM_CLASSES)),  # label column indices, not a count
))

  0%|          | 0/31 [00:00<?, ?it/s]

Prediction: ['Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart']
True: ['Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir']
0.8421052631578947
Prediction: ['Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart']
True: ['Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir']
0.8421052631578947
Prediction: ['Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart', 'Belkart']
True: ['Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir', 'Visa Mastercard Mir

In [48]:
# Disabled checkpoint save kept as a bare string literal (it is not executed):
# the code would bundle the model and optimizer state dicts into one dict and
# write it to a single file with torch.save.
""" state = {
    'state_dict': model.state_dict(),
    'optimizer': optimizer.state_dict(),
}
torch.save(state, "D:\\Projects\\Priorbank\\Payment-logos\\For testing\\model_state.pt") """