In [1]:
import os
os.chdir("../")

In [2]:
import torch
from torch.utils import data
from network import dataset
from src.Glean.utils.evaluate import evaluate
from sklearn.metrics import recall_score, precision_score, f1_score
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm
from src.Glean import logger
from src.Glean.entity.config_entity import TrainModelConfig
from pathlib import Path
from src.Glean.config.configuration import ConfigurationManager
import os
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from torch import nn
import torch.nn.functional as F
from transformers import LayoutLMv3Model

from network.neighbour_attention import MultiHeadAttention
from network.neighbour_embedding import NeighbourEmbedding

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
config = ConfigurationManager()
train_config = config.train_model_config()

[2023-11-21 00:42:24,064: INFO: common: yaml file: config\config.yaml loaded successfully]
[2023-11-21 00:42:24,064: INFO: common: yaml file: params.yaml loaded successfully]
[2023-11-21 00:42:24,073: INFO: common: created directory at: artifacts]
[2023-11-21 00:42:24,073: INFO: common: created directory at: artifacts/best_model]


In [4]:
train_data = dataset.DocumentsDataset(train_config, 'train')
val_data = dataset.DocumentsDataset(train_config, 'val')

VOCAB_SIZE = len(train_data.vocab)
VOCAB_SIZE

[2023-11-21 00:42:24,083: INFO: dataset: Preprocessed data available, Loading data from cache...]

Class Mapping: {'registration_num': 0}
Classs counts: {'registration_num': 197}
[2023-11-21 00:42:24,101: INFO: dataset: Preprocessed data available, Loading data from cache...]

Class Mapping: {'registration_num': 0}
Classs counts: {'registration_num': 99}


50268

In [5]:
def train(model, train_dataloader, val_dataloader, epochs):

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
        model = model.to(device)
        # writer = SummaryWriter(comment=f"LR_{self.config.lr}_BATCH_{self.config.batch_size}")
        # criterion = nn.BCELoss()
        criterion = torch.nn.BCELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=train_config.lr)

        train_loss_history = []
        train_accuracy_history = []
        recall_history = []
        precision_history = []
        f1_history = []
        val_loss_history = []
        val_accuracy_history = []
        val_recall_history = []
        val_precision_history = []
        val_f1_history = []
        val_max_score = 0.0

        for epoch in range(1, epochs + 1):

            train_loss = 0.0
            train_accuracy = 0.0
            y_preds = []
            y_labels = []

            for field, candidate, words, positions, masks, labels in tqdm(train_dataloader, desc="Epoch %s" % epoch):
                # print(field.dim())
                field = field.to(device)
                candidate = candidate.to(device)
                words = words.to(device)
                positions = positions.to(device)
                masks = masks.to(device)
                labels = labels.to(device)

                outputs = model(field, candidate, words, positions, masks)
                print("Outputs: ",outputs)
                loss = criterion(outputs, labels)
                print("Loss: ", loss)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                preds = outputs.round()
                y_preds.extend(list(preds.cpu().detach().numpy().reshape(1, -1)[0]))
                y_labels.extend(list(labels.cpu().detach().numpy().reshape(1, -1)[0]))

                train_accuracy += torch.sum(preds == labels).item()
                train_loss += loss.item()

            # else:
            #     val_accuracy, val_loss, val_recall, val_precision, val_f1 = evaluate(model, val_dataloader, criterion)

            #     train_loss = train_loss / train_dataloader.sampler.num_samples
            #     train_accuracy = train_accuracy / train_dataloader.sampler.num_samples
            #     recall = recall_score(y_labels, y_preds)
            #     precision = precision_score(y_labels, y_preds)
            #     f1score = f1_score(y_labels, y_preds)

            #     train_loss_history.append(train_loss)
            #     train_accuracy_history.append(train_accuracy)
            #     recall_history.append(recall)
            #     precision_history.append(precision)
            #     f1_history.append(f1score)
            #     val_loss_history.append(val_loss)
            #     val_accuracy_history.append(val_accuracy)
            #     val_recall_history.append(val_recall)
            #     val_precision_history.append(val_precision)
            #     val_f1_history.append(val_f1)

            #     if val_recall > val_max_score: # Saving the best model
            #         print('saving model....')
            #         val_max_score = val_recall
            #         os.makedirs(Path(train_config.best_model_dir), exist_ok=True)
            #         torch.save(model, Path(train_config.best_model_dir)/'model.pth')
            #     print(f"Metrics for Epoch {epoch}:  Loss:{round(train_loss, 4)} \
            #             Recall: {round(recall, 4)} \
            #             Validation Loss: {round(val_loss, 4)} \
            #             Validation Recall: {round(val_recall, 4)}")
            break

        return {
            # 'training_loss': train_loss_history,
            # 'training_accuracy': train_accuracy_history,
            'training_recall': recall_history,
            'training_precision': precision_history,
            'training_f1': f1_history,
            # 'validation_loss': val_loss_history,
            # 'validation_accuracy': val_accuracy_history,
            'validation_recall': val_recall_history,
            'validation_precision': val_precision_history,
            'validation_f1': val_f1_history
        }

In [6]:
class Model(nn.Module):

    def __init__(self, vocab_size, embedding_dim, neighbours, heads):
        super().__init__()

        self.lm_model = LayoutLMv3Model.from_pretrained('nielsr/layoutlmv3-finetuned-funsd')

        self.cand_embed = nn.Linear(2, 128)
        self.field_embed = nn.Linear(768, embedding_dim)
        self.embedding_dimension = embedding_dim
        self.neighbour_embeddings = NeighbourEmbedding(vocab_size, embedding_dim)

        self.attention_encodings = MultiHeadAttention(heads, embedding_dim * 2)
        self.linear_projection = nn.Linear(neighbours * embedding_dim * 2, 4 * embedding_dim * 2)
        self.linear_projection_2 = nn.Linear(128 + (2 * embedding_dim), embedding_dim)
        self.cos_sim = nn.CosineSimilarity(dim=1, eps=1e-6)

    def forward(self, field_id, candidate, neighbour_words, neighbour_positions, masks):
        # Field and candidate embeddings
        # print("Field_id.shape: ", field_id.shape)
        id_lm_embed = self.lm_model.embeddings.word_embeddings(field_id.view(-1).to(torch.long))
        # print("ID lm embed shape: ", id_lm_embed.shape)
        id_embed = self.field_embed(id_lm_embed)
        # print("ID Embed shape: ", id_embed.shape)
        cand_embed = self.cand_embed(candidate)

        # Neighbour embeddings
        neighbour_embeds = self.neighbour_embeddings(neighbour_words, neighbour_positions)
        # print(neighbour_embeds)

        # Attention encodings
        self_attention = self.attention_encodings(neighbour_embeds, neighbour_embeds, neighbour_embeds, mask=masks)

        # Linear projection of attention to concatenate with candidate embedding
        bs = self_attention.size(0)
        self_attention = self_attention.view(bs, -1)
        linear_proj = F.relu(self.linear_projection(self_attention))

        linear_proj = linear_proj.view(bs, 4, -1)

        pooled_attention = F.max_pool2d(linear_proj, 2, 2)

        unrolled_attention = pooled_attention.view(bs, -1)

        # Concatenating Candidate embedding and Attention
        concat = torch.cat((cand_embed, unrolled_attention), dim=1)

        # Re-projecting concatenated embedding to calculate cosing similarity
        projected_candidate_encoding = F.relu(self.linear_projection_2(concat))
        # print("Projected candidate encoding shape: ",projected_candidate_encoding.shape)
        # print("ID embed shape: ", id_embed.shape)
        # Calculating cosine similarity and scaling to [0,1]
        similarity = self.cos_sim(id_embed, projected_candidate_encoding).view(bs, -1)
        scores = (similarity + 1) / 2

        return scores

In [7]:
train_loader = data.DataLoader(train_data, batch_size=train_config.batch_size, shuffle=True)
val_loader = data.DataLoader(val_data, batch_size=train_config.batch_size, shuffle=True)

model = Model(VOCAB_SIZE, train_config.embedding_size, train_config.neighbours, train_config.heads)
train_model = train(model, train_loader, val_loader, train_config.epochs)
# train_fn = train_model.train()
# train_fn

Epoch 1:   0%|          | 0/1376 [00:00<?, ?it/s]

torch.Size([2, 5])
Outputs:  

Epoch 1:   0%|          | 0/1376 [00:02<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
from transformers import LayoutLMv3Model

# Load the pretrained LayoutLMv3 model
model = LayoutLMv3Model.from_pretrained('nielsr/layoutlmv3-finetuned-funsd')
model

LayoutLMv3Model(
  (embeddings): LayoutLMv3TextEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=1)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (position_embeddings): Embedding(514, 768, padding_idx=1)
    (x_position_embeddings): Embedding(1024, 128)
    (y_position_embeddings): Embedding(1024, 128)
    (h_position_embeddings): Embedding(1024, 128)
    (w_position_embeddings): Embedding(1024, 128)
  )
  (patch_embed): LayoutLMv3PatchEmbeddings(
    (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
  )
  (pos_drop): Dropout(p=0.0, inplace=False)
  (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
  (encoder): LayoutLMv3Encoder(
    (layer): ModuleList(
      (0-11): 12 x LayoutLMv3Layer(
        (attention): Layo