In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import os
import json
import torch.nn as nn
import math
from tqdm.auto import tqdm
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [2]:
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

0 NVIDIA GeForce RTX 3090


In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [4]:
batch_size = 11
n_epochs = 500
model_no = 'transformer_with_word2vec'
exp = 1

In [5]:
class custom_transformer(nn.Module):

    def __init__(self, hidden_dim=100, nheads=5, num_encoder_layers=8, num_decoder_layers=8, avg_words = 5500):
        super(custom_transformer, self).__init__()
#         self.transformer = nn.Transformer(hidden_dim, nheads, num_encoder_layers, num_decoder_layers, 
#                                           batch_first=True, activation="relu")
        self.encoder = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nheads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder, num_layers = num_encoder_layers)
        
        self.decoder = nn.TransformerDecoderLayer(d_model=hidden_dim, nhead=nheads, batch_first=True)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder, num_layers = num_decoder_layers)
        
#         self.linear1 = nn.Linear(hidden_dim*avg_words, hidden_dim*avg_words, bias=True)
#         self.one_D_conv = nn.Conv2d(hidden_dim*avg_words, hidden_dim*avg_words, 1, stride=1)
        self.relu_layer = nn.ReLU()
        self.sigmoid_layer = nn.Sigmoid()
        
    def positionalencoding1d(self, d_model, length):
        """
        :param d_model: dimension of the model
        :param length: length of positions
        :return: length*d_model position matrix
        """
        if d_model % 2 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                             "odd dim (got dim={:d})".format(d_model))
        pe = torch.zeros(length, d_model)
        position = torch.arange(0, length).unsqueeze(1)
        div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model)))
        pe[:, 0::2] = torch.sin(position.float() * div_term)
        pe[:, 1::2] = torch.cos(position.float() * div_term)

        return pe

    def forward(self, feat_input):
#         print(feat_input.shape)
#         feat_input = feat_input.flatten(2)
#         print(feat_input.shape)
#         print(self.positionalencoding1d(self.hidden_dim, feat_input.shape[-2]).repeat(feat_input.shape[0], feat_input.shape[1], feat_input.shape[2]).is_cuda)
#         feat_input += self.positionalencoding1d(self.hidden_dim, feat_input.shape[-2]).repeat(feat_input.shape[0], feat_input.shape[1], feat_input.shape[2])
#         features = self.transformer(feat_input.cuda(), self.learnable_query.repeat(feat_input.shape[0], 1, 1))
        enc_features = self.transformer_encoder(feat_input)
            
        dec_features = self.transformer_decoder(feat_input, enc_features)

#         features = self.linear1(dec_features.flatten(1))
#         dec_features = self.sigmoid_layer(features)

        return dec_features, enc_features

In [6]:
new_model = custom_transformer()
# new_model.to(device)
print("Total trainable params:", torch.nn.utils.parameters_to_vector([p for p in new_model.parameters() if p.requires_grad]).numel())

Total trainable params: 8511264




In [7]:
new_model

custom_transformer(
  (encoder): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
    )
    (linear1): Linear(in_features=100, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=100, bias=True)
    (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
    

In [8]:
# Removed Stop Words

In [10]:
import json
file_text = open('/home/abhijeet/Desktop/TRIZ/word_vectors_all_words_v2.json', 'r')
dictionary_text = json.load(file_text)
file_label = open('/home/abhijeet/Desktop/TRIZ/labels.json', 'r')
dictionary_label = json.load(file_label)

In [11]:
from torch.utils.data import Dataset, DataLoader

class dataset_loader(Dataset):
    
    def __init__(self, corpus_dir, word2vec_text, word2vec_label):
        self.word2vec_all_text = word2vec_text
        self.word2vec_all_label = word2vec_label
        self.dataset = pd.read_excel(corpus_dir + 'generated_data.xlsx')
        self.dataset = self.dataset.dropna()
        self.text = self.dataset['text']
        self.label = self.dataset['labels']
        
    def __len__(self):
        return self.text.shape[0]
    
    def __getitem__(self, idx):
        row_text = self.text[idx].lower().replace('\n', ' ')
        row_label = eval(self.label[idx])
        stop_words = set(stopwords.words('english'))
        word_tokens = word_tokenize(row_text)
        filtered_sentence = [w for w in word_tokens if not w.lower() in stop_words]
        filtered_sentence = []
        
        for w in word_tokens:
            if w not in stop_words:
                filtered_sentence.append(w)
        
        row_text = filtered_sentence
        
#         print(row_label, type(row_label))
        word2vec_matrix = []
#         print(len(row_text))
        for count, i in enumerate(row_text):
            try:
                if count == 5000:
                    break
                word2vec_matrix.append(np.array(self.word2vec_all_text[i]))
#                 print('Doing Text', count)
            except:
                if count == 5000:
                    break
                word2vec_matrix.append(np.zeros(100))
        
        for i in range(5000 - len(word2vec_matrix)):
            word2vec_matrix.append(np.zeros(100))
            
#         print(np.array(word2vec_matrix).shape)

        for next_count, i in enumerate(row_label):
            try:
                if next_count == 20:
                    break
                word2vec_matrix.append(np.array(self.word2vec_all_label[i]))
#                 print('Doing Labels')
            except:
                if next_count == 20:
                    break
                word2vec_matrix.append(np.zeros(100))
            
        for i in range(5020 - len(word2vec_matrix)):
            word2vec_matrix.append(np.zeros(100))

#         print(np.array(word2vec_matrix).shape)
        
        output = {'text_label': np.array(word2vec_matrix)}
        
        if np.array(word2vec_matrix).shape != (5020, 100):
            print(np.array(word2vec_matrix).shape)
            output = {'text_label': np.array(word2vec_matrix[:5020])}
        return output

In [12]:
file_text.close()
file_label.close()
train_data = dataset_loader(corpus_dir = '/home/abhijeet/Desktop/TRIZ/All_data/CPC Data/',
                              word2vec_text = dictionary_text, word2vec_label = dictionary_label)

In [13]:
# print(len(train_data))
for i in range(len(train_data)):
    sample = train_data[100]
    break

In [14]:
sample['text_label'].shape

(5020, 100)

In [15]:
train_sampler = torch.utils.data.RandomSampler(train_data)

dataloader_train = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=0)

In [16]:
# import torch.nn.functional as F
# def criterion(predicted, target):
#     """
#     Compute the Kullback-Leibler Divergence loss between two probability distributions.

#     Args:
#         p (torch.Tensor): True distribution (e.g., ground truth probabilities).
#         q (torch.Tensor): Approximate distribution (e.g., predicted probabilities).

#     Returns:
#         torch.Tensor: KL Divergence loss.
#     """
#     assert target.shape == predicted.shape, "Target and predicted tensors must have the same shape"

#     # Apply log softmax to the target and softmax to the predicted
#     log_target = F.log_softmax(target, dim=1)
#     softmax_predicted = F.softmax(predicted, dim=1)

#     # Compute the KL divergence loss
#     loss = F.kl_div(log_target, softmax_predicted, reduction='batchmean')

#     return loss

In [17]:
import torch.nn.functional as F
def criterion(predicted, target):
    
    loss = nn.MSELoss()

    return loss(predicted, target)

In [18]:
new_model.to(device)
optimizer = torch.optim.SGD(new_model.parameters(), lr=0.009, weight_decay=0.0001, momentum=0.9)
# optimizer = torch.optim.Adam(new_model.parameters(), lr=0.001, weight_decay=0.0001)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [None]:
for epoch in range(n_epochs):
    num_nans = 0
    running_loss = 0.0
    val_loss = 0.0
    print(f"\nEpoch: {epoch+1}")
    inner_pbar = tqdm(total=len(dataloader_train), position=1, leave=False, ascii=True, desc=f"Epoch: {epoch+1} Step")
    try:
        for i, data in enumerate(dataloader_train, 1):
            features = data['text_label']
            features = features.type(torch.FloatTensor)
            features = features.to(device)
    #         print(features.is_cuda)
    #         sigmoid = nn.Sigmoid()
            y_ground_truth = torch.tensor(features, requires_grad = True)
    #         print(features.shape, y_ground_truth.shape)
            optimizer.zero_grad()
            preds, enc_results = new_model(features)

            loss = criterion(predicted=preds, target=y_ground_truth)

            inner_pbar.update(1)

            running_loss += loss.detach().item()

            print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Epoch loss: {running_loss/(i)}", end="")
            # print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Nans: {num_nans}")

            if torch.isnan(loss):
                if num_nans > 10:
                    raise RuntimeError(f"Model Error: Encountered {num_nans} nan loss")
                num_nans += 1
                continue
            # loss.requires_grad = True
            loss.backward()

            optimizer.step()
        
        if os.path.exists(f'/home/abhijeet/Desktop/TRIZ/Model/model_{model_no}/'):
            pass
        else:
            os.makedirs(f'/home/abhijeet/Desktop/TRIZ/Model/model_{model_no}/exp_{str(exp)}/')

        checkpoint_pth = f'/home/abhijeet/Desktop/TRIZ/Model/model_{model_no}/exp_{str(exp)}/trained_model_latest_epoch.pth'
        torch.save(new_model.state_dict(), checkpoint_pth)
        
        scheduler.step()
        inner_pbar.close()
    except:
        if os.path.exists(f'/home/abhijeet/Desktop/TRIZ/Model/model_{model_no}/'):
            pass
        else:
            os.makedirs(f'/home/abhijeet/Desktop/TRIZ/Model/model_{model_no}/exp_{str(exp)}/')

        checkpoint_pth = f'/home/abhijeet/Desktop/TRIZ/Model/model_{model_no}/exp_{str(exp)}/trained_model_latest_epoch.pth'
        torch.save(new_model.state_dict(), checkpoint_pth)


Epoch: 1


Epoch: 1 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

  y_ground_truth = torch.tensor(features, requires_grad = True)


Step: 527 Training Loss: 0.2466750293970108 Validation Loss: 0.0 Epoch loss: 0.407568389991655335
Epoch: 2


Epoch: 2 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 841 Training Loss: 0.25146180391311646 Validation Loss: 0.0 Epoch loss: 0.23051703840883966
Epoch: 3


Epoch: 3 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 152 Training Loss: 0.2774791717529297 Validation Loss: 0.0 Epoch loss: 0.202629222436562984
Epoch: 4


Epoch: 4 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 295 Training Loss: 0.2382313460111618 Validation Loss: 0.0 Epoch loss: 0.195396736163204028
Epoch: 5


Epoch: 5 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 652 Training Loss: 0.13979338109493256 Validation Loss: 0.0 Epoch loss: 0.18021894630296098
Epoch: 6


Epoch: 6 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 273 Training Loss: 0.15239901840686798 Validation Loss: 0.0 Epoch loss: 0.17033692700413122
Epoch: 7


Epoch: 7 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 785 Training Loss: 0.1408669650554657 Validation Loss: 0.0 Epoch loss: 0.151304923785719942
Epoch: 8


Epoch: 8 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 124 Training Loss: 0.17455480992794037 Validation Loss: 0.0 Epoch loss: 0.14234339415786739
Epoch: 9


Epoch: 9 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 191 Training Loss: 0.11058580875396729 Validation Loss: 0.0 Epoch loss: 0.13880882338079484
Epoch: 10


Epoch: 10 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 1190 Training Loss: 0.08101800084114075 Validation Loss: 0.0 Epoch loss: 0.12655237497401845
Epoch: 11


Epoch: 11 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 400 Training Loss: 0.13064992427825928 Validation Loss: 0.0 Epoch loss: 0.114541444508358834
Epoch: 12


Epoch: 12 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 71 Training Loss: 0.09306170791387558 Validation Loss: 0.0 Epoch loss: 0.10603960330637408
Epoch: 13


Epoch: 13 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 287 Training Loss: 0.08499505370855331 Validation Loss: 0.0 Epoch loss: 0.107324082573116445
Epoch: 14


Epoch: 14 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 543 Training Loss: 0.07517006248235703 Validation Loss: 0.0 Epoch loss: 0.10223509000809812
Epoch: 15


Epoch: 15 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 258 Training Loss: 0.12925468385219574 Validation Loss: 0.0 Epoch loss: 0.098674987796549655
Epoch: 16


Epoch: 16 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 129 Training Loss: 0.08404575288295746 Validation Loss: 0.0 Epoch loss: 0.09466298276832862
Epoch: 17


Epoch: 17 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 84 Training Loss: 0.10388702154159546 Validation Loss: 0.0 Epoch loss: 0.096370826519670947
Epoch: 18


Epoch: 18 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 9 Training Loss: 0.09896181523799896 Validation Loss: 0.0 Epoch loss: 0.09448232584529453
Epoch: 19


Epoch: 19 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 51 Training Loss: 0.0892123281955719 Validation Loss: 0.0 Epoch loss: 0.090524352794768775
Epoch: 20


Epoch: 20 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 9 Training Loss: 0.07913972437381744 Validation Loss: 0.0 Epoch loss: 0.08847698900434706
Epoch: 21


Epoch: 21 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 234 Training Loss: 0.08085363358259201 Validation Loss: 0.0 Epoch loss: 0.091637583863404077
Epoch: 22


Epoch: 22 Step:   0%|          | 0/15796 [00:00<?, ?it/s]

Step: 150 Training Loss: 0.10126844048500061 Validation Loss: 0.0 Epoch loss: 0.08878862845400969