In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import os
import json
import torch.nn as nn
import math
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

0 NVIDIA GeForce RTX 3090


In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [4]:
batch_size = 16
n_epochs = 500
model_no = 'transformer_with_word2vec'
exp = 1

In [5]:
# With square kernels and equal stride
# m = nn.Conv2d(5500, 5500, 1, stride=1)
# non-square kernels and unequal stride and with padding
m = nn.Conv1d(5500, 5500, 1, stride=1)
# # non-square kernels and unequal stride and with padding and dilation
# m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1))
input = torch.randn(20, 5500, 1)
output = m(input)
print(output.shape)

torch.Size([20, 5500, 1])


In [6]:
class custom_transformer(nn.Module):

    def __init__(self, hidden_dim=100, nheads=5, num_encoder_layers=8, num_decoder_layers=8, avg_words = 5500):
        super(custom_transformer, self).__init__()
#         self.transformer = nn.Transformer(hidden_dim, nheads, num_encoder_layers, num_decoder_layers, 
#                                           batch_first=True, activation="relu")
        self.encoder = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nheads, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder, num_layers = num_encoder_layers)
        
        self.decoder = nn.TransformerDecoderLayer(d_model=hidden_dim, nhead=nheads, batch_first=True)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder, num_layers = num_decoder_layers)
        
#         self.linear1 = nn.Linear(hidden_dim*avg_words, hidden_dim*avg_words, bias=True)
#         self.one_D_conv = nn.Conv2d(hidden_dim*avg_words, hidden_dim*avg_words, 1, stride=1)
        self.relu_layer = nn.ReLU()
        self.sigmoid_layer = nn.Sigmoid()
        
    def positionalencoding1d(self, d_model, length):
        """
        :param d_model: dimension of the model
        :param length: length of positions
        :return: length*d_model position matrix
        """
        if d_model % 2 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                             "odd dim (got dim={:d})".format(d_model))
        pe = torch.zeros(length, d_model)
        position = torch.arange(0, length).unsqueeze(1)
        div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model)))
        pe[:, 0::2] = torch.sin(position.float() * div_term)
        pe[:, 1::2] = torch.cos(position.float() * div_term)

        return pe

    def forward(self, feat_input):
#         print(feat_input.shape)
#         feat_input = feat_input.flatten(2)
#         print(feat_input.shape)
#         print(self.positionalencoding1d(self.hidden_dim, feat_input.shape[-2]).repeat(feat_input.shape[0], feat_input.shape[1], feat_input.shape[2]).is_cuda)
#         feat_input += self.positionalencoding1d(self.hidden_dim, feat_input.shape[-2]).repeat(feat_input.shape[0], feat_input.shape[1], feat_input.shape[2])
#         features = self.transformer(feat_input.cuda(), self.learnable_query.repeat(feat_input.shape[0], 1, 1))
        enc_features = self.transformer_encoder(feat_input)
            
        dec_features = self.transformer_decoder(feat_input, enc_features)

#         features = self.linear1(dec_features.flatten(1))
        dec_features = self.sigmoid_layer(features)

        return dec_features

In [7]:
new_model = custom_transformer()
# new_model.to(device)
print("Total trainable params:", torch.nn.utils.parameters_to_vector([p for p in new_model.parameters() if p.requires_grad]).numel())

Total trainable params: 8511264




In [8]:
new_model

custom_transformer(
  (encoder): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
    )
    (linear1): Linear(in_features=100, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=100, bias=True)
    (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
    

In [9]:
# Keeping Stop Words

In [10]:
from torch.utils.data import Dataset, DataLoader

class dataset_loader(Dataset):
    
    def __init__(self, corpus_dir, word2vec_dir):
        self.word2vec_all = open(word2vec_dir, 'r')
        self.word2vec_all = json.load(self.word2vec_all)
        self.dataset = pd.read_excel(corpus_dir + 'generated_data.xlsx')
        self.dataset = self.dataset.dropna()
        self.text = self.dataset['text']
        self.label = self.dataset['labels']
        
    def __len__(self):
        return self.text.shape[0]
    
    def __getitem__(self, idx):
        row_text = self.text[idx].lower().replace('\n', ' ').split()
        row_label = eval(self.label[idx])
#         print(row_label.shape, row_text.shape)
        word2vec_matrix = []
        
        for count, i in enumerate(row_text):
            try:
                word2vec_matrix.append(np.array(self.word2vec_all[i]))
                if count == 5000:
                    break
            except:
                word2vec_matrix.append(np.zeros(100))
        
        for i in range(5000 - len(word2vec_matrix)):
            word2vec_matrix.append(np.zeros(100))
            
#         print(len(word2vec_matrix))

        for next_count, i in enumerate(row_label):
            try:
                word2vec_matrix.append(np.array(self.word2vec_all[i]))
                if next_count == 499:
                    break
            except:
                word2vec_matrix.append(np.zeros(100))
            
        for i in range(499 - next_count):
            word2vec_matrix.append(np.zeros(100))

#         print(len(word2vec_matrix))
        
        output = {'text_label': np.array(word2vec_matrix)}
        
        return output

In [11]:
train_data = dataset_loader(corpus_dir='/home/abhijeet/Desktop/TRIZ/All_data/CPC Data/',
                              word2vec_dir='/home/abhijeet/Desktop/TRIZ/word_vectors.json')

In [12]:
# print(len(train_data))
for i in range(len(train_data)):
    sample = train_data[i]
    break

In [13]:
batch_size = 1

In [14]:
train_sampler = torch.utils.data.RandomSampler(train_data)

dataloader_train = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=0)

In [15]:
import torch.nn.functional as F
def criterion(predicted, target):
    """
    Compute the Kullback-Leibler Divergence loss between two probability distributions.

    Args:
        p (torch.Tensor): True distribution (e.g., ground truth probabilities).
        q (torch.Tensor): Approximate distribution (e.g., predicted probabilities).

    Returns:
        torch.Tensor: KL Divergence loss.
    """
    assert target.shape == predicted.shape, "Target and predicted tensors must have the same shape"

    # Apply log softmax to the target and softmax to the predicted
    log_target = F.log_softmax(target, dim=1)
    softmax_predicted = F.softmax(predicted, dim=1)

    # Compute the KL divergence loss
    loss = F.kl_div(log_target, softmax_predicted, reduction='batchmean')

    return loss
#     return F.kl_div(predicted, target)

In [16]:
new_model.to(device)
optimizer = torch.optim.SGD(new_model.parameters(), lr=0.009, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [17]:
for epoch in range(n_epochs):
    num_nans = 0
    running_loss = 0.0
    val_loss = 0.0
    print(f"\nEpoch: {epoch+1}")
    inner_pbar = tqdm(total=len(dataloader_train), position=1, leave=False, ascii=True, desc=f"Epoch: {epoch+1} Step")

    for i, data in enumerate(dataloader_train, 1):
        features = data['text_label']
        features = features.type(torch.FloatTensor)
        features = features.to(device)
#         print(features.is_cuda)
        y_ground_truth = torch.tensor(features, requires_grad = True)
        optimizer.zero_grad()
        preds = new_model(features)
        
        loss = criterion(predicted=preds, target=y_ground_truth)

        inner_pbar.update(1)
        
        running_loss += loss.detach().item()

        print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Epoch loss: {running_loss/(i)}", end="")
        # print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Nans: {num_nans}")

        if torch.isnan(loss):
            if num_nans > 10:
                raise RuntimeError(f"Model Error: Encountered {num_nans} nan loss")
            num_nans += 1
            continue
        # loss.requires_grad = True
        loss.backward()

        optimizer.step()
    scheduler.step()
    inner_pbar.close()


Epoch: 1



  y_ground_truth = torch.tensor(features, requires_grad = True)

Epoch: 1 Step:   0%|                      | 1/173749 [00:00<11:39:35,  4.14it/s][A
Epoch: 1 Step:   0%|                       | 2/173749 [00:00<8:04:37,  5.98it/s][A

Step: 1 Training Loss: 2.1737499237060547 Validation Loss: 0.0 Epoch loss: 2.1737499237060547


Epoch: 1 Step:   0%|                       | 3/173749 [00:00<7:29:08,  6.45it/s][A
Epoch: 1 Step:   0%|                       | 4/173749 [00:00<7:15:51,  6.64it/s][A

Step: 2 Training Loss: 0.6024421453475952 Validation Loss: 0.0 Epoch loss: 1.388096034526825Step: 3 Training Loss: 0.17988668382167816 Validation Loss: 0.0 Epoch loss: 0.9853595842917761


Epoch: 1 Step:   0%|                       | 5/173749 [00:00<7:07:25,  6.77it/s][A
Epoch: 1 Step:   0%|                       | 6/173749 [00:00<7:00:01,  6.89it/s][A

Step: 4 Training Loss: 0.854159951210022 Validation Loss: 0.0 Epoch loss: 0.9525596760213375Step: 5 Training Loss: 0.9188051223754883 Validation Loss: 0.0 Epoch loss: 0.9458087652921676


Epoch: 1 Step:   0%|                       | 7/173749 [00:01<7:00:46,  6.88it/s][A
Epoch: 1 Step:   0%|                       | 8/173749 [00:01<6:49:55,  7.06it/s][A

Step: 6 Training Loss: 0.5385774374008179 Validation Loss: 0.0 Epoch loss: 0.877936877310276Step: 7 Training Loss: 0.4847286343574524 Validation Loss: 0.0 Epoch loss: 0.8217642711741584


Epoch: 1 Step:   0%|                       | 9/173749 [00:01<6:54:49,  6.98it/s][A
Epoch: 1 Step:   0%|                      | 10/173749 [00:01<6:53:21,  7.01it/s][A

Step: 8 Training Loss: 0.8768552541732788 Validation Loss: 0.0 Epoch loss: 0.8286506440490484Step: 9 Training Loss: 0.4892069697380066 Validation Loss: 0.0 Epoch loss: 0.7909346802367104


Epoch: 1 Step:   0%|                      | 11/173749 [00:01<6:49:07,  7.08it/s][A
Epoch: 1 Step:   0%|                      | 12/173749 [00:01<6:51:47,  7.03it/s][A

Step: 10 Training Loss: 0.6125763654708862 Validation Loss: 0.0 Epoch loss: 0.773098848760128Step: 11 Training Loss: 0.2794008255004883 Validation Loss: 0.0 Epoch loss: 0.7282172102819789


Epoch: 1 Step:   0%|                      | 13/173749 [00:01<6:52:43,  7.02it/s][A
Epoch: 1 Step:   0%|                      | 14/173749 [00:02<6:49:57,  7.06it/s][A

Step: 12 Training Loss: 1.097937822341919 Validation Loss: 0.0 Epoch loss: 0.759027261286974Step: 13 Training Loss: 0.5271157622337341 Validation Loss: 0.0 Epoch loss: 0.7411879152059555


Epoch: 1 Step:   0%|                      | 15/173749 [00:02<6:59:14,  6.91it/s][A
Epoch: 1 Step:   0%|                      | 16/173749 [00:02<6:47:57,  7.10it/s][A

Step: 14 Training Loss: 0.9379440546035767 Validation Loss: 0.0 Epoch loss: 0.7552419251629284Step: 15 Training Loss: 1.7827752828598022 Validation Loss: 0.0 Epoch loss: 0.8237441490093868


Epoch: 1 Step:   0%|                      | 17/173749 [00:02<6:41:06,  7.22it/s][A
Epoch: 1 Step:   0%|                      | 18/173749 [00:02<6:41:31,  7.21it/s][A

Step: 16 Training Loss: 0.6799180507659912 Validation Loss: 0.0 Epoch loss: 0.8147550178691745Step: 17 Training Loss: 0.41428688168525696 Validation Loss: 0.0 Epoch loss: 0.7911980686818852


Epoch: 1 Step:   0%|                      | 19/173749 [00:02<6:35:14,  7.33it/s][A
Epoch: 1 Step:   0%|                      | 20/173749 [00:02<6:35:34,  7.32it/s][A

Step: 18 Training Loss: 0.4993324279785156 Validation Loss: 0.0 Epoch loss: 0.7749833108650314Step: 19 Training Loss: 0.7077677249908447 Validation Loss: 0.0 Epoch loss: 0.7714456484506005


Epoch: 1 Step:   0%|                      | 21/173749 [00:03<6:40:32,  7.23it/s][A
Epoch: 1 Step:   0%|                      | 22/173749 [00:03<6:47:30,  7.11it/s][A

Step: 20 Training Loss: 0.520099937915802 Validation Loss: 0.0 Epoch loss: 0.7588783629238606Step: 21 Training Loss: 0.3039865791797638 Validation Loss: 0.0 Epoch loss: 0.7372168494122369


Epoch: 1 Step:   0%|                      | 23/173749 [00:03<6:46:45,  7.12it/s][A
Epoch: 1 Step:   0%|                      | 24/173749 [00:03<6:45:58,  7.13it/s][A

Step: 22 Training Loss: 0.7507443428039551 Validation Loss: 0.0 Epoch loss: 0.7378317354754969Step: 23 Training Loss: 0.6483144760131836 Validation Loss: 0.0 Epoch loss: 0.7339396807162658


Epoch: 1 Step:   0%|                      | 25/173749 [00:03<6:47:55,  7.10it/s][A
Epoch: 1 Step:   0%|                      | 26/173749 [00:03<6:49:48,  7.07it/s][A

Step: 24 Training Loss: 0.38597607612609863 Validation Loss: 0.0 Epoch loss: 0.7194411971916755Step: 25 Training Loss: 0.47369274497032166 Validation Loss: 0.0 Epoch loss: 0.7096112591028213


Epoch: 1 Step:   0%|                      | 27/173749 [00:03<7:05:49,  6.80it/s][A
Epoch: 1 Step:   0%|                      | 28/173749 [00:04<7:00:36,  6.88it/s][A

Step: 26 Training Loss: 1.676978349685669 Validation Loss: 0.0 Epoch loss: 0.7468176856637001Step: 27 Training Loss: 0.9671545624732971 Validation Loss: 0.0 Epoch loss: 0.7549783107307222


Epoch: 1 Step:   0%|                      | 29/173749 [00:04<6:52:26,  7.02it/s][A
Epoch: 1 Step:   0%|                      | 30/173749 [00:04<6:55:42,  6.96it/s][A

Step: 28 Training Loss: 0.28096190094947815 Validation Loss: 0.0 Epoch loss: 0.7380491532385349Step: 29 Training Loss: 1.2526551485061646 Validation Loss: 0.0 Epoch loss: 0.7557941875581083


Epoch: 1 Step:   0%|                      | 31/173749 [00:04<6:55:02,  6.98it/s][A
Epoch: 1 Step:   0%|                      | 32/173749 [00:04<6:52:31,  7.02it/s][A

Step: 30 Training Loss: 0.6670126914978027 Validation Loss: 0.0 Epoch loss: 0.7528348043560982Step: 31 Training Loss: 0.235700324177742 Validation Loss: 0.0 Epoch loss: 0.7361530469309899


Epoch: 1 Step:   0%|                      | 33/173749 [00:04<6:45:32,  7.14it/s][A
Epoch: 1 Step:   0%|                      | 34/173749 [00:04<6:56:53,  6.94it/s][A

Step: 32 Training Loss: 0.5043062567710876 Validation Loss: 0.0 Epoch loss: 0.728907834738493Step: 33 Training Loss: 0.40166598558425903 Validation Loss: 0.0 Epoch loss: 0.7189914150671526


Epoch: 1 Step:   0%|                      | 35/173749 [00:05<6:57:55,  6.93it/s][A
Epoch: 1 Step:   0%|                      | 36/173749 [00:05<6:50:28,  7.05it/s][A

Step: 34 Training Loss: 0.13728468120098114 Validation Loss: 0.0 Epoch loss: 0.7018823934828534Step: 35 Training Loss: 0.8659298419952393 Validation Loss: 0.0 Epoch loss: 0.7065694634403501


Epoch: 1 Step:   0%|                      | 37/173749 [00:05<6:57:58,  6.93it/s][A
Epoch: 1 Step:   0%|                      | 38/173749 [00:05<6:54:59,  6.98it/s][A

Step: 36 Training Loss: 0.40056735277175903 Validation Loss: 0.0 Epoch loss: 0.698069404810667Step: 37 Training Loss: 0.8175098896026611 Validation Loss: 0.0 Epoch loss: 0.7012975260212615


Epoch: 1 Step:   0%|                      | 39/173749 [00:05<6:52:14,  7.02it/s][A
Epoch: 1 Step:   0%|                      | 40/173749 [00:05<6:51:20,  7.04it/s][A

Step: 38 Training Loss: 1.0123860836029053 Validation Loss: 0.0 Epoch loss: 0.7094840670102521Step: 39 Training Loss: 0.28905513882637024 Validation Loss: 0.0 Epoch loss: 0.6987038380824603


Epoch: 1 Step:   0%|                      | 41/173749 [00:05<7:04:11,  6.82it/s][A
Epoch: 1 Step:   0%|                      | 42/173749 [00:06<7:07:12,  6.78it/s][A

Step: 40 Training Loss: 2.0395355224609375 Validation Loss: 0.0 Epoch loss: 0.7322246301919222Step: 41 Training Loss: 0.42142221331596375 Validation Loss: 0.0 Epoch loss: 0.72464408343885


Epoch: 1 Step:   0%|                      | 43/173749 [00:06<7:01:21,  6.87it/s][A
Epoch: 1 Step:   0%|                      | 44/173749 [00:06<6:53:50,  7.00it/s][A

Step: 42 Training Loss: 1.5273233652114868 Validation Loss: 0.0 Epoch loss: 0.7437554949096271Step: 43 Training Loss: 0.643120288848877 Validation Loss: 0.0 Epoch loss: 0.7414151412803073


Epoch: 1 Step:   0%|                      | 45/173749 [00:06<7:04:11,  6.82it/s][A
Epoch: 1 Step:   0%|                      | 46/173749 [00:06<6:59:10,  6.91it/s][A

Step: 44 Training Loss: 0.8368495106697083 Validation Loss: 0.0 Epoch loss: 0.7435841042209755Step: 45 Training Loss: 2.5515990257263184 Validation Loss: 0.0 Epoch loss: 0.7837622135877609


Epoch: 1 Step:   0%|                      | 47/173749 [00:06<7:08:03,  6.76it/s][A
Epoch: 1 Step:   0%|                      | 48/173749 [00:06<7:04:13,  6.82it/s][A

Step: 46 Training Loss: 0.5309416651725769 Validation Loss: 0.0 Epoch loss: 0.77826611470917Step: 47 Training Loss: 0.8082621097564697 Validation Loss: 0.0 Epoch loss: 0.7789043273697508


Epoch: 1 Step:   0%|                      | 49/173749 [00:07<6:53:25,  7.00it/s][A
Epoch: 1 Step:   0%|                      | 50/173749 [00:07<7:01:15,  6.87it/s][A

Step: 48 Training Loss: 0.5133313536643982 Validation Loss: 0.0 Epoch loss: 0.7733715570842227Step: 49 Training Loss: 1.143277883529663 Validation Loss: 0.0 Epoch loss: 0.7809206657871908


Epoch: 1 Step:   0%|                      | 51/173749 [00:07<7:09:53,  6.73it/s][A
Epoch: 1 Step:   0%|                      | 52/173749 [00:07<7:01:49,  6.86it/s][A

Step: 50 Training Loss: 0.3951103091239929 Validation Loss: 0.0 Epoch loss: 0.7732044586539268Step: 51 Training Loss: 1.107986569404602 Validation Loss: 0.0 Epoch loss: 0.7797688137666852


Epoch: 1 Step:   0%|                      | 53/173749 [00:07<6:52:42,  7.01it/s][A
Epoch: 1 Step:   0%|                      | 54/173749 [00:07<6:57:13,  6.94it/s][A

Step: 52 Training Loss: 0.36590635776519775 Validation Loss: 0.0 Epoch loss: 0.7718099203820412Step: 53 Training Loss: 0.44168007373809814 Validation Loss: 0.0 Epoch loss: 0.7655810553510234


Epoch: 1 Step:   0%|                      | 55/173749 [00:07<6:58:55,  6.91it/s][A
Epoch: 1 Step:   0%|                      | 56/173749 [00:08<6:57:22,  6.94it/s][A

Step: 54 Training Loss: 0.698840856552124 Validation Loss: 0.0 Epoch loss: 0.7643451257436363Step: 55 Training Loss: 0.955791175365448 Validation Loss: 0.0 Epoch loss: 0.7678259630094875


Epoch: 1 Step:   0%|                      | 57/173749 [00:08<6:50:20,  7.05it/s][A

Step: 56 Training Loss: 0.9709116220474243 Validation Loss: 0.0 Epoch loss: 0.7714524926351649Step: 57 Training Loss: 0.48587048053741455 Validation Loss: 0.0 Epoch loss: 0.7664422818966079

KeyboardInterrupt: 