In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import os
import json
import torch.nn as nn
import math
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

0 NVIDIA GeForce RTX 3090


In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [4]:
batch_size = 16
n_epochs = 500
model_no = 'transformer_with_word2vec'
exp = 1

In [5]:
class custom_transformer(nn.Module):

    def __init__(self, no_out_vect = 128, num_classes=128, hidden_dim=100, nheads=10, num_encoder_layers=5, 
                 num_decoder_layers=5):
        super(custom_transformer, self).__init__()
        self.hidden_dim = hidden_dim
#         self.transformer = nn.Transformer(hidden_dim, nheads, num_encoder_layers, num_decoder_layers, 
#                                           batch_first=True, activation="relu")
        self.encoder = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nheads)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder, num_layers = num_encoder_layers)
        
        self.decoder = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nheads)
        self.transformer_decoder = nn.TransformerEncoder(self.encoder, num_layers = num_encoder_layers)
        
        self.relu_layer = nn.ReLU()
        self.sigmoid_layer = nn.Sigmoid()
        
    def positionalencoding1d(self, d_model, length):
        """
        :param d_model: dimension of the model
        :param length: length of positions
        :return: length*d_model position matrix
        """
        if d_model % 2 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                             "odd dim (got dim={:d})".format(d_model))
        pe = torch.zeros(length, d_model)
        position = torch.arange(0, length).unsqueeze(1)
        div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model)))
        pe[:, 0::2] = torch.sin(position.float() * div_term)
        pe[:, 1::2] = torch.cos(position.float() * div_term)

        return pe

    def forward(self, feat_input):
        feat_input = feat_input.flatten(2)

        feat_input += self.positionalencoding1d(self.hidden_dim, feat_input.shape[-2]).repeat(feat_input.shape[0], feat_input.shape[1], feat_input.shape[2])
#         features = self.transformer(feat_input.cuda(), self.learnable_query.repeat(feat_input.shape[0], 1, 1))
        enc_features = self.transformer_encoder(feat_input)
        dec_features = self.transformer_decoder(feat_input, enc_features)
        features = self.linear1(features.flatten(1))
        features = self.sigmoid_layer(features)

        return features

In [6]:
new_model = custom_transformer()
# new_model.to(device)
print("Total trainable params:", torch.nn.utils.parameters_to_vector([p for p in new_model.parameters() if p.requires_grad]).numel())

Total trainable params: 5430576




In [7]:
new_model

custom_transformer(
  (encoder): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
    )
    (linear1): Linear(in_features=100, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=100, bias=True)
    (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-4): 5 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=100, out_features=100, bias=True)
        )
        (linear1): Linear(in_features=100, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
    

In [8]:
# Keeping Stop Words

In [9]:
from torch.utils.data import Dataset, DataLoader

class dataset_loader(Dataset):
    
    def __init__(self, corpus_dir, word2vec_dir):
        self.corpus_dir = corpus_dir
        self.word2vec_all = open('/home/abhijeet/Desktop/TRIZ/word_vectors.json', 'r')
        self.word2vec_all = json.load(self.word2vec_all)
        self.dataset = pd.read_excel(self.corpus_dir + 'generated_data.xlsx')
        self.dataset = self.dataset.dropna()
        self.text = self.dataset['text']
        self.label = self.dataset['labels']
        
    def __len__(self):
        return self.text.shape[0]
    
    def __getitem__(self, idx):
        row_text = self.text[idx].lower().replace('\n', ' ').split()
        row_label = eval(self.label[idx])
#         print(row_label.shape, row_text.shape)
        word2vec_matrix = []
        
        for count, i in enumerate(row_text):
            try:
                word2vec_matrix.append(np.array(self.word2vec_all[i]))
                if count == 20000:
                    break
            except:
                word2vec_matrix.append(np.zeros(100))
        
        for i in range(20000 - len(word2vec_matrix)):
            word2vec_matrix.append(np.zeros(100))
            
#         print(len(word2vec_matrix))

        for next_count, i in enumerate(row_label):
            try:
                word2vec_matrix.append(np.array(self.word2vec_all[i]))
                if next_count == 499:
                    break
            except:
                word2vec_matrix.append(np.zeros(100))
            
        for i in range(499 - next_count):
            word2vec_matrix.append(np.zeros(100))

#         print(len(word2vec_matrix))
        
        output = {'text_label': np.array(word2vec_matrix)}
        
        return output

In [10]:
train_data = dataset_loader(corpus_dir='/home/abhijeet/Desktop/TRIZ/All_data/CPC Data/',
                              word2vec_dir='/home/abhijeet/Desktop/TRIZ/word_vectors.json')

In [11]:
# print(len(train_data))
for i in range(len(train_data)):
    sample = train_data[i]
    break

In [12]:
batch_size = 256

In [13]:
train_sampler = torch.utils.data.RandomSampler(train_data)

dataloader_train = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=0)

In [14]:
import torch.nn.functional as F
def criterion(predicted, target):
    """
    Compute the Kullback-Leibler Divergence loss between two probability distributions.

    Args:
        p (torch.Tensor): True distribution (e.g., ground truth probabilities).
        q (torch.Tensor): Approximate distribution (e.g., predicted probabilities).

    Returns:
        torch.Tensor: KL Divergence loss.
    """
    return F.kl_div(F.log_softmax(target, dim=1), F.softmax(predicted, dim=1), reduction='batchmean')

In [15]:
new_model.to(device)
optimizer = torch.optim.SGD(new_model.parameters(), lr=0.009, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [16]:
for epoch in range(n_epochs):
    num_nans = 0
    running_loss = 0.0
    val_loss = 0.0
    print(f"\nEpoch: {epoch+1}")
    inner_pbar = tqdm(total=len(dataloader_train), position=1, leave=False, ascii=True, desc=f"Epoch: {epoch+1} Step")

    for i, data in enumerate(dataloader_train, 1):
        features = data['text_label']
        y_ground_truth = features
        optimizer.zero_grad()
        print(features.shape)
        preds = new_model(features)
        
        loss = criterion(predicted=preds, target=y_ground_truth)

        inner_pbar.update(1)
        
        running_loss += loss.detach().item()

        print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Epoch loss: {running_loss/(i)}", end="")
        # print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Nans: {num_nans}")

        if torch.isnan(loss):
            if num_nans > 10:
                raise RuntimeError(f"Model Error: Encountered {num_nans} nan loss")
            num_nans += 1
            continue
        # loss.requires_grad = True
        loss.backward()

        optimizer.step()
    scheduler.step()
    inner_pbar.close()


Epoch: 1



Epoch: 1 Step:   0%|                                    | 0/679 [00:00<?, ?it/s][A

torch.Size([256, 20500, 100])


RuntimeError: [enforce fail at alloc_cpu.cpp:117] err == 0. DefaultCPUAllocator: can't allocate memory: you tried to allocate 4303360000000000 bytes. Error code 12 (Cannot allocate memory)

In [None]:
import torch

# Create a tensor
original_tensor = torch.tensor([[1, 2], [3, 4]])

# Repeat the tensor along specified dimensions
repeated_tensor = original_tensor.repeat(2, 3)  # Repeat along dimension 0, 2 times, and along dimension 1, 3 times

print("Original Tensor:")
print(original_tensor)

print("\nRepeated Tensor:")
print(repeated_tensor)
