In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import os
import json
import torch.nn as nn
import math
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
for i in range(torch.cuda.device_count()):
    print(i, torch.cuda.get_device_name(i))

0 NVIDIA GeForce RTX 3090


In [3]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [4]:
batch_size = 16
n_epochs = 500
model_no = 'transformer_with_word2vec'
exp = 1

In [5]:
class custom_transformer(nn.Module):

    def __init__(self, no_out_vect = 128, num_classes=128, hidden_dim=512, nheads=8, num_encoder_layers=5, 
                 num_decoder_layers=5):
        super(custom_transformer, self).__init__()
        self.hidden_dim = hidden_dim
#         self.transformer = nn.Transformer(hidden_dim, nheads, num_encoder_layers, num_decoder_layers, 
#                                           batch_first=True, activation="relu")
        self.encoder = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nheads)
        self.transformer_encoder = nn.TransformerEncoder(self.encoder, num_layers = num_encoder_layers)
        
        self.decoder = nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=nheads)
        self.transformer_decoder = nn.TransformerEncoder(self.encoder, num_layers = num_encoder_layers)
        
        self.relu_layer = nn.ReLU()
        self.sigmoid_layer = nn.Sigmoid()
        
    def positionalencoding1d(self, d_model, length):
        """
        :param d_model: dimension of the model
        :param length: length of positions
        :return: length*d_model position matrix
        """
        if d_model % 2 != 0:
            raise ValueError("Cannot use sin/cos positional encoding with "
                             "odd dim (got dim={:d})".format(d_model))
        pe = torch.zeros(length, d_model)
        position = torch.arange(0, length).unsqueeze(1)
        div_term = torch.exp((torch.arange(0, d_model, 2, dtype=torch.float) *
                             -(math.log(10000.0) / d_model)))
        pe[:, 0::2] = torch.sin(position.float() * div_term)
        pe[:, 1::2] = torch.cos(position.float() * div_term)

        return pe

    def forward(self, feat_input):
        feat_input = feat_input.flatten(2).permute(0, 2, 1)

        feat_input += self.positionalencoding1d(self.hidden_dim, feat_input.shape[-2]).repeat(feat_input.shape[0], 1, 1)
#         features = self.transformer(feat_input.cuda(), self.learnable_query.repeat(feat_input.shape[0], 1, 1))
        enc_features = self.transformer_encoder(feat_input)
        dec_features = self.transformer_decoder(feat_input, enc_features)
        features = self.linear1(features.flatten(1))
        features = self.sigmoid_layer(features)

        return features

In [6]:
new_model = custom_transformer()
# new_model.to(device)
print("Total trainable params:", torch.nn.utils.parameters_to_vector([p for p in new_model.parameters() if p.requires_grad]).numel())



Total trainable params: 37828608


In [7]:
new_model

custom_transformer(
  (encoder): TransformerEncoderLayer(
    (self_attn): MultiheadAttention(
      (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
    )
    (linear1): Linear(in_features=512, out_features=2048, bias=True)
    (dropout): Dropout(p=0.1, inplace=False)
    (linear2): Linear(in_features=2048, out_features=512, bias=True)
    (norm1): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (norm2): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (dropout1): Dropout(p=0.1, inplace=False)
    (dropout2): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-4): 5 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=512, out_features=512, bias=True)
        )
        (linear1): Linear(in_features=512, out_features=2048, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
    

In [8]:
# Keeping Stop Words

In [9]:
from torch.utils.data import Dataset, DataLoader

class dataset_loader(Dataset):
    
    def __init__(self, corpus_dir, word2vec_dir):
        self.corpus_dir = corpus_dir
        self.word2vec_all = open('/home/abhijeet/Desktop/TRIZ/word_vectors.json', 'r')
        self.word2vec_all = json.load(self.word2vec_all)
        self.text = pd.read_excel(self.corpus_dir + 'generated_data.xlsx')['text']
        self.label = pd.read_excel(self.corpus_dir + 'generated_data.xlsx')['labels']
        
    def __len__(self):
        return self.text.shape[0]
    
    def __getitem__(self, idx):
        row_text = self.text[idx].lower().replace('\n', ' ').split()
        row_label = eval(self.label[idx])
        print(type(row_label), row_label)
#         print(row_label.shape, row_text.shape)
        word2vec_matrix = []
        
        for count, i in enumerate(row_text):
            try:
                word2vec_matrix.append(np.array(self.word2vec_all[i]))
                if count == 20000:
                    break
            except:
                word2vec_matrix.append(np.zeros(100))
        
        for i in range(20000 - len(word2vec_matrix)):
            word2vec_matrix.append(np.zeros(100))
            
#         print(len(word2vec_matrix))

        for next_count, i in enumerate(row_label):
            try:
                word2vec_matrix.append(np.array(self.word2vec_all[i]))
                if next_count == 499:
                    break
            except:
                word2vec_matrix.append(np.zeros(100))
            
        for i in range(499 - next_count):
            word2vec_matrix.append(np.zeros(100))

#         print(len(word2vec_matrix))
        
        output = {'text_label': np.array(word2vec_matrix)}
        
        return output

In [10]:
train_data = dataset_loader(corpus_dir='/home/abhijeet/Desktop/TRIZ/All_data/CPC Data/',
                              word2vec_dir='/home/abhijeet/Desktop/TRIZ/word_vectors.json')

In [11]:
# print(len(train_data))
for i in range(len(train_data)):
    sample = train_data[i]
    break

<class 'list'> ['A44B19/00', 'A41D10/00', 'A44B19/26', 'A41D15/04', 'A44B19/262', 'A47G9/086']
A44B19/00
A41D10/00
A44B19/26
A41D15/04
A44B19/262
A47G9/086


In [12]:
batch_size = 256

In [13]:
train_sampler = torch.utils.data.RandomSampler(train_data)

dataloader_train = DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=0)

In [14]:
import torch.nn.functional as F
def criterion(predicted, target):
    """
    Compute the Kullback-Leibler Divergence loss between two probability distributions.

    Args:
        p (torch.Tensor): True distribution (e.g., ground truth probabilities).
        q (torch.Tensor): Approximate distribution (e.g., predicted probabilities).

    Returns:
        torch.Tensor: KL Divergence loss.
    """
    return F.kl_div(F.log_softmax(target, dim=1), F.softmax(predicted, dim=1), reduction='batchmean')

In [15]:
new_model.to(device)
optimizer = torch.optim.SGD(new_model.parameters(), lr=0.009, weight_decay=0.0001, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [16]:
for epoch in range(n_epochs):
    num_nans = 0
    running_loss = 0.0
    val_loss = 0.0
    print(f"\nEpoch: {epoch+1}")
    inner_pbar = tqdm(total=len(dataloader_train), position=1, leave=False, ascii=True, desc=f"Epoch: {epoch+1} Step")

    for i, data in enumerate(dataloader_train, 1):
        features = data['text_label']
        y_ground_truth = features
        optimizer.zero_grad()

        preds = new_model(features)
        
        loss = criterion(predicted=preds, target=y_ground_truth)

        inner_pbar.update(1)
        
        running_loss += loss.detach().item()

        print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Epoch loss: {running_loss/(i)}", end="")
        # print(f"\rStep: {i} Training Loss: {loss.item()} Validation Loss: {val_loss} Nans: {num_nans}")

        if torch.isnan(loss):
            if num_nans > 10:
                raise RuntimeError(f"Model Error: Encountered {num_nans} nan loss")
            num_nans += 1
            continue
        # loss.requires_grad = True
        loss.backward()

        optimizer.step()
    scheduler.step()
    inner_pbar.close()


Epoch: 1



Epoch: 1 Step:   0%|                                    | 0/686 [00:00<?, ?it/s][A

<class 'list'> ['G09F1/04']
G09F1/04
<class 'list'> ['G06Q10/06', 'G05B23/0283']
G06Q10/06
G05B23/0283
<class 'list'> ['H03F1/0294', 'H03F3/24', 'H03F2200/331', 'H03F1/3282', 'H03F1/32', 'H03F2201/3233', 'H03F1/3247']
H03F1/0294
H03F3/24
H03F2200/331
H03F1/3282
H03F1/32
H03F2201/3233
H03F1/3247
<class 'list'> ['Y10T29/49798', 'Y10T29/49073', 'H02K3/26', 'Y10T29/49789', 'H02K21/24', 'H02K15/02', 'Y10T29/49009', 'H02K11/33']
Y10T29/49798
Y10T29/49073
H02K3/26
Y10T29/49789
H02K21/24
H02K15/02
Y10T29/49009
H02K11/33
<class 'list'> ['B01D2253/108', 'B01D2253/304', 'B01D2253/306', 'B01D2257/102', 'B01D2259/402', 'B01D53/047', 'B01D2256/12', 'C01B13/0259', 'C01B2210/0046', 'B01D53/0415', 'B01D2259/4575']
B01D2253/108
B01D2253/304
B01D2253/306
B01D2257/102
B01D2259/402
B01D53/047
B01D2256/12
C01B13/0259
C01B2210/0046
B01D53/0415
B01D2259/4575
<class 'list'> ['H05K7/1491']
H05K7/1491
<class 'list'> ['G01R15/205', 'H02H3/08', 'H02H1/0007', 'H02H1/06']
G01R15/205
H02H3/08
H02H1/0007
H02H1/06
<cla

A61J1/2044
A61M5/1782
A61J1/2089
A61J1/2051
A61J1/2075
A61M2207/00
B65B3/003
<class 'list'> ['C09K2211/1088', 'C09K2211/1014', 'C09K2211/1033', 'C09K2211/1007', 'C09K2211/1011', 'C09K11/06', 'Y10T436/143333', 'C09K2211/1029']
C09K2211/1088
C09K2211/1014
C09K2211/1033
C09K2211/1007
C09K2211/1011
C09K11/06
Y10T436/143333
C09K2211/1029
<class 'list'> ['F25B39/04', 'F25B47/00', 'F25D2323/00283', 'F25B2600/111', 'F24F2221/22', 'F28G13/00', 'Y02B30/70']
F25B39/04
F25B47/00
F25D2323/00283
F25B2600/111
F24F2221/22
F28G13/00
Y02B30/70
<class 'list'> ['B60P3/025']
B60P3/025
<class 'list'> ['B65D35/22', 'B65D75/527', 'Y10S206/823']
B65D35/22
B65D75/527
Y10S206/823
<class 'list'> ['G11C7/1006', 'G11C2216/14', 'G11C16/26', 'G11C7/18', 'G11C16/10', 'G11C7/06', 'G11C11/5642', 'G11C16/0483', 'G11C11/5628']
G11C7/1006
G11C2216/14
G11C16/26
G11C7/18
G11C16/10
G11C7/06
G11C11/5642
G11C16/0483
G11C11/5628
<class 'list'> ['A61F2002/30878', 'A61F2002/30604', 'A61F2230/0095', 'A61F2/30767', 'A61F2002/443', '

H04L9/3073
H04L63/0442
<class 'list'> ['H04L25/14']
H04L25/14
<class 'list'> ['A61P29/00', 'A61P7/02', 'C07K14/745', 'C07K14/46']
A61P29/00
A61P7/02
C07K14/745
C07K14/46
<class 'list'> ['B65D85/546', 'B65D83/38', 'B65D83/206']
B65D85/546
B65D83/38
B65D83/206
<class 'list'> ['G01S5/0072', 'G08B21/0269', 'G08B21/0261']
G01S5/0072
G08B21/0269
G08B21/0261
<class 'list'> ['B23K9/073', 'B23K9/1056', 'B23K9/0953', 'B23K9/092']
B23K9/073
B23K9/1056
B23K9/0953
B23K9/092
<class 'list'> ['B25B13/06', 'B25B13/04']
B25B13/06
B25B13/04
<class 'list'> ['G06K7/10851', 'G06K7/10']
G06K7/10851
G06K7/10
<class 'list'> ['G03G15/2057', 'G03G15/2064', 'G03G2215/2009']
G03G15/2057
G03G15/2064
G03G2215/2009
<class 'list'> ['B26B5/001']
B26B5/001
<class 'list'> ['G11B2020/1457', 'G11B20/1426', 'G11B2020/1442', 'G11B2020/143']
G11B2020/1457
G11B20/1426
G11B2020/1442
G11B2020/143
<class 'list'> ['E04B2/7863', 'E04B2/7455', 'E04B2/82', 'E06B1/045', 'E04B2/7457', 'E04F19/022', 'E04B2002/7462']
E04B2/7863
E04B2/745

<class 'list'> ['H04L45/02', 'H04L45/22']
H04L45/02
H04L45/22
<class 'list'> ['H01R13/7035', 'H01R24/58']
H01R13/7035
H01R24/58
<class 'list'> ['H01L23/5283', 'H01L23/522', 'H01L23/528', 'H01L23/5222', 'H01L2924/0002']
H01L23/5283
H01L23/522
H01L23/528
H01L23/5222
H01L2924/0002
<class 'list'> ['H04N5/3559', 'H01L27/14609', 'H04N5/335', 'H04N5/3591']
H04N5/3559
H01L27/14609
H04N5/335
H04N5/3591
<class 'list'> ['C12N9/6421']
C12N9/6421
<class 'list'> ['C09K8/685', 'Y10S507/922', 'C08F220/58', 'C08F220/585', 'C09K8/68', 'Y10S507/903']
C09K8/685
Y10S507/922
C08F220/58
C08F220/585
C09K8/68
Y10S507/903
<class 'list'> ['B65D2205/00', 'B65D75/26', 'B65D77/225']
B65D2205/00
B65D75/26
B65D77/225
<class 'list'> ['A61P7/00', 'A61P29/00', 'A61K31/223', 'A61P9/04', 'A61P25/00', 'A61K31/415', 'A61K31/22', 'A61K31/519', 'A61K31/66', 'A61K31/662', 'A61K31/4178', 'A61P3/00', 'A61K31/401', 'A61P1/16', 'A61K31/405', 'A61P13/12', 'A61K31/554', 'A61K31/403', 'A61P37/06', 'A61K31/4184', 'A61K31/407', 'A61P31

G11C7/1057
G11C7/1051
<class 'list'> ['C07C403/24']
C07C403/24
<class 'list'> ['C23C16/45589', 'C23C16/4588', 'C30B25/12']
C23C16/45589
C23C16/4588
C30B25/12
<class 'list'> ['H01L23/53223', 'H01L23/53219', 'H01L21/76844', 'H01L21/32051', 'H01L2221/1078', 'H01L21/7685', 'H01L2924/0002', 'H01L21/76843']
H01L23/53223
H01L23/53219
H01L21/76844
H01L21/32051
H01L2221/1078
H01L21/7685
H01L2924/0002
H01L21/76843
<class 'list'> ['G01R31/385', 'G01R31/392']
G01R31/385
G01R31/392
<class 'list'> ['F16K5/0642', 'F16K5/0657', 'F16K27/067']
F16K5/0642
F16K5/0657
F16K27/067
<class 'list'> ['Y10S606/908', 'A61F2/0805', 'A61B17/8875', 'A61F2/0811', 'Y10S606/916']
Y10S606/908
A61F2/0805
A61B17/8875
A61F2/0811
Y10S606/916
<class 'list'> ['H04B1/0003', 'H04B1/28', 'H04B1/406', 'H04B15/02', 'H04B15/04', 'H04B2215/064']
H04B1/0003
H04B1/28
H04B1/406
H04B15/02
H04B15/04
H04B2215/064
<class 'list'> ['B60R3/02']
B60R3/02
<class 'list'> ['B41J11/005', 'B41J2/01', 'B41J29/026', 'B41J3/543', 'B41J2002/14491', 'B41

RuntimeError: The size of tensor a (20500) must match the size of tensor b (512) at non-singleton dimension 2

In [None]:
dataloader_train[1]