
### Initializations


#### Package imports

In [1]:
import torch
from torchtext.legacy.data import Field, TabularDataset, BucketIterator
from torchtext.legacy import datasets
from torch.utils.data.dataset import Subset
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Transformer
from torchmetrics.functional import precision_recall,f1_score,accuracy
import torch.optim as optim
import math
from torch import Tensor

import matplotlib.pyplot as plt
import numpy as np

import random
import time
from datetime import datetime
from collections import Counter

# from imblearn.over_sampling import RandomOverSampler
from torch.utils.tensorboard import SummaryWriter

''' instal packages '''

#### parameters

In [150]:
BATCH_SIZE = 32 #1,2,4,8,16,32,64,128,256,512,1028
path = "data_filter/"
# path = "small_data/" 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Reference 
# ['charngram.100d', 'fasttext.en.300d', 'fasttext.simple.300d', 'glove.42B.300d', 'glove.840B.300d', 
#  'glove.twitter.27B.25d', 'glove.twitter.27B.50d', 'glove.twitter.27B.100d', 'glove.twitter.27B.200d', 
#  'glove.6B.50d', 'glove.6B.100d', 'glove.6B.200d', 'glove.6B.300d']


#### seed initializing

In [151]:
seed=1234

random.seed(1234)
np.random.seed(1234)
torch.manual_seed(1234)
torch.cuda.manual_seed(1234)
torch.backends.cudnn.determininistic=True

### Data Processing

In [152]:
# step 1

# removeTokens = lambda values: values[1:-1]   # function to remove [CLS] and [SEP] from the data set

tokens = Field(sequential=True,use_vocab=True,batch_first = True,lower=True,pad_token="<pad>", init_token = '<sos>', eos_token = '<eos>')
edits = Field(sequential=True,use_vocab=True,batch_first = True,lower=True,pad_token="<pad>", init_token = '<sos>', eos_token = '<eos>')

fields = {'tokens':('tokens',tokens),'labels':('edits',edits)}

train_data, val_data, test_data = TabularDataset.splits(path=path,train='ptrain.jsonl',validation='val.jsonl',
                                                        test='test.jsonl',format='json',fields=fields)

# train_data is dataset with edits and tokens pair. in edits and tokens list of string is available


In [153]:
# step 2  Build a vocab

tokens.build_vocab(train_data,min_freq=1)
edits.build_vocab(train_data,min_freq=1)

# os =  RandomOverSampler()
# X_train_res, y_train_res = os.fit_sample(train_data, test_data)

In [154]:
# Step 3 (Create a iterator to loop over the data. Also separate batchs with 
#         similar length and pad the extra space)

train_data_iterator = BucketIterator(train_data,train=True,
                                            batch_size=BATCH_SIZE, device= device,shuffle=True)#,sort_within_batch=False)

val_data_iterator =BucketIterator(val_data,BATCH_SIZE,train=False,sort=False, device= device)#,sort_within_batch=False)

test_data_iterator =BucketIterator(test_data,BATCH_SIZE,train=False,sort=False, device= device)#,sort_within_batch=False)



#### data processing result

In [155]:
len(train_data)

49896

In [156]:
len(val_data.examples)

12374

In [157]:
print(train_data)              # Tabular Data set object

print(train_data[0].__dict__.keys())
print(train_data[0].__dict__.values())

<torchtext.legacy.data.dataset.TabularDataset object at 0x7f856837b250>
dict_keys(['tokens', 'edits'])
dict_values([['[cls]', 'alistair', 'darling', 'is', 'expected', 'to', 'announce', 'details', 'of', 'tax', 'cuts', 'and', 'plans', 'to', 'increases', 'public', 'spending', '[sep]'], ['$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$transform_verb_vbz_vb', '$keep', '$append_.', '$keep']])


In [158]:
batch_1 = next(iter(train_data_iterator))
print(batch_1.edits.shape)
print(batch_1.tokens.shape)

torch.Size([32, 53])
torch.Size([32, 53])


In [159]:
#string to index
print(f"tokens.vocab.stoi['0'] = {tokens.vocab.stoi['']}")
print(f"tokens.vocab.itos[0] = {tokens.vocab.itos[2]}")
print(f"edits.vocab.stoi['$keep'] = {edits.vocab.stoi['0']}")
print(f"edits.vocab.itos[1] = {edits.vocab.itos[1]}")

tokens.vocab.stoi['0'] = 0
tokens.vocab.itos[0] = <sos>
edits.vocab.stoi['$keep'] = 0
edits.vocab.itos[1] = <pad>


In [160]:
#length of vocabular create from the data set
print(f"len(tokens.vocab) = {len(tokens.vocab)}")
print(f"len(edits.vocab) = {len(edits.vocab)}")

len(tokens.vocab) = 64172
len(edits.vocab) = 24


In [161]:
# no. of unique words in tokens and edits
print(f"len(tokens.vocab.freqs.keys()) = {len(tokens.vocab.freqs.keys())}")
print(f"len(edits.vocab.freqs.keys()) = {len(edits.vocab.freqs.keys())} \n")
print(f"edits.vocab.freqs = {edits.vocab.freqs}")

len(tokens.vocab.freqs.keys()) = 64168
len(edits.vocab.freqs.keys()) = 20 

edits.vocab.freqs = Counter({'$keep': 1191726, '$delete': 39872, '$replace_.': 7235, '$replace_,': 7183, '$transform_agreement_singular': 6220, '$append_.': 5167, '$append_,': 4905, '$append_the': 4686, '$replace_to': 3634, '$replace_the': 3574, '$replace_of': 3458, '$transform_verb_vbz_vb': 3253, '$replace_in': 2898, '$transform_verb_vbg_vb': 2714, '$transform_verb_vbn_vb': 2637, '$append_to': 2499, '$append_of': 2413, '$transform_agreement_plural': 2340, '$append_and': 2272, '$append_a': 2204})


In [166]:
#shape of vocabular create from the data set
print(f"tokens.vocab.vectors.shape = {tokens}")
print(f"edits.vocab.vectors.shape = {edits}")
print()

tokens.vocab.vectors.shape = <torchtext.legacy.data.field.Field object at 0x7f8568393fd0>
edits.vocab.vectors.shape = <torchtext.legacy.data.field.Field object at 0x7f8568393410>



build vocab just takes unique tokens from the dataset and given a position and stores
as a dictionary. when it is applied to the dataset the result comming from the 
bucket iteartor is just a postion no. from the build vocab and the rest is padded

### Model

In [167]:
class Seq2SeqTransformer(nn.Module):
    def __init__(self,
                 encoder_layer:int, # num of layer in encoder
                 decoder_layer:int, #num of layer in decoder
                 emb_dim:int, #embedding dimension
                 head:int, #num of head
                 src_vocab_size:int,
                 trg_vocab_size:int,
                 feedforward_dim:int, 
                 src_pad_idx:int,
                 trg_pad_idx:int,
                 device:str,
                 dropout:float=0.0):
        
        super().__init__()
        
        self.head = head
        self.emb_dim = emb_dim
        self.device = device
        self.src_pad_idx = src_pad_idx
        self.trg_pad_idx = trg_pad_idx
        
        #Embedding layer
        self.src_embedding_layer = nn.Embedding(src_vocab_size,emb_dim,device=device)
        self.trg_embedding_layer = nn.Embedding(trg_vocab_size,emb_dim,device=device)
        
        #transformer layer
        self.transformer = nn.Transformer(d_model = emb_dim,
                                       nhead = head,
                                       num_encoder_layers = encoder_layer,
                                       num_decoder_layers = decoder_layer,
                                       dim_feedforward=feedforward_dim,
                                       dropout = dropout,
                                       batch_first = True,
                                       device = device)
        
        #Linear Layer
        self.linear_layer = nn.Linear(emb_dim,trg_vocab_size)
        
        
        # Dropout Layer
        self.dropout = nn.Dropout(dropout)

        
    def positional_embedding(self,length = 200):
        
        position = torch.arange(length).unsqueeze(1).to(self.device)    # [batch_size, num_of_tokens]
        denominator = torch.exp(torch.arange(0, self.emb_dim, 2) * (-math.log(10000.0) / self.emb_dim))
        
        position_embedding = torch.zeros((length, self.emb_dim))
        position_embedding[:,0::2] = torch.sin(position*denominator)
        position_embedding[:,1::2] = torch.cos(position*denominator)
        
        position_embedding = position_embedding.unsqueeze(0)
        # position_embedding = (1,lenght,emb_dim)
        
        return position_embedding
                                                    
        
    def make_padding_mask(self,template,idx):
        #mask = [batch size, src_len/trg_len]
        return (template == idx)
    
    def trg_mask(self,trg):
        trg_len = trg.shape[1]
        trg_mask = torch.triu(torch.ones((trg_len,trg_len),device=self.device)).bool()
        #trg_mask = (trg_len,trg_len)
        return trg_mask
    
    def forward(self,
                src : Tensor, #(batch_size,src_len)
                trg : Tensor): #(batch_size,trg_len) 
                #in this case src_len == trg_len
               
        
        batch_size , src_len  = src.shape
        trg_len = src_len # depends upon the senario in our case source and target lenght are same
        
        # Applying embedding layer
        
        src_emb = self.src_embedding_layer(src)+self.positional_embedding(src_len)
        trg_emb = self.trg_embedding_layer(trg)+self.positional_embedding(trg_len)
        src_emb = self.dropout(src_emb)
        trg_emb = self.dropout(trg_emb)
        # print(f"src_emb {src_emb[0]}")
        # print(f"trg_emb {trg_emb[0]}")
        # src_emb = trg_emb = (batch_size,src_len/trg_len,emb_dim)
        
        trg_mask = self.trg_mask(trg)
        src_pad_mask = self.make_padding_mask(src,self.src_pad_idx)
        trg_pad_mask = self.make_padding_mask(trg,self.trg_pad_idx)
        
        print(f"batch_size {batch_size}")
        print(f"src {src.shape}")
        print(f"trg {trg.shape}")
        print(f"trg_mask {trg_mask} type {trg_mask.dtype}")
        print(f"src_pad {src_pad_mask[0]} type {src_pad_mask.dtype}")
        print(f"trg_pad {trg_pad_mask[0]} type {trg_pad_mask.dtype}")
        print(f"src {src[0]}")
        print(f"trg {trg[0]}")
            
        # Apply transformer layer
        transformer_output = self.transformer(src_emb, #(batch_size,src_len,emb_dim)
                                   trg_emb, #(batch_size,trg_len,emb_dim) 
                                   tgt_mask=trg_mask, #(trg_len,trg_len)
                                   src_key_padding_mask=src_pad_mask, #[batch size, src_len]
                                   tgt_key_padding_mask=trg_pad_mask) #[batch size, trg_len]
        
        # transformer_output = (batch_size,trg_len,emb_dim)
        print(f"transformer_output {transformer_output.shape} ")
        print(f"transformer_output {transformer_output[0]}")
        # Apply Linear layer
        output = self.linear_layer(transformer_output)
        print(f"output {output[0]}")
        print(f"output {output.shape} ")
        # output = (batch_size,trg_len,num_class)
        
        return output.permute(0,2,1)
        


### Utility functions

In [168]:
def train_model(model,data_iterator,optimizer,criterion,clip):
    
    model.train()
    epoch_loss,acc,f1_point = 0,0,0
        
    for i, batch in enumerate(train_data_iterator):
        
        # Make the gradient vector to zero  
        # So not to add previous gradient values with the new gradient value
        optimizer.zero_grad()

        # converting to cpu or gpu variable
        src = batch.tokens.to(device)
        trg = batch.edits.to(device)
        
        # Forward pass
        # get output from the model
        output = model(src, trg)
        # outputs = [Batch_size,num_class,trg_len] 
        # print(f"out dim = {output.shape}")
        # print(f"trg dim = {trg.shape} ")
        
        loss = criterion(output, trg[:,1:])

        # Backward and optimize
        
        # to calculate gradient
        loss.backward()
        
        
        # to make the updates in the parameter
        optimizer.step()
        
        epoch_loss += loss.item()
        print(loss.item())
        
        predicted = torch.argmax(output, dim=1)
        # print(f"predicted shape = {predicted.shape}")
        # print(f"trg shape = {trg.shape}")
        print(predicted)
        acc += accuracy(predicted, trg).item()       # TP+TN / TP+TN+FP+FN
        f1_point += f1_score(predicted, trg,average="macro",num_classes=22,mdmc_average='global').item()
        # f1_score = 2(precission*recall)/(precission+recall)
        
    acc = 100.0 * acc / len(data_iterator)
    f1_point = f1_point / len(data_iterator)
    epoch_loss = epoch_loss / len(data_iterator)
    #to return the avg loss for this epoch to train the model
    return (epoch_loss,acc,f1_point)

In [169]:
def evaluate_model(model, data_iterator, criterion):
    
    model.eval()

    epoch_loss,acc,f1_point = 0,0,0
    
    testing = []
    
    with torch.no_grad():
        for i, batch in enumerate(data_iterator):
            
            # converting to cpu or gpu variable
            src = batch.tokens.to(device)
            trg = batch.edits.to(device)

            # Forward pass
            # get output from the model
            output = model(src, trg)
            # outputs = [Batch_size,num_class,trg_len] 

            loss = criterion(output, trg)
            
            epoch_loss += loss.item() 
            
            predicted = torch.argmax(output, dim=1)

            acc += accuracy(predicted, trg).item()       # TP+TN / TP+TN+FP+FN
            f1_point += f1_score(predicted, trg,average="macro",num_classes=22,mdmc_average='global').item()
            # f1_score = 2(precission*recall)/(precission+recall)
            
    acc = 100.0 * acc / len(data_iterator)
    f1_point = f1_point / len(data_iterator)
    epoch_loss = epoch_loss / len(data_iterator)
    
    #to return the avg loss for this epoch to train the model
    return (epoch_loss,acc,f1_point)


In [170]:
def baruni_metric(dataloader):
    model.eval()
    crct_class = 0
    incrct_class = 0
    other_class = 0
    for dat in dataloader:
        
        X = dat.tokens
        Y = dat.edits
    #     print("here: ",len(X))
        
        for token,edit in zip(X,Y):
        #move to GPU
            x,y = token.to(device), edit.to(device)
            # Compute prediction error
            x = x.unsqueeze(0)
            y = y.unsqueeze(0)
            # x = [1,num_token]
    #         print(x.size(),": x")
    #         print("y: ", y.size())

            check_pred = model(x,y)
    #         print("check_pred",check_pred.size())
            check_pred = torch.argmax(check_pred, dim = 1)
        #     print(check_pred)

        #     print(y.size())
        #     break

            for i in range(len(y[0])):
                if y[0][i] not in [1,0,2]:
                    other_class += 1
                    if y[0][i] == check_pred[0][i] :
                        crct_class += 1
                    else:
                        incrct_class += 1
    
    return other_class,crct_class,incrct_class

### Hyper parameters

In [173]:
learning_rate = 0.05
num_epochs = 50
clip = 0.1
num_encoder_layer = 3
num_decoder_layer = 3
INPUT_DIM = len(tokens.vocab)
OUTPUT_DIM = len(edits.vocab)
HIDDEN_DIM = 128
EMBEDDING_DIM = 100  #vocabular size, dim
layer = 1
heads = 2
weight_decay = 0
amsgrad = False
DROPOUT = 0.1
PAD_IDX = tokens.vocab.stoi[tokens.pad_token]
UNK_IDX = tokens.vocab.stoi[tokens.unk_token]
EDIT_PAD_IDX = edits.vocab.stoi[edits.pad_token]

### Model creation

In [174]:
# model is created

model = Seq2SeqTransformer(num_encoder_layer,num_decoder_layer,EMBEDDING_DIM,heads,INPUT_DIM,OUTPUT_DIM,HIDDEN_DIM,PAD_IDX,EDIT_PAD_IDX,device,DROPOUT)


In [175]:
# Initialize optimizer and loss function

criterion = nn.CrossEntropyLoss(ignore_index=EDIT_PAD_IDX)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)#,weight_decay=weight_decay, amsgrad=amsgrad)

n_total_steps = len(train_data_iterator)
model = model.to(device)
criterion = criterion.to(device)

In [177]:
# # load the pre-tained embeddings 

# pretrained_embeddings = tokens.vocab.vectors
# # model = torch.load(path+'/model.pt')
# model.src_embedding_layer.weight.data.copy_(pretrained_embeddings)

In [178]:
# initialize zero weights for unknown and padding tokens.

model.src_embedding_layer.weight.data[UNK_IDX] = torch.zeros(EMBEDDING_DIM)
model.src_embedding_layer.weight.data[PAD_IDX] = torch.zeros(EMBEDDING_DIM)

In [179]:
# trainable parameters are printed

count_parameters= lambda model:sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 6,943,992 trainable parameters


In [180]:
batch = next(iter(train_data_iterator))

optimizer.zero_grad()

tokens_list = batch.tokens.to(device)
edits_list = batch.edits.to(device)

# Forward pass
# print(f"tokens_list {tokens_list[0]}")
# get output from the model
outputs = model(tokens_list, edits_list)


batch_size 32
src torch.Size([32, 42])
trg torch.Size([32, 42])
trg_mask tensor([[ True,  True,  True,  ...,  True,  True,  True],
        [False,  True,  True,  ...,  True,  True,  True],
        [False, False,  True,  ...,  True,  True,  True],
        ...,
        [False, False, False,  ...,  True,  True,  True],
        [False, False, False,  ..., False,  True,  True],
        [False, False, False,  ..., False, False,  True]]) type torch.bool
src_pad tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True]) type torch.bool
trg_pad tensor([False, False, False, False, False, False, False, False, False, False,
        False, False, False, False, False, False, False, False,  True,  True,
         True,  True,  Tru

In [126]:
outputs

tensor([[[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]],

        [[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]],

        [[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         ...,
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan]],

        [[nan, nan, nan,  ..., nan, nan, nan],
         [nan, nan, nan,  ..., nan, nan, nan],
         [na

### Uploading model to tensor board for visualization

In [101]:
# default `log_dir` is "runs" - we'll be more specific here
writer = SummaryWriter(f'runs/transformer_test_1.1_')


#### Train and testing

In [102]:
train_loss,train_acc,train_f1_score,val_loss,val_acc,val_f1_score = 0,0,0,0,0,0

In [103]:
start = time.perf_counter_ns()
try:
    for epoch in range(num_epochs):
        
        train_loss,train_acc,train_f1_score = train_model(model,train_data_iterator,optimizer,criterion,clip)
        val_loss,val_acc,val_f1_score = evaluate_model(model, val_data_iterator, criterion)
        
        if epoch%5 == 0:
            pass
        print (f'''Epoch [{epoch+1}/{num_epochs}],
        Train:       Loss: {train_loss:.3f}, Accuracy: {train_acc:.3f},  F1 score: {train_f1_score:.3f}
        Validation:  Loss: {val_loss:.3f}, Accuracy: {val_acc:.3f},  F1 score: {val_f1_score:.3f}''')
            
        
        # including the loss, accuracy and f1 score to tensor board
        writer.add_scalars('LOSS',     { 'Train' : train_loss   ,'Validation' : val_loss    },  epoch)
        writer.add_scalars('ACCURACY', { 'Train': train_acc     ,'Validation': val_acc      }, epoch)
        writer.add_scalars('F1 SCORE', { 'Train': train_f1_score,'Validation': val_f1_score }, epoch)
    
    
    
finally:
    end = time.perf_counter_ns()
    timetaken = (end-start)*1.66667*10**-11
    print(f"time take is {timetaken:.3f} min")
#     torch.save(model, path+'/model.pt')


nan
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])
nan
tensor([[0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        ...,
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0],
        [0, 0, 0,  ..., 0, 0, 0]])
time take is 0.010 min


ValueError: The highest label in `target` should be smaller than `num_classes`.

In [None]:
test_loss,test_accuracy,f1 = evaluate_model(model, test_data_iterator, criterion)
print(f"Test Loss {test_loss:.3f} Test accuracy {test_accuracy:.3f} F1 score {f1:.3f}")
total,crt,incrt = baruni_metric(train_data_iterator)
print(f"total no.of class excet<keep> {total}\ncrt pred {crt}\nincrt pred{incrt}")

In [None]:
# writing the hyerparameter to the tensor board
writer.add_hparams({'learning_rate' : learning_rate, 
                    'Num_epochs': num_epochs,
                    'layer': layer,
                    'input_dim' : INPUT_DIM,
                    'output_dim' : OUTPUT_DIM,
                    'hidden_dim' : HIDDEN_DIM,
                    'embedding_dim' : EMBEDDING_DIM,
                    'droppout' : DROPOUT,
                    'train_data_len': len(train_data),
                    'Val_data_len': len(test_data),
                    'batch_size': BATCH_SIZE,
                    'weight_decay' : weight_decay,
                    'amsgrad' : amsgrad
                  },
                  {
                     "total_num_of_class_except_keep":total,
                      "crt":crt,"incrt":incrt,
                      "test_loss":test_loss,
                      "test_accuracy":test_accuracy,
                      "test_f1_score":f1
                  })
writer.close()

### Rough work

In [None]:
l=['plus', ',', 'the', 'novelty', 'of', 'the', 'iphone', 'won', "'t", 'wear', 'off', ',', 'as', 'it', 'may', 'with', 'a', 'camcorder', ';', 'and', 'over', 'these', 'video', 'apps', 'have', 'fun', 'effects', 'that', 'a', 'camcorder', 'can', "'t", 'match', '.']
l1=['$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$replace_might', '$keep', '$keep', '$keep', '$replace_,', '$keep', '$delete', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep']
print(len(l))
print(len(l1))

In [None]:
a=[25, 13, 14, 29, 13, 13, 13, 25, 13, 13, 25, 21, 13, 13, 29, 13,  2,  2,
        13, 13, 13, 25]
b=[2, 2, 2, 3, 3, 2, 2, 2, 8, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]

In [None]:
print(len(a))
print(len(b))

In [None]:
n_embeddings, dim = 10,4 #10 - # of vocac size 4 - # of emdebbing dimension

In [None]:
m = nn.Conv1d(1, 2, 3)
input = torch.randn(2, 1, 5) # (batch size, no. of channel, # of words)
# x = emb_1(input)
output = m(input)

In [None]:
input.shape

In [None]:
output.shape

In [None]:
torch.arange(0, 5, 2)

In [None]:
from torch.nn import Embedding as emb

emb_1 = emb(188, 50,requires_grad = True)
print(f"embedding {emb_1}")
print(f"embedding weight's shape {emb_1.weight.shape}") #requires_grad=True therefore the matrix is learnable

print(f"values of weight {emb_1.weight}")


In [None]:
x.shape

In [None]:
x = torch.tensor([[1,2,4],[1, 2, 3]])
print(x.shape)
x=x.repeat(4, 2)
print(x.shape)


In [None]:
import torch
import torch.nn as nn

In [None]:
a = nn.Conv2d(in_channels =1 ,out_channels= 1,kernel_size = 1)(x_input)
b = nn.Conv2d(in_channels =1 ,out_channels= 1,kernel_size = 2)(a)
c = nn.Conv2d(in_channels =1 ,out_channels= 1,kernel_size = 6)(b)

In [None]:
print(f"a = {a.shape}")
print(f"b = {b.shape}")
print(f"c = {c.shape}")

In [None]:
d = c.squeeze(1).reshape(2,5,-1)
d.shape


In [None]:
print(f"f = {f.shape}")

In [None]:
g = nn.Conv1d(in_channels =1 ,out_channels= 4,kernel_size = 3)(f)

In [None]:
print(f"g = {g.shape}")

In [None]:
x_input = torch.randn([2,4,5])

# print(x_input)
x_input = x_input.permute(0,2,1).reshape(2,-1).unsqueeze(1)
print(x_input.shape)
print(x_input)

#### ref

In [None]:
a = torch.randn(2, 5, 4)
a

In [None]:
torch.argmax(a, dim=1).shape

In [None]:
torch.argmax(a, dim=-1)

In [None]:
y_act = torch.Tensor([[2,1],[1,3]]).int()
# batch size, tokens
print(f" output shape {y_act.shape}")
s1 = torch.Tensor([[[0,2,3,0],[5,0,1,3]],[[1,5,2,3],[1,5,6,13]]])
# batch size, tokens, emb lay

print(f" predicted shape {s1.shape}")
y_pred = torch.argmax(s1,dim=-1)


In [None]:
print(y_pred)
print(y_act)

In [None]:
# print(f1_score(y_act,s1))
print(accuracy_score(y_act,y_pred))

In [None]:
precision_recall(y_pred, y_act, average="macro",num_classes=4,mdmc_average='samplewise')

In [None]:
f1_score(y_pred,y_act,average="macro",num_classes=4,mdmc_average='global')

In [None]:
from seqeval.metrics import accuracy_score
from seqeval.metrics import classification_report
from seqeval.metrics import f1_score
y_true = [['O', 'O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
y_pred = [['O', 'O', 'B-MISC', 'I-MISC', 'I-MISC', 'I-MISC', 'O'], ['B-PER', 'I-PER', 'O']]
print(f1_score(y_true, y_pred))

y_act = [[2,1],[1,3]]
s1 = [[[0,2,3,0],[5,0,1,3]],[[1,5,2,3],[1,5,6,13]]])
# batch size, tokens, emb lay

print(f" predicted shape {s1.shape}")
y_pred = torch.argmax(s1,dim=-1)
print(f1_score(y_act,y_pred))

In [None]:
p, r = precision_recall(y_pred, y_act, average="macro",num_classes=4,mdmc_average='global')
print(p)
print(r)

In [None]:
(2*p.item()*r.item())/(p.item()+r.item())

In [None]:
print(accuracy(y_pred,y_act))

In [None]:
accuracy

In [None]:
0.5+0.5+0.5+0.5

In [None]:
_/4

In [None]:
x_input = torch.randn([2,4,5])

# print(x_input)
x_input = x_input.reshape(2,-1).unsqueeze(1)
print(x_input.shape)
print(x_input)

In [None]:
b1 = nn.Conv1d(in_channels =1 ,out_channels= 10,kernel_size = 1,padding="same")(x_input)
c1 = nn.Conv1d(in_channels =10 ,out_channels= 10,kernel_size = 3,padding="same")(b1)
d1 = nn.Conv1d(in_channels =10 ,out_channels= 10,kernel_size = 5,padding="same")(c1)
d2 = nn.Conv1d(in_channels =10 ,out_channels= 1,kernel_size = 1,padding="same")(d1)

In [None]:
print(f"b1 = {b1.shape}")
print(f"c1 = {c1.shape}")
print(f"d1 = {d1.shape}")
print(f"d2 = {d2.shape}")

In [None]:
f = d2.reshape(2,4,-1)

In [None]:
f.shape 
# 1 -> ed*num filter
# 2*filtersize -> 

In [None]:
15*3

In [None]:
a1 = nn.Conv1d(in_channels =4 ,out_channels= 4,kernel_size = 2,padding="same")(x_input)
b1 = nn.Conv1d(in_channels =4 ,out_channels= 4,kernel_size = 1,padding="same")(x_input)
c1 = nn.Conv1d(in_channels =4 ,out_channels= 4,kernel_size = 3,padding="same")(x_input)
d1 = nn.Conv1d(in_channels =4 ,out_channels= 4,kernel_size = 4,padding="same")(x_input)
e1 = torch.cat([a1,b1,c1,d1],dim =1)
f1 = nn.Conv1d(in_channels = e1.shape[1],out_channels= 25,kernel_size = 1)(e1)

In [None]:
pooled1 = [a1,b1,c1,d1]

In [None]:
print(f"e1 = {e1.shape}")

In [None]:
print(f1.shape)

In [None]:
x  = np.arange(0,10,1)
y1 = np.random.randn(10)
y3 = np.random.randn(10)

# Create subplots
fig, ax = plt.subplots(2, 1, sharex='col')
ax[0].plot(x,y1)
ax[1].plot(x,y3)

In [None]:
 torch.sqrt(torch.FloatTensor([0.5]))

In [None]:

# print('Finished Training')
# PATH = './cnn.pth'
# torch.save(model.state_dict(), PATH)

In [None]:
x_input = torch.randn([2,4,5])

# print(x_input)
x_input = x_input.reshape(2,-1).unsqueeze(1)
print(x_input.shape)
print(x_input)

In [None]:
# Multiple conv 1D layers 
conv_layers = nn.ModuleList([nn.Conv1d(in_channels = 1,
                                            out_channels= 1,
                                            kernel_size = filter_size,
                                            padding = "same") 
                                  for filter_size in [1,3,5]])

final_layer = nn.Linear(5*3, 3)

# Dropout layers
dropout = nn.Dropout(0.5)

In [None]:

# tokens size is [batch_size, max sentence size in the batch/token size]
# token_embeddings = [2, 1, 4*5]
conved = x_input
for conv_layer in conv_layers:
    conved = F.relu(conv_layer(conved))
print(len(conved))
concat = dropout(torch.cat(conved, dim = 1))
# [batch size, embedding dimension*num_of_tokens*num_of_filter]

In [None]:
concat.shape

In [None]:
conved = concat.reshape(2,4,-1)
# conved = [batch_size, num_of_tokens, embedding_dimension*num_of_filters]
conved.shape

In [None]:
output = final_layer(conved)

In [None]:
d = {"tokens": ["Alistair", "Darling", "is", "expected", "to", "announce", "details", "of", 
                "tax", "cuts", "and", "plans", "to", "increases", "public", "spending"], 
     "labels": ["$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", 
                "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$TRANSFORM_VERB_VBZ_VB", "$KEEP", "$APPEND_."]}

In [None]:
calculate_accuracy(test_data,tokens,edits,model,device)

In [None]:
len(d["labels"])

In [None]:
a = ["$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$KEEP", "$TRANSFORM_VERB_VBZ_VB", "$KEEP", "$APPEND_."] 
b = ['$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$keep', '$transform_verb_vbz_vb', '$keep', '$keep']

In [None]:
for i,j in zip(a,b):
    print(i.lower()==j,end=" ")

In [None]:
ran = torch.Tensor([[[1,2,10],[3,4,11]],[[5,6,12],[7,8,13]]])

In [None]:
ran.shape

In [None]:
ran.reshape(2,-1)

In [None]:
sample_weights = [0] * len(train_data)

In [None]:
x_input = torch.randn([2,4,5])
filterss = [2,3,4]
inf = 1
x_input = x_input.reshape(2,-1).unsqueeze(1)
print(x_input.shape)
# print(x_input)

conv_layers = nn.ModuleList()

for i,filter_size in enumerate(filterss):
    conv_layers.append(nn.Conv1d(in_channels = inf,out_channels= 10,
                                 kernel_size = filter_size,padding = "same"))
    inf = 10

con = x_input
print("in",con.shape)

for i, conv_layer in enumerate(conv_layers):
    #pass through convolutional layer
    print(f"{i} input {con.shape}")
    
    conved = F.relu(conv_layer(con))
    print(f"{i} iiiii {conved.shape}")
    conved = conved + con
    con = conved
    
            

In [None]:
conv_layers

In [None]:
from imblearn.over_sampling import RandomOverSampler

In [None]:
oversample = RandomOverSampler(sampling_strategy='minority')

In [None]:
for i, (data,label) in enumerate(train_data_iterator):
    print(label)
    break
class_weights= torch.Tensor([1/value for key,value in edits.vocab.freqs.items()])


In [None]:
l1 = [batch.edits for batch in train_data_iterator]
l1 = [lst for edit in l1 for lst in edit]


In [None]:
l1

In [None]:
for data in train_data_iterator:
        if len(files) > 0:
            class_weights.append(1/len(files))


In [None]:
for idx, (token, edit) in enumerate(train_data):
    class_weight = sum([class_weight for class_weight in eidts])
    sample_weights[idx] = class_weight

sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

In [None]:
x_

In [None]:
import torch
import torch.nn.functional as F

# assume we have some tensor x with size (b, t, k)
x = torch.zeros((64,48,100))

raw_weights = torch.bmm(x, x.transpose(1, 2))

In [None]:
x.transpose(1, 2).shape

In [None]:
trg = torch.rand((4,5))
trg_pad_idx=1
trg_len = 5
trg_pad_mask=(trg!=trg_pad_idx).unsqueeze(1).unsqueeze(2)
print(trg_pad_mask.shape)
trg_sub_mask = torch.tril(torch.ones((trg_len, trg_len))).bool()
print(trg_sub_mask.shape)
trg_mask=trg_pad_mask & trg_sub_mask

In [None]:
a = torch.ones((5,1,1,5))
a = (a!=0)
print(a.shape)
b = torch.tril(torch.ones((5, 5))).bool()
print(b.shape)
s = a&b
print(s.shape)

In [None]:
print(s[0])
print(a)
print(b)

In [None]:
from collections import Counter

In [None]:
l = torch.Tensor([[None,1,1,2,3,4,4],
     [3,2,3,2,3,2,2]])

ll = torch.Tensor([[1,4,1,2,3,4,4],
     [3,2,3,2,3,2,2]])
l

In [None]:
a = (l == 1)
b = (l == 2)
print(a,"\n",b)
print(a&b)

In [None]:
a = (l == 1)
b = (l == 2)
print(a,"\n",b)
print(!a & !b)

In [None]:
from itertools import chain
from collections import Counter
list1 = [[12,24,36], [3,5,12,24], [36,41,69]]
torch.bincount(a)

In [None]:
a = torch.Tensor([[1,0,0],[1,1,0],[1,1,1]])
a

In [None]:
a.transpose(0,1)

In [None]:
mask = (torch.triu(torch.ones((5, 5))) == 1).transpose(0, 1)
print(mask)
mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
print(mask)

In [None]:
import math

In [None]:
torch.exp(- torch.arange(0, 5, 2)* math.log(10000) / 5)

###  THE END