In [1]:
from torch import nn
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib
from torch.autograd import Variable
import gensim, logging
import gensim.downloader as api
from gensim.models import Word2Vec
from gensim.models import KeyedVectors
import pandas as pd
from pathlib import Path
from sklearn.metrics import classification_report


In [2]:
# Load the previously serialized train/test loader objects from the Kaggle input dataset.
# NOTE(review): joblib.load unpickles its input, which can execute arbitrary code —
# only load files from a trusted source.
trainloader =  joblib.load('/kaggle/input/final-dl/train_data_loader.pkl')
testloader = joblib.load('/kaggle/input/final-dl/test_data_loader.pkl')

# Re-wrap the underlying datasets in fresh DataLoaders so the batch size is set here;
# only the training loader shuffles.
BATCH_SIZE = 128
train_dataloader = torch.utils.data.DataLoader(trainloader.dataset,batch_size= BATCH_SIZE, shuffle= True)
test_dataloader = torch.utils.data.DataLoader(testloader.dataset,batch_size= BATCH_SIZE)



  return torch.load(io.BytesIO(b))


In [12]:
from torch import nn
from tqdm import tqdm
import torch.nn.functional as F
#--------------------------------------------training loop----------------------
#we will output F1 score or confusion matrix at each step

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

def F1_tensor(y_true, y_pred):
    """Return ``f1_score`` for two tensors after moving each to the CPU as a NumPy array."""
    labels_np, preds_np = (t.to('cpu').numpy() for t in (y_true, y_pred))
    return f1_score(labels_np, preds_np)

def Confusion_matrix_tensor(y_true, y_pred):
    """Return the confusion matrix for two label tensors.

    Both tensors are moved to the CPU and converted to NumPy arrays before
    being handed to ``sklearn.metrics.confusion_matrix``.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted labels.

    Returns:
        The array produced by ``confusion_matrix(y_true, y_pred)``.
    """
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    # The previous version returned f1_score here, contradicting the function name.
    return confusion_matrix(y_true, y_pred)

def convert_from_tensor(y):
    """Turn a tensor into a flat (1-D) NumPy array, detached from autograd and on the CPU."""
    host_copy = y.cpu()
    as_numpy = host_copy.detach().numpy()
    return as_numpy.reshape(-1)

def take_all_elem(container, target):
    """Append every element of ``target`` to ``container``, replacing any value
    that is neither 0 nor 1 with the integer 1."""
    container.extend(
        value if (value == 0 or value == 1) else 1
        for value in target
    )

def save_model(model):
    """Write ``model.state_dict()`` to ``/kaggle/working/best_LSTMmodel.pth``.

    The target directory is created if it does not exist, and the destination
    path is printed before saving.
    """
    out_dir = Path('/kaggle/working/')
    out_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = out_dir / 'best_LSTMmodel.pth'
    print(f'Update new best model to : {checkpoint_path}')
    torch.save(obj=model.state_dict(), f=checkpoint_path)

def train_step(model : nn.Module,
               data_loader : torch.utils.data.DataLoader,
               loss_function : nn.Module,
               optimizer,
               device = 'cuda'):
    """Train ``model`` for one pass over ``data_loader`` and print epoch metrics.

    Args:
        model: network called as ``model(inputs)``; its output is fed to
            ``loss_function`` as logits.
        data_loader: yields batches whose element 0 is the input and element 1
            is the label tensor (labels are unsqueezed on dim 1 to match the
            model output — assumes 1-D labels; confirm against the dataset).
        loss_function: callable ``loss_function(logits, targets)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        None. Mean loss, F1, confusion matrix and classification report for
        the epoch are printed.
    """
    model.train()
    total_loss = 0.0

    all_y_true = []
    all_y_pred = []

    for batch in data_loader:
        X_train = batch[0].to(device)
        y_train = batch[1].to(device)

        y_pred = model(X_train)
        # Hard 0/1 predictions are only used for metrics, so keep them off the graph.
        y_pred01 = torch.round(torch.sigmoid(y_pred.detach()))

        batch_loss = loss_function(y_pred.float(),y_train.unsqueeze(1).float())
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep autograd history alive for every batch of the epoch.
        total_loss += batch_loss.item()

        take_all_elem(all_y_true,convert_from_tensor(y_train))
        take_all_elem(all_y_pred,convert_from_tensor(y_pred01))

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    loss = total_loss / len(data_loader)

    all_y_true = np.array(all_y_true)
    all_y_pred = np.array(all_y_pred)

    print('------------------Train Result----------------------------')
    print(f'Training loss : {loss} | F1_score : {f1_score(all_y_true,all_y_pred)}')
    print(f'Confusion matrix :')
    print(confusion_matrix(all_y_true,all_y_pred))
    print(f'Classification report :')
    print(classification_report(all_y_true, all_y_pred, digits=4))


def test_step(model : nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_function : nn.Module,
              optimizer,
              device = 'cuda'):
    """Evaluate ``model`` on ``data_loader``, print metrics, and checkpoint on a new best F1.

    Args:
        model: network called as ``model(inputs)``.
        data_loader: yields ``(inputs, labels)`` batches.
        loss_function: callable ``loss_function(logits, targets)``.
        optimizer: unused; kept so the signature mirrors ``train_step``.
        device: device the batches are moved to.

    Returns:
        None. Prints mean loss, F1, confusion matrix and classification
        report. When the F1 exceeds the module-level ``best_f1_score``, that
        global is updated and ``save_model(model)`` is called.
    """
    global best_f1_score

    model.eval()
    total_loss = 0.0
    all_y_true = []
    all_y_pred = []

    with torch.no_grad():
        for (X_test,y_test) in data_loader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)

            # Drop only the trailing size-1 dim. A bare squeeze() would also
            # remove the batch dim when the final batch holds a single sample,
            # and the loss would then reject the mismatched shapes.
            test_logits = model(X_test).squeeze(-1)
            test_01 = torch.round(torch.sigmoid(test_logits))

            batch_loss = loss_function(test_logits.float(),y_test.float())
            total_loss += batch_loss.item()

            take_all_elem(all_y_true,convert_from_tensor(y_test))
            take_all_elem(all_y_pred,convert_from_tensor(test_01))

    loss = total_loss / len(data_loader)

    current_f1_score = f1_score(all_y_true,all_y_pred)
    print('------------------Test Result----------------------------')
    print(f'Testing loss : {loss} | F1_score : {current_f1_score}')
    if (current_f1_score > best_f1_score):
        best_f1_score = current_f1_score
        save_model(model)
    print(f'Confusion matrix :')
    print(confusion_matrix(all_y_true,all_y_pred))
    print(f'Classification report :')
    print(classification_report(all_y_true, all_y_pred, digits=4))
    print('---------------------------------------------------------')

# Passed to LSTMmodel below; index 1 (768) is used as the LSTM input_size.
# The two values equal the embedding table dimensions the model builds
# (64001 rows x 768 columns).
matrix_size = (64001,768)

class Attention(nn.Module):
    """Attention pooling over a sequence: one learned score per position,
    softmax across positions, then a weighted sum of the inputs."""

    def __init__(self, hidden_dim):
        super().__init__()
        # Projects each hidden_dim-sized vector to a single unnormalised score.
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """
        lstm_output: Tensor of shape (batch_size, seq_len, hidden_dim)

        Returns a tuple ``(context, weights)`` where ``context`` is
        (batch_size, hidden_dim) and ``weights`` is (batch_size, seq_len).
        """
        raw_scores = self.attention(lstm_output)                # (batch_size, seq_len, 1)
        weights = torch.softmax(raw_scores.squeeze(-1), dim=1)  # (batch_size, seq_len)

        # (batch_size, 1, seq_len) x (batch_size, seq_len, hidden_dim)
        pooled = torch.bmm(weights.unsqueeze(1), lstm_output)

        return pooled.squeeze(1), weights

class LSTMmodel(nn.Module):
    """Embedding -> 2-layer bidirectional LSTM -> attention pooling -> single logit.

    Args:
        matrix_size: ``(num_embeddings, embedding_dim)``. Both the embedding
            table and the LSTM input size are derived from it, so the two
            layers cannot disagree on the embedding width.
    """

    def __init__(self, matrix_size):
        super().__init__()
        vocab_size, embedding_dim = matrix_size[0], matrix_size[1]
        hidden_size = 512

        # Previously hard-coded to (64001, 768) regardless of matrix_size.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_size,
                           num_layers=2, batch_first=True, bidirectional=True)
        # Bidirectional LSTM outputs are 2 * hidden_size wide.
        self.attention = Attention(hidden_size * 2)
        self.fc = nn.Linear(hidden_size * 2, out_features=1)

    def forward(self, X):
        """Return one logit per sample for a batch of token-id sequences.

        X is passed straight to ``nn.Embedding``, so it must hold integer
        indices; with ``batch_first=True`` the LSTM reads it as
        (batch, seq_len) -> (batch, seq_len, embedding_dim).
        """
        embedding = self.embedding(X)
        lstm_outputs, _ = self.rnn(embedding)
        # The attention weights are computed but not needed for the prediction.
        context, _attn_weights = self.attention(lstm_outputs)
        return self.fc(context)
    
# Fall back to the CPU when no GPU is present instead of failing on .to('cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model = LSTMmodel(matrix_size).to(device)
# BCEWithLogitsLoss is given the model's raw outputs (the model applies no sigmoid).
BCE_loss = nn.BCEWithLogitsLoss()
# NOTE: despite the variable name, the optimizer is AdamW.
Adam_optimizer = torch.optim.AdamW(params = model.parameters(),lr = 0.001)


epochs = 5
# test_step compares against this global and saves the model on every improvement.
best_f1_score = -1
for epoch in tqdm(range(0,epochs)):
    print(f'Epoch {epoch}=======================================')
    train_step(model,train_dataloader,BCE_loss,Adam_optimizer,device = device)
    test_step(model,test_dataloader,BCE_loss,Adam_optimizer,device = device)

  0%|          | 0/5 [00:00<?, ?it/s]

------------------Train Result----------------------------
Training loss : 0.4057580530643463 | F1_score : 0.6022588684447744
Confusion matrix :
[[29991  1810]
 [ 5691  5679]]
Classification report :
              precision    recall  f1-score   support

           0     0.8405    0.9431    0.8888     31801
           1     0.7583    0.4995    0.6023     11370

    accuracy                         0.8262     43171
   macro avg     0.7994    0.7213    0.7456     43171
weighted avg     0.8189    0.8262    0.8134     43171

------------------Test Result----------------------------
Testing loss : 0.35370075702667236 | F1_score : 0.6723926380368097
Update new best model to : /kaggle/working/best_LSTMmodel.pth


 20%|██        | 1/5 [01:14<04:58, 74.51s/it]

Confusion matrix :
[[3759  193]
 [ 608  822]]
Classification report :
              precision    recall  f1-score   support

           0     0.8608    0.9512    0.9037      3952
           1     0.8099    0.5748    0.6724      1430

    accuracy                         0.8512      5382
   macro avg     0.8353    0.7630    0.7881      5382
weighted avg     0.8472    0.8512    0.8423      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.27972298860549927 | F1_score : 0.7647726198354922
Confusion matrix :
[[30106  1695]
 [ 3281  8089]]
Classification report :
              precision    recall  f1-score   support

           0     0.9017    0.9467    0.9237     31801
           1     0.8268    0.7114    0.7648     11370

    accuracy                         0.8847     43171
   macro avg     0.8642    0.8291    0.8442     43171
weighted avg     0.8820    0.8847    0.8818     43171

-----------------

 40%|████      | 2/5 [02:29<03:44, 74.91s/it]

Confusion matrix :
[[3605  347]
 [ 410 1020]]
Classification report :
              precision    recall  f1-score   support

           0     0.8979    0.9122    0.9050      3952
           1     0.7462    0.7133    0.7294      1430

    accuracy                         0.8593      5382
   macro avg     0.8220    0.8127    0.8172      5382
weighted avg     0.8576    0.8593    0.8583      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.1733601987361908 | F1_score : 0.8641114982578396
Confusion matrix :
[[30620  1181]
 [ 1822  9548]]
Classification report :
              precision    recall  f1-score   support

           0     0.9438    0.9629    0.9533     31801
           1     0.8899    0.8398    0.8641     11370

    accuracy                         0.9304     43171
   macro avg     0.9169    0.9013    0.9087     43171
weighted avg     0.9296    0.9304    0.9298     43171



 60%|██████    | 3/5 [03:43<02:29, 74.63s/it]

------------------Test Result----------------------------
Testing loss : 0.4196995496749878 | F1_score : 0.7102129195236375
Confusion matrix :
[[3595  357]
 [ 446  984]]
Classification report :
              precision    recall  f1-score   support

           0     0.8896    0.9097    0.8995      3952
           1     0.7338    0.6881    0.7102      1430

    accuracy                         0.8508      5382
   macro avg     0.8117    0.7989    0.8049      5382
weighted avg     0.8482    0.8508    0.8492      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.0909842923283577 | F1_score : 0.9323388385721897
Confusion matrix :
[[31147   654]
 [  870 10500]]
Classification report :
              precision    recall  f1-score   support

           0     0.9728    0.9794    0.9761     31801
           1     0.9414    0.9235    0.9323     11370

    accuracy                         0.9647     43171
   

 80%|████████  | 4/5 [04:58<01:14, 74.49s/it]

------------------Test Result----------------------------
Testing loss : 0.4988979995250702 | F1_score : 0.7005967005967007
Confusion matrix :
[[3531  421]
 [ 432  998]]
Classification report :
              precision    recall  f1-score   support

           0     0.8910    0.8935    0.8922      3952
           1     0.7033    0.6979    0.7006      1430

    accuracy                         0.8415      5382
   macro avg     0.7972    0.7957    0.7964      5382
weighted avg     0.8411    0.8415    0.8413      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.04765291139483452 | F1_score : 0.9684573246156565
Confusion matrix :
[[31496   305]
 [  409 10961]]
Classification report :
              precision    recall  f1-score   support

           0     0.9872    0.9904    0.9888     31801
           1     0.9729    0.9640    0.9685     11370

    accuracy                         0.9835     43171
  

100%|██████████| 5/5 [06:12<00:00, 74.51s/it]

------------------Test Result----------------------------
Testing loss : 0.6509868502616882 | F1_score : 0.699437148217636
Confusion matrix :
[[3649  303]
 [ 498  932]]
Classification report :
              precision    recall  f1-score   support

           0     0.8799    0.9233    0.9011      3952
           1     0.7547    0.6517    0.6994      1430

    accuracy                         0.8512      5382
   macro avg     0.8173    0.7875    0.8003      5382
weighted avg     0.8466    0.8512    0.8475      5382

---------------------------------------------------------





In [11]:
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

def F1_tensor(y_true, y_pred):
    """Compute ``f1_score`` on two tensors, each first moved to the CPU and
    converted to a NumPy array."""
    truth_np = y_true.to('cpu').numpy()
    guess_np = y_pred.to('cpu').numpy()
    score = f1_score(truth_np, guess_np)
    return score

def Confusion_matrix_tensor(y_true, y_pred):
    """Return the confusion matrix for two label tensors.

    Both tensors are moved to the CPU and converted to NumPy arrays before
    being handed to ``sklearn.metrics.confusion_matrix``.

    Args:
        y_true: tensor of ground-truth labels.
        y_pred: tensor of predicted labels.

    Returns:
        The array produced by ``confusion_matrix(y_true, y_pred)``.
    """
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    # The previous version returned f1_score here, contradicting the function name.
    return confusion_matrix(y_true, y_pred)

def convert_from_tensor(y):
    """Flatten a tensor into a 1-D NumPy array (moved to the CPU, detached from autograd)."""
    cpu_tensor = y.cpu()
    array = cpu_tensor.detach().numpy()
    return array.reshape(-1)

def take_all_elem(container, target):
    """Copy ``target`` into ``container`` element by element; anything that is
    not equal to 0 or 1 is stored as the integer 1 instead."""
    for item in target:
        is_binary = (item == 0) or (item == 1)
        container.append(item if is_binary else 1)

def save_model(model):
    """Write ``model.state_dict()`` to ``/content/best_GRUmodel.pth``.

    The target directory is created if it does not exist, and the destination
    path is printed before saving.
    """
    out_dir = Path('/content')
    out_dir.mkdir(parents=True, exist_ok=True)
    checkpoint_path = out_dir / 'best_GRUmodel.pth'
    print(f'Update new best model to : {checkpoint_path}')
    torch.save(obj=model.state_dict(), f=checkpoint_path)



In [5]:
def train_step(model : nn.Module,
               data_loader : torch.utils.data.DataLoader,
               loss_function : nn.Module,
               optimizer,
               device = 'cuda'):
    """Train ``model`` for one pass over ``data_loader`` and print epoch metrics.

    Args:
        model: network called as ``model(inputs)``; its output is fed to
            ``loss_function`` as logits.
        data_loader: yields ``(inputs, labels)`` batches (labels are
            unsqueezed on dim 1 to match the model output — assumes 1-D
            labels; confirm against the dataset).
        loss_function: callable ``loss_function(logits, targets)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        device: device the batches are moved to.

    Returns:
        None. Mean loss, F1 and the classification report for the epoch are
        printed.
    """
    model.train()
    total_loss = 0.0

    all_y_true = []
    all_y_pred = []

    for (X_train,y_train) in data_loader:
        X_train = X_train.to(device)
        y_train = y_train.unsqueeze(1).to(device)

        y_pred = model(X_train)
        # Hard 0/1 predictions are only used for metrics, so keep them off the graph.
        y_pred01 = torch.round(torch.sigmoid(y_pred.detach()))

        batch_loss = loss_function(y_pred.float(),y_train.float())
        # Accumulate a plain Python float: adding the loss tensor itself would
        # keep autograd history alive for every batch of the epoch.
        total_loss += batch_loss.item()

        take_all_elem(all_y_true,convert_from_tensor(y_train))
        take_all_elem(all_y_pred,convert_from_tensor(y_pred01))

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    loss = total_loss / len(data_loader)

    all_y_true = np.array(all_y_true)
    all_y_pred = np.array(all_y_pred)

    print('------------------Train Result----------------------------')
    print(f'Training loss : {loss} | F1_score : {f1_score(all_y_true,all_y_pred)}')

    print(classification_report(all_y_true, all_y_pred, digits=4))

# Best test F1 seen so far; test_step reads and updates it through `global`.
# Starting at -1 means the first evaluation always counts as an improvement
# (it is compared with a strict `>`), so a checkpoint is written after epoch 0.
best_f1_score = -1


In [6]:
def test_step(model : nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_function : nn.Module,
              optimizer,
              device = 'cuda',):
    """Evaluate ``model`` on ``data_loader``, print metrics, and checkpoint on a new best F1.

    Args:
        model: network called as ``model(inputs)``.
        data_loader: yields ``(inputs, labels)`` batches.
        loss_function: callable ``loss_function(logits, targets)``.
        optimizer: unused; kept so the signature mirrors ``train_step``.
        device: device the batches are moved to.

    Returns:
        None. Prints mean loss, F1 and the classification report. When the F1
        exceeds the module-level ``best_f1_score``, that global is updated and
        ``save_model(model)`` is called.
    """
    global best_f1_score

    model.eval()
    total_loss = 0.0

    all_y_true = []
    all_y_pred = []

    with torch.inference_mode():
        for (X_test,y_test) in data_loader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)

            # Drop only the trailing size-1 dim. A bare squeeze() would also
            # remove the batch dim when the final batch holds a single sample,
            # and the loss would then reject the mismatched shapes.
            test_logits = model(X_test).squeeze(-1)
            test_01 = torch.round(torch.sigmoid(test_logits))

            batch_loss = loss_function(test_logits.float(),y_test.float())
            total_loss += batch_loss.item()

            take_all_elem(all_y_true,convert_from_tensor(y_test))
            take_all_elem(all_y_pred,convert_from_tensor(test_01))

    loss = total_loss / len(data_loader)

    current_f1_score = f1_score(all_y_true,all_y_pred)
    print('------------------Test Result----------------------------')
    print(f'Testing loss : {loss} | F1_score : {current_f1_score}')
    print('---------------------------------------------------------')

    print(classification_report(all_y_true, all_y_pred, digits=4))

    if (current_f1_score > best_f1_score):
        best_f1_score = current_f1_score
        save_model(model)

# Index 1 (768) is read by GRUmodel for the GRU input size. Index 0 is not read
# by any visible cell — presumably the padded sequence length; TODO confirm.
matrix_size = (128,768)




In [7]:
class GRUmodel(nn.Module):
    """Embedding -> single-layer unidirectional GRU -> linear head on the last timestep.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: width of each embedding and GRU input size. ``None``
            (the default) falls back to the module-level ``matrix_size[1]``,
            which is what the GRU previously read.
        hidden_size: GRU hidden width, which is also the head's input size.
    """

    def __init__(self, vocab_size=64001, embedding_dim=None, hidden_size=384):
        super().__init__()
        if embedding_dim is None:
            embedding_dim = matrix_size[1]

        # One width for both layers, so the embedding output always fits the GRU.
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn = nn.GRU(input_size = embedding_dim, hidden_size = hidden_size,
            num_layers = 1, batch_first = True, bidirectional = False)

        # The input width is known up front, so use a regular Linear: its
        # parameters exist before the optimizer is built, unlike LazyLinear's.
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, X, state=None):
        """Return one logit per sample, shape (batch, 1), from integer token ids ``X``.

        ``state`` is forwarded to the GRU as its initial hidden state.
        """
        X = self.embedding(X)
        rnn_outputs, _ = self.rnn(X, state)
        # Slice the last timestep first, so the head runs once per sample
        # rather than once per position.
        return self.fc(rnn_outputs[:, -1, :])

    def feature_extract(self, X, state = None):
        """Return the GRU output at the last timestep, shape (batch, hidden_size).

        Accepts either already-embedded floating-point input, as before, or
        integer token ids, which are embedded first so the method takes the
        same input as ``forward``.
        """
        if not torch.is_floating_point(X):
            X = self.embedding(X)
        rnn_outputs, _ = self.rnn(X, state)
        return rnn_outputs[:,-1,:]


In [8]:
# Use the first CUDA device when one is available, otherwise run on the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = GRUmodel().to(device)
# BCEWithLogitsLoss is given the model's raw outputs (the model applies no sigmoid).
BCE_loss = nn.BCEWithLogitsLoss()
# NOTE: despite the variable name, the optimizer is AdamW.
Adam_optimizer = torch.optim.AdamW(params = model.parameters(),lr = 0.0001)


In [9]:
epochs = 30

# Each epoch: one training pass, then one evaluation pass over the test loader.
for epoch in range(epochs):
    print(f'Epoch {epoch}=======================================')
    train_step(model, train_dataloader, BCE_loss, Adam_optimizer, device=device)
    test_step(model, test_dataloader, BCE_loss, Adam_optimizer, device=device)
    

------------------Train Result----------------------------
Training loss : 0.5795738697052002 | F1_score : 0.006955916876793323
              precision    recall  f1-score   support

           0     0.7368    0.9971    0.8474     31801
           1     0.3053    0.0035    0.0070     11370

    accuracy                         0.7354     43171
   macro avg     0.5211    0.5003    0.4272     43171
weighted avg     0.6231    0.7354    0.6260     43171

------------------Test Result----------------------------
Testing loss : 0.5805929899215698 | F1_score : 0.0
---------------------------------------------------------
              precision    recall  f1-score   support

           0     0.7343    1.0000    0.8468      3952
           1     0.0000    0.0000    0.0000      1430

    accuracy                         0.7343      5382
   macro avg     0.3671    0.5000    0.4234      5382
weighted avg     0.5392    0.7343    0.6218      5382

Update new best model to : /content/best_GRUmodel.p

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


------------------Train Result----------------------------
Training loss : 0.521623969078064 | F1_score : 0.26818181818181813
              precision    recall  f1-score   support

           0     0.7657    0.9742    0.8574     31801
           1     0.6967    0.1661    0.2682     11370

    accuracy                         0.7613     43171
   macro avg     0.7312    0.5701    0.5628     43171
weighted avg     0.7475    0.7613    0.7022     43171

------------------Test Result----------------------------
Testing loss : 0.4302860498428345 | F1_score : 0.5929810407422348
---------------------------------------------------------
              precision    recall  f1-score   support

           0     0.8396    0.9205    0.8782      3952
           1     0.7007    0.5140    0.5930      1430

    accuracy                         0.8125      5382
   macro avg     0.7701    0.7173    0.7356      5382
weighted avg     0.8027    0.8125    0.8024      5382

Update new best model to : /content/be