In [1]:
from torch import nn
import torch
import numpy as np
import pandas as pd
import joblib
from torch.autograd import Variable
import pandas as pd
from pathlib import Path
from sklearn.metrics import classification_report


In [2]:
trainloader =  joblib.load('/kaggle/input/data-loader/train_data_loader.pkl')
valloader = joblib.load('/kaggle/input/data-loader/val_data_loader.pkl')
testloader = joblib.load('/kaggle/input/data-loader/test_data_loader.pkl')

BATCH_SIZE = 128
train_dataloader = torch.utils.data.DataLoader(trainloader.dataset,batch_size= BATCH_SIZE, shuffle= True)
test_dataloader = torch.utils.data.DataLoader(testloader.dataset,batch_size= BATCH_SIZE)
val_dataloader = torch.utils.data.DataLoader(valloader.dataset,batch_size= BATCH_SIZE)


  return torch.load(io.BytesIO(b))


In [7]:
import torch.nn.functional as F
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """
        lstm_output: Tensor of shape (batch_size, seq_len, hidden_dim)
        """
        # Compute attention scores
        attn_scores = self.attention(lstm_output).squeeze(-1)  # (batch_size, seq_len)
        attn_weights = F.softmax(attn_scores, dim=1)  # Normalize scores to probabilities
        
        # Compute context vector as weighted sum of LSTM outputs
        context = torch.bmm(attn_weights.unsqueeze(1), lstm_output).squeeze(1)  # (batch_size, hidden_dim)
        
        return context, attn_weights

In [8]:
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

def F1_tensor(y_true, y_pred):
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    return f1_score(y_true, y_pred)

def Confusion_matrix_tensor(y_true, y_pred):
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    return f1_score(y_true, y_pred)

def convert_from_tensor(y): #convert from tensor to some kind of array that we can use numpy
    return y.cpu().detach().numpy().reshape(-1)

def take_all_elem(container, target):
    for x in target:
        if (x != 0 and x != 1):
            container.append(1)
        else:
            container.append(x)

def save_model(model):
    MODEL_PATH = Path('/content')
    MODEL_PATH.mkdir(parents = True, exist_ok = True)
    MODEL_NAME = 'best_GRUmodel.pth'
    MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
    print(f'Update new best model to : {MODEL_SAVE_PATH}')
    torch.save(obj = model.state_dict(),f = MODEL_SAVE_PATH)



In [14]:
def train_step(model : nn.Module,
               data_loader : torch.utils.data.DataLoader,
               loss_function : nn.Module,
               optimizer,
               device = 'cuda'):
    model.train()
    loss = 0

    all_y_true = []
    all_y_pred = []

    for (X_train,y_train,mask) in data_loader:
        X_train = X_train.to(device)
        y_train = y_train.unsqueeze(1).to(device)

        y_pred = model(X_train)
        y_pred01 = torch.round(torch.sigmoid(y_pred))

        batch_loss = loss_function(y_pred.float(),y_train.float())
        loss += batch_loss

        take_all_elem(all_y_true,convert_from_tensor(y_train))
        take_all_elem(all_y_pred,convert_from_tensor(y_pred01))


        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    loss /= len(data_loader)

    all_y_true = np.array(all_y_true)
    all_y_pred = np.array(all_y_pred)

    #print(all_y_true)
    #print(np.unique(all_y_true))

    print('------------------Train Result----------------------------')
    print(f'Training loss : {loss} | F1_score : {f1_score(all_y_true,all_y_pred)}')

    print(classification_report(all_y_true, all_y_pred, digits=4))

best_f1_score = -1


In [15]:
def test_step(model : nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_function : nn.Module,
              optimizer,
              device = 'cuda',):

    model.eval()
    loss,acc = 0,0

    all_y_true = []
    all_y_pred = []

    with torch.inference_mode():
        loss = 0

        for (X_test,y_test,mask) in data_loader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)

            test_logits = model(X_test).squeeze()
            test_01 = torch.round(torch.sigmoid(test_logits))

            batch_loss = loss_function(test_logits.float(),y_test.float())

            loss += batch_loss

            take_all_elem(all_y_true,convert_from_tensor(y_test))
            take_all_elem(all_y_pred,convert_from_tensor(test_01))


        loss /= len(data_loader)
        acc /= len(data_loader)

    current_f1_score = f1_score(all_y_true,all_y_pred)
    print('------------------Test Result----------------------------')
    print(f'Testing loss : {loss} | F1_score : {current_f1_score}')
    print('---------------------------------------------------------')

    print(classification_report(all_y_true, all_y_pred, digits=4))

    global best_f1_score
    if (current_f1_score > best_f1_score):
        best_f1_score = current_f1_score
        save_model(model)

matrix_size = (128,768)




In [16]:
class GRUmodel(nn.Module):
    def __init__(self):
        super().__init__()

        self.embedding = nn.Embedding(64001, 768)
        self.rnn = nn.GRU(input_size = matrix_size[1],hidden_size = 384,
            num_layers = 3, batch_first = True, bidirectional = False)
        self.attention = Attention(384)
        self.fc = nn.Linear(384,out_features = 1)

    #it output [0,1,2,....,seq_length - 1]
    #just take the last array element in case of classification or anything like that
    def forward(self, X, state=None):
        X = self.embedding(X)
        gru_outputs, _ = self.rnn(X, state)
        context, attn_weights = self.attention(gru_outputs)
        return self.fc(context)


In [17]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = GRUmodel().to(device)
BCE_loss = nn.BCEWithLogitsLoss()
Adam_optimizer = torch.optim.AdamW(params = model.parameters(),lr = 0.0001)


In [18]:
epochs = 10

for epoch in range(0,epochs):
    print(f'Epoch {epoch}=======================================')
    train_step(model,train_dataloader,BCE_loss,Adam_optimizer,device = device)
    test_step( model,test_dataloader,BCE_loss,Adam_optimizer,device = device)
    

------------------Train Result----------------------------
Training loss : 0.45532846450805664 | F1_score : 0.4922018620648757
              precision    recall  f1-score   support

           0     0.8084    0.9578    0.8768     31801
           1     0.7555    0.3650    0.4922     11370

    accuracy                         0.8016     43171
   macro avg     0.7819    0.6614    0.6845     43171
weighted avg     0.7945    0.8016    0.7755     43171

------------------Test Result----------------------------
Testing loss : 0.3817872405052185 | F1_score : 0.6669282071400549
---------------------------------------------------------
              precision    recall  f1-score   support

           0     0.8639    0.9319    0.8967      3952
           1     0.7596    0.5944    0.6669      1430

    accuracy                         0.8423      5382
   macro avg     0.8118    0.7632    0.7818      5382
weighted avg     0.8362    0.8423    0.8356      5382

Update new best model to : /content/b

In [19]:
test_step(model,test_dataloader,BCE_loss,Adam_optimizer,device = device)

------------------Test Result----------------------------
Testing loss : 0.5913235545158386 | F1_score : 0.7070707070707071
---------------------------------------------------------
              precision    recall  f1-score   support

           0     0.8828    0.9246    0.9032      3952
           1     0.7603    0.6608    0.7071      1430

    accuracy                         0.8545      5382
   macro avg     0.8215    0.7927    0.8051      5382
weighted avg     0.8503    0.8545    0.8511      5382

