In [1]:
from torch import nn
import torch
import numpy as np
import pandas as pd
import joblib
from torch.autograd import Variable
import pandas as pd
from pathlib import Path
from sklearn.metrics import classification_report


In [2]:
trainloader =  joblib.load('/kaggle/input/data-loader/train_data_loader.pkl')
valloader = joblib.load('/kaggle/input/data-loader/val_data_loader.pkl')
testloader = joblib.load('/kaggle/input/data-loader/test_data_loader.pkl')

BATCH_SIZE = 128
train_dataloader = torch.utils.data.DataLoader(trainloader.dataset,batch_size= BATCH_SIZE, shuffle= True)
test_dataloader = torch.utils.data.DataLoader(testloader.dataset,batch_size= BATCH_SIZE)
val_dataloader = torch.utils.data.DataLoader(valloader.dataset,batch_size= BATCH_SIZE)


  return torch.load(io.BytesIO(b))


In [None]:
from transformers import AutoModel, AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained('vinai/phobert-base-v2')
phoBert = AutoModel.from_pretrained('vinai/phobert-base-v2')

In [3]:
from torch import nn
from tqdm import tqdm
import torch.nn.functional as F
#--------------------------------------------training loop----------------------
#we will output F1 score or confusion matrix at each step

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

def F1_tensor(y_true, y_pred):
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    return f1_score(y_true, y_pred)

def Confusion_matrix_tensor(y_true, y_pred):
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    return f1_score(y_true, y_pred)

def convert_from_tensor(y): #convert from tensor to some kind of array that we can use numpy
    return y.cpu().detach().numpy().reshape(-1)

def take_all_elem(container, target):
    for x in target:
        if (x != 0 and x != 1):
            container.append(1)
        else:
            container.append(x)

def save_model(model):
    MODEL_PATH = Path('/kaggle/working/')
    MODEL_PATH.mkdir(parents = True, exist_ok = True)
    MODEL_NAME = 'best_LSTMmodel.pth'
    MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
    print(f'Update new best model to : {MODEL_SAVE_PATH}')
    torch.save(obj = model.state_dict(),f = MODEL_SAVE_PATH)

def train_step(model : nn.Module,
               data_loader : torch.utils.data.DataLoader,
               loss_function : nn.Module,
               optimizer,
               device = 'cuda'):
    model.train()
    loss = 0

    all_y_true = []
    all_y_pred = []

    for batch in data_loader:
        X_train = batch[0].to(device)
        y_train = batch[1].to(device)
        mask = batch[2].to(device)
        #print(y_train.type())
        #embedding = phoBert(X_train,mask)[0]
        y_pred = model(X_train)
        y_pred01 = torch.round(torch.sigmoid(y_pred))

        batch_loss = loss_function(y_pred.float(),y_train.unsqueeze(1).float())
        #print(batch_loss.type())
        loss += batch_loss

        take_all_elem(all_y_true,convert_from_tensor(y_train))
        take_all_elem(all_y_pred,convert_from_tensor(y_pred01))


        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    loss /= len(data_loader)

    all_y_true = np.array(all_y_true)
    all_y_pred = np.array(all_y_pred)

    #print(all_y_true)
    #print(np.unique(all_y_true))

    print('------------------Train Result----------------------------')
    print(f'Training loss : {loss} | F1_score : {f1_score(all_y_true,all_y_pred)}')
    print(f'Confusion matrix :')
    print(confusion_matrix(all_y_true,all_y_pred))
    print(f'Classification report :')
    print(classification_report(all_y_true, all_y_pred, digits=4))


def test_step(model : nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_function : nn.Module,
              optimizer,
              device = 'cuda'):

    model.eval()
    loss,acc = 0,0
    all_y_true = []
    all_y_pred = []

    with torch.no_grad():
        loss = 0

        for (X_test,y_test,mask) in data_loader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)
            mask = mask.to(device)
            #embedding = phoBert(X_test,mask)[0]
            test_logits = model(X_test).squeeze()
            test_01 = torch.round(torch.sigmoid(test_logits))

            batch_loss = loss_function(test_logits.float(),y_test.float())

            loss += batch_loss

            take_all_elem(all_y_true,convert_from_tensor(y_test))
            take_all_elem(all_y_pred,convert_from_tensor(test_01))


        loss /= len(data_loader)
        acc /= len(data_loader)
    current_f1_score = f1_score(all_y_true,all_y_pred)
    print('------------------Test Result----------------------------')
    print(f'Testing loss : {loss} | F1_score : {current_f1_score}')
    global best_f1_score
    if (current_f1_score > best_f1_score):
        best_f1_score = current_f1_score
        save_model(model)
    print(f'Confusion matrix :')
    print(confusion_matrix(all_y_true,all_y_pred))
    print(f'Classification report :')
    print(classification_report(all_y_true, all_y_pred, digits=4))
    print('---------------------------------------------------------')

matrix_size = (64001,768)

class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """
        lstm_output: Tensor of shape (batch_size, seq_len, hidden_dim)
        """
        # Compute attention scores
        attn_scores = self.attention(lstm_output).squeeze(-1)  # (batch_size, seq_len)
        attn_weights = F.softmax(attn_scores, dim=1)  # Normalize scores to probabilities
        
        # Compute context vector as weighted sum of LSTM outputs
        context = torch.bmm(attn_weights.unsqueeze(1), lstm_output).squeeze(1)  # (batch_size, hidden_dim)
        
        return context, attn_weights

In [4]:
class LSTMmodel(nn.Module):
    def __init__(self, matrix_size):
        super().__init__()
        self.embedding = nn.Embedding(64001,768)
        #phoBert
        self.rnn = nn.LSTM(input_size=matrix_size[1], hidden_size=512,
                           num_layers=2, batch_first=True, bidirectional=True)
        self.attention = Attention(512 * 2)
        # Output size should be doubled due to bidirectionality
        self.fc = nn.Linear(512 * 2,out_features=1)

    def forward(self,X):
        embedding = self.embedding(X)
        lstm_outputs, _ = self.rnn(embedding)
        context, attn_weights = self.attention(lstm_outputs)
        return self.fc(context)
    


In [5]:
device = 'cuda'
model = LSTMmodel(matrix_size).to(device)
BCE_loss = nn.BCEWithLogitsLoss()
Adam_optimizer = torch.optim.AdamW(params = model.parameters(),lr = 0.001)


epochs = 10
best_f1_score = -1
for epoch in tqdm(range(0,epochs)):
    print(f'Epoch {epoch}=======================================')
    train_step(model,train_dataloader,BCE_loss,Adam_optimizer,device = device)
    test_step(model,val_dataloader,BCE_loss,Adam_optimizer,device = device)

  0%|          | 0/10 [00:00<?, ?it/s]

------------------Train Result----------------------------
Training loss : 0.4027549624443054 | F1_score : 0.6110020688557637
Confusion matrix :
[[30079  1722]
 [ 5611  5759]]
Classification report :
              precision    recall  f1-score   support

           0     0.8428    0.9459    0.8913     31801
           1     0.7698    0.5065    0.6110     11370

    accuracy                         0.8301     43171
   macro avg     0.8063    0.7262    0.7512     43171
weighted avg     0.8236    0.8301    0.8175     43171

------------------Test Result----------------------------
Testing loss : 0.36150553822517395 | F1_score : 0.6809353943717796
Update new best model to : /kaggle/working/best_LSTMmodel.pth


 10%|█         | 1/10 [01:13<11:00, 73.38s/it]

Confusion matrix :
[[3718  220]
 [ 585  859]]
Classification report :
              precision    recall  f1-score   support

           0     0.8640    0.9441    0.9023      3938
           1     0.7961    0.5949    0.6809      1444

    accuracy                         0.8504      5382
   macro avg     0.8301    0.7695    0.7916      5382
weighted avg     0.8458    0.8504    0.8429      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.28000539541244507 | F1_score : 0.7661244562133536
Confusion matrix :
[[30124  1677]
 [ 3269  8101]]
Classification report :
              precision    recall  f1-score   support

           0     0.9021    0.9473    0.9241     31801
           1     0.8285    0.7125    0.7661     11370

    accuracy                         0.8854     43171
   macro avg     0.8653    0.8299    0.8451     43171
weighted avg     0.8827    0.8854    0.8825     43171

-----------------

 20%|██        | 2/10 [02:27<09:51, 73.90s/it]

Confusion matrix :
[[3652  286]
 [ 504  940]]
Classification report :
              precision    recall  f1-score   support

           0     0.8787    0.9274    0.9024      3938
           1     0.7667    0.6510    0.7041      1444

    accuracy                         0.8532      5382
   macro avg     0.8227    0.7892    0.8033      5382
weighted avg     0.8487    0.8532    0.8492      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.17630010843276978 | F1_score : 0.8643547367470423
Confusion matrix :
[[30596  1205]
 [ 1799  9571]]
Classification report :
              precision    recall  f1-score   support

           0     0.9445    0.9621    0.9532     31801
           1     0.8882    0.8418    0.8644     11370

    accuracy                         0.9304     43171
   macro avg     0.9163    0.9019    0.9088     43171
weighted avg     0.9296    0.9304    0.9298     43171

-----------------

 30%|███       | 3/10 [03:41<08:38, 74.03s/it]

Confusion matrix :
[[3506  432]
 [ 423 1021]]
Classification report :
              precision    recall  f1-score   support

           0     0.8923    0.8903    0.8913      3938
           1     0.7027    0.7071    0.7049      1444

    accuracy                         0.8411      5382
   macro avg     0.7975    0.7987    0.7981      5382
weighted avg     0.8415    0.8411    0.8413      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.08572310954332352 | F1_score : 0.9381918002302311
Confusion matrix :
[[31180   621]
 [  775 10595]]
Classification report :
              precision    recall  f1-score   support

           0     0.9757    0.9805    0.9781     31801
           1     0.9446    0.9318    0.9382     11370

    accuracy                         0.9677     43171
   macro avg     0.9602    0.9562    0.9581     43171
weighted avg     0.9676    0.9677    0.9676     43171



 40%|████      | 4/10 [04:55<07:22, 73.82s/it]

------------------Test Result----------------------------
Testing loss : 0.5532641410827637 | F1_score : 0.7023480662983426
Confusion matrix :
[[3503  435]
 [ 427 1017]]
Classification report :
              precision    recall  f1-score   support

           0     0.8913    0.8895    0.8904      3938
           1     0.7004    0.7043    0.7023      1444

    accuracy                         0.8398      5382
   macro avg     0.7959    0.7969    0.7964      5382
weighted avg     0.8401    0.8398    0.8400      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.04871148243546486 | F1_score : 0.9668973420901837
Confusion matrix :
[[31452   349]
 [  402 10968]]
Classification report :
              precision    recall  f1-score   support

           0     0.9874    0.9890    0.9882     31801
           1     0.9692    0.9646    0.9669     11370

    accuracy                         0.9826     43171
  

 50%|█████     | 5/10 [06:08<06:08, 73.71s/it]

------------------Test Result----------------------------
Testing loss : 0.6766282916069031 | F1_score : 0.6851457000710732
Confusion matrix :
[[3532  406]
 [ 480  964]]
Classification report :
              precision    recall  f1-score   support

           0     0.8804    0.8969    0.8886      3938
           1     0.7036    0.6676    0.6851      1444

    accuracy                         0.8354      5382
   macro avg     0.7920    0.7822    0.7868      5382
weighted avg     0.8329    0.8354    0.8340      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.0291376281529665 | F1_score : 0.9817175002197415
Confusion matrix :
[[31586   215]
 [  201 11169]]
Classification report :
              precision    recall  f1-score   support

           0     0.9937    0.9932    0.9935     31801
           1     0.9811    0.9823    0.9817     11370

    accuracy                         0.9904     43171
   

 60%|██████    | 6/10 [07:22<04:54, 73.66s/it]

------------------Test Result----------------------------
Testing loss : 0.7663545608520508 | F1_score : 0.6924453992123164
Confusion matrix :
[[3556  382]
 [ 477  967]]
Classification report :
              precision    recall  f1-score   support

           0     0.8817    0.9030    0.8922      3938
           1     0.7168    0.6697    0.6924      1444

    accuracy                         0.8404      5382
   macro avg     0.7993    0.7863    0.7923      5382
weighted avg     0.8375    0.8404    0.8386      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.020146364346146584 | F1_score : 0.9870278351875468
Confusion matrix :
[[31653   148]
 [  147 11223]]
Classification report :
              precision    recall  f1-score   support

           0     0.9954    0.9953    0.9954     31801
           1     0.9870    0.9871    0.9870     11370

    accuracy                         0.9932     43171
 

 70%|███████   | 7/10 [08:35<03:40, 73.60s/it]

------------------Test Result----------------------------
Testing loss : 0.8163372874259949 | F1_score : 0.7046109510086456
Confusion matrix :
[[3584  354]
 [ 466  978]]
Classification report :
              precision    recall  f1-score   support

           0     0.8849    0.9101    0.8973      3938
           1     0.7342    0.6773    0.7046      1444

    accuracy                         0.8476      5382
   macro avg     0.8096    0.7937    0.8010      5382
weighted avg     0.8445    0.8476    0.8456      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.01708260178565979 | F1_score : 0.989528335093277
Confusion matrix :
[[31688   113]
 [  125 11245]]
Classification report :
              precision    recall  f1-score   support

           0     0.9961    0.9964    0.9963     31801
           1     0.9901    0.9890    0.9895     11370

    accuracy                         0.9945     43171
   

 80%|████████  | 8/10 [09:49<02:27, 73.57s/it]

------------------Test Result----------------------------
Testing loss : 0.8887156248092651 | F1_score : 0.6957142857142857
Confusion matrix :
[[3556  382]
 [ 470  974]]
Classification report :
              precision    recall  f1-score   support

           0     0.8833    0.9030    0.8930      3938
           1     0.7183    0.6745    0.6957      1444

    accuracy                         0.8417      5382
   macro avg     0.8008    0.7888    0.7944      5382
weighted avg     0.8390    0.8417    0.8401      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.016425568610429764 | F1_score : 0.9886068710684907
Confusion matrix :
[[31675   126]
 [  133 11237]]
Classification report :
              precision    recall  f1-score   support

           0     0.9958    0.9960    0.9959     31801
           1     0.9889    0.9883    0.9886     11370

    accuracy                         0.9940     43171
 

 90%|█████████ | 9/10 [11:02<01:13, 73.55s/it]

------------------Test Result----------------------------
Testing loss : 0.84549880027771 | F1_score : 0.6942034015966678
Confusion matrix :
[[3501  437]
 [ 444 1000]]
Classification report :
              precision    recall  f1-score   support

           0     0.8875    0.8890    0.8882      3938
           1     0.6959    0.6925    0.6942      1444

    accuracy                         0.8363      5382
   macro avg     0.7917    0.7908    0.7912      5382
weighted avg     0.8361    0.8363    0.8362      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.015145350247621536 | F1_score : 0.9906229363856482
Confusion matrix :
[[31707    94]
 [  119 11251]]
Classification report :
              precision    recall  f1-score   support

           0     0.9963    0.9970    0.9967     31801
           1     0.9917    0.9895    0.9906     11370

    accuracy                         0.9951     43171
   

100%|██████████| 10/10 [12:16<00:00, 73.65s/it]

------------------Test Result----------------------------
Testing loss : 0.9287515878677368 | F1_score : 0.696597685022799
Confusion matrix :
[[3524  414]
 [ 451  993]]
Classification report :
              precision    recall  f1-score   support

           0     0.8865    0.8949    0.8907      3938
           1     0.7058    0.6877    0.6966      1444

    accuracy                         0.8393      5382
   macro avg     0.7961    0.7913    0.7936      5382
weighted avg     0.8380    0.8393    0.8386      5382

---------------------------------------------------------





In [6]:
model = LSTMmodel(matrix_size).to(device)
model.load_state_dict(torch.load('best_LSTMmodel.pth'))
test_step(model,test_dataloader,BCE_loss,Adam_optimizer,device = device)

  model.load_state_dict(torch.load('best_LSTMmodel.pth'))


------------------Test Result----------------------------
Testing loss : 0.37176916003227234 | F1_score : 0.7247706422018348
Update new best model to : /kaggle/working/best_LSTMmodel.pth
Confusion matrix :
[[3575  377]
 [ 403 1027]]
Classification report :
              precision    recall  f1-score   support

           0     0.8987    0.9046    0.9016      3952
           1     0.7315    0.7182    0.7248      1430

    accuracy                         0.8551      5382
   macro avg     0.8151    0.8114    0.8132      5382
weighted avg     0.8543    0.8551    0.8546      5382

---------------------------------------------------------


In [7]:
import torch.nn.functional as F
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, lstm_output):
        """
        lstm_output: Tensor of shape (batch_size, seq_len, hidden_dim)
        """
        # Compute attention scores
        attn_scores = self.attention(lstm_output).squeeze(-1)  # (batch_size, seq_len)
        attn_weights = F.softmax(attn_scores, dim=1)  # Normalize scores to probabilities
        
        # Compute context vector as weighted sum of LSTM outputs
        context = torch.bmm(attn_weights.unsqueeze(1), lstm_output).squeeze(1)  # (batch_size, hidden_dim)
        
        return context, attn_weights

In [8]:
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

def F1_tensor(y_true, y_pred):
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    return f1_score(y_true, y_pred)

def Confusion_matrix_tensor(y_true, y_pred):
    y_true = y_true.to('cpu').numpy()
    y_pred = y_pred.to('cpu').numpy()
    return f1_score(y_true, y_pred)

def convert_from_tensor(y): #convert from tensor to some kind of array that we can use numpy
    return y.cpu().detach().numpy().reshape(-1)

def take_all_elem(container, target):
    for x in target:
        if (x != 0 and x != 1):
            container.append(1)
        else:
            container.append(x)

def save_model(model):
    MODEL_PATH = Path('/content')
    MODEL_PATH.mkdir(parents = True, exist_ok = True)
    MODEL_NAME = 'best_GRUmodel.pth'
    MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME
    print(f'Update new best model to : {MODEL_SAVE_PATH}')
    torch.save(obj = model.state_dict(),f = MODEL_SAVE_PATH)



In [14]:
def train_step(model : nn.Module,
               data_loader : torch.utils.data.DataLoader,
               loss_function : nn.Module,
               optimizer,
               device = 'cuda'):
    model.train()
    loss = 0

    all_y_true = []
    all_y_pred = []

    for (X_train,y_train,mask) in data_loader:
        X_train = X_train.to(device)
        y_train = y_train.unsqueeze(1).to(device)

        y_pred = model(X_train)
        y_pred01 = torch.round(torch.sigmoid(y_pred))

        batch_loss = loss_function(y_pred.float(),y_train.float())
        loss += batch_loss

        take_all_elem(all_y_true,convert_from_tensor(y_train))
        take_all_elem(all_y_pred,convert_from_tensor(y_pred01))


        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()

    loss /= len(data_loader)

    all_y_true = np.array(all_y_true)
    all_y_pred = np.array(all_y_pred)

    #print(all_y_true)
    #print(np.unique(all_y_true))

    print('------------------Train Result----------------------------')
    print(f'Training loss : {loss} | F1_score : {f1_score(all_y_true,all_y_pred)}')

    print(classification_report(all_y_true, all_y_pred, digits=4))

best_f1_score = -1


In [15]:
def test_step(model : nn.Module,
              data_loader : torch.utils.data.DataLoader,
              loss_function : nn.Module,
              optimizer,
              device = 'cuda',):

    model.eval()
    loss,acc = 0,0

    all_y_true = []
    all_y_pred = []

    with torch.inference_mode():
        loss = 0

        for (X_test,y_test,mask) in data_loader:
            X_test = X_test.to(device)
            y_test = y_test.to(device)

            test_logits = model(X_test).squeeze()
            test_01 = torch.round(torch.sigmoid(test_logits))

            batch_loss = loss_function(test_logits.float(),y_test.float())

            loss += batch_loss

            take_all_elem(all_y_true,convert_from_tensor(y_test))
            take_all_elem(all_y_pred,convert_from_tensor(test_01))


        loss /= len(data_loader)
        acc /= len(data_loader)

    current_f1_score = f1_score(all_y_true,all_y_pred)
    print('------------------Test Result----------------------------')
    print(f'Testing loss : {loss} | F1_score : {current_f1_score}')
    print('---------------------------------------------------------')

    print(classification_report(all_y_true, all_y_pred, digits=4))

    global best_f1_score
    if (current_f1_score > best_f1_score):
        best_f1_score = current_f1_score
        save_model(model)

matrix_size = (128,768)




In [16]:
class GRUmodel(nn.Module):
    def __init__(self):
        super().__init__()

        self.embedding = nn.Embedding(64001, 768)
        self.rnn = nn.GRU(input_size = matrix_size[1],hidden_size = 384,
            num_layers = 3, batch_first = True, bidirectional = False)
        self.attention = Attention(384)
        self.fc = nn.Linear(384,out_features = 1)

    #it output [0,1,2,....,seq_length - 1]
    #just take the last array element in case of classification or anything like that
    def forward(self, X, state=None):
        X = self.embedding(X)
        gru_outputs, _ = self.rnn(X, state)
        context, attn_weights = self.attention(gru_outputs)
        return self.fc(context)


In [17]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = GRUmodel().to(device)
BCE_loss = nn.BCEWithLogitsLoss()
Adam_optimizer = torch.optim.AdamW(params = model.parameters(),lr = 0.0001)


In [18]:
epochs = 10

for epoch in range(0,epochs):
    print(f'Epoch {epoch}=======================================')
    train_step(model,train_dataloader,BCE_loss,Adam_optimizer,device = device)
    test_step( model,test_dataloader,BCE_loss,Adam_optimizer,device = device)
    

------------------Train Result----------------------------
Training loss : 0.45532846450805664 | F1_score : 0.4922018620648757
              precision    recall  f1-score   support

           0     0.8084    0.9578    0.8768     31801
           1     0.7555    0.3650    0.4922     11370

    accuracy                         0.8016     43171
   macro avg     0.7819    0.6614    0.6845     43171
weighted avg     0.7945    0.8016    0.7755     43171

------------------Test Result----------------------------
Testing loss : 0.3817872405052185 | F1_score : 0.6669282071400549
---------------------------------------------------------
              precision    recall  f1-score   support

           0     0.8639    0.9319    0.8967      3952
           1     0.7596    0.5944    0.6669      1430

    accuracy                         0.8423      5382
   macro avg     0.8118    0.7632    0.7818      5382
weighted avg     0.8362    0.8423    0.8356      5382

Update new best model to : /content/b

In [19]:
test_step(model,test_dataloader,BCE_loss,Adam_optimizer,device = device)

------------------Test Result----------------------------
Testing loss : 0.5913235545158386 | F1_score : 0.7070707070707071
---------------------------------------------------------
              precision    recall  f1-score   support

           0     0.8828    0.9246    0.9032      3952
           1     0.7603    0.6608    0.7071      1430

    accuracy                         0.8545      5382
   macro avg     0.8215    0.7927    0.8051      5382
weighted avg     0.8503    0.8545    0.8511      5382



In [5]:
class BCNN(nn.Module):
    def __init__(self):

        super().__init__()

        self.embedding = nn.Embedding(64001,768)
        self.bidirectional_lstm = nn.LSTM(
            768, 512, 3,bidirectional=True, batch_first=True
        )
        self.conv1 = nn.Conv1d(in_channels=2*512, out_channels=32, kernel_size=4)
        self.conv2 = nn.Conv1d(in_channels=2*512, out_channels=32, kernel_size=5)
        self.attention = Attention(64)
        self.fc = nn.Linear(64, 1)

        self.dropout = nn.Dropout(0.1)

    def forward(self, X):

        #embedded = [batch size, sent len, emb dim]
        #embedded = self.fc_input(encoded)
        #print(embedded.shape)
        encoded = self.embedding(X)
        embedded, _ = self.bidirectional_lstm(encoded)
        embedded = embedded.permute(0, 2, 1)
        #print(embedded.shape)
        conved_1 = F.relu(self.conv1(embedded))
        conved_2 = F.relu(self.conv2(embedded))
        #conved_n = [batch size, n_filters, sent len - filter_sizes[n] + 1]

        pooled_1 = F.max_pool1d(conved_1, conved_1.shape[2]).squeeze(2)
        pooled_2 = F.max_pool1d(conved_2, conved_2.shape[2]).squeeze(2)
        #pooled_n = [batch size, n_fibatlters]

        cat = self.dropout(torch.cat((pooled_1, pooled_2), dim = -1))

        #cat = [batch size, n_filters * len(filter_sizes)]

        context, _ = self.attention(cat.unsqueeze(1))
        
        result =  self.fc(cat)

        return result

In [7]:
device = 'cuda'
model = BCNN().to(device)
BCE_loss = nn.BCEWithLogitsLoss()
Adam_optimizer = torch.optim.AdamW(params = model.parameters(),lr = 0.001)


epochs = 3
best_f1_score = -1
for epoch in tqdm(range(0,epochs)):
    print(f'Epoch {epoch}=======================================')
    train_step(model,train_dataloader,BCE_loss,Adam_optimizer,device = device)
    test_step(model,val_dataloader,BCE_loss,Adam_optimizer,device = device)

  0%|          | 0/3 [00:00<?, ?it/s]

------------------Train Result----------------------------
Training loss : 0.4337882697582245 | F1_score : 0.5533884297520661
Confusion matrix :
[[30043  1758]
 [ 6348  5022]]
Classification report :
              precision    recall  f1-score   support

           0     0.8256    0.9447    0.8811     31801
           1     0.7407    0.4417    0.5534     11370

    accuracy                         0.8122     43171
   macro avg     0.7831    0.6932    0.7173     43171
weighted avg     0.8032    0.8122    0.7948     43171

------------------Test Result----------------------------
Testing loss : 0.37103307247161865 | F1_score : 0.6644924582144313
Update new best model to : /kaggle/working/best_LSTMmodel.pth


 33%|███▎      | 1/3 [01:59<03:58, 119.04s/it]

Confusion matrix :
[[3744  194]
 [ 629  815]]
Classification report :
              precision    recall  f1-score   support

           0     0.8562    0.9507    0.9010      3938
           1     0.8077    0.5644    0.6645      1444

    accuracy                         0.8471      5382
   macro avg     0.8319    0.7576    0.7827      5382
weighted avg     0.8432    0.8471    0.8375      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.3019334673881531 | F1_score : 0.7422858790733441
Confusion matrix :
[[30087  1714]
 [ 3648  7722]]
Classification report :
              precision    recall  f1-score   support

           0     0.8919    0.9461    0.9182     31801
           1     0.8184    0.6792    0.7423     11370

    accuracy                         0.8758     43171
   macro avg     0.8551    0.8126    0.8302     43171
weighted avg     0.8725    0.8758    0.8719     43171

------------------

 67%|██████▋   | 2/3 [03:58<01:59, 119.06s/it]

Confusion matrix :
[[3585  353]
 [ 429 1015]]
Classification report :
              precision    recall  f1-score   support

           0     0.8931    0.9104    0.9017      3938
           1     0.7420    0.7029    0.7219      1444

    accuracy                         0.8547      5382
   macro avg     0.8175    0.8066    0.8118      5382
weighted avg     0.8526    0.8547    0.8534      5382

---------------------------------------------------------
------------------Train Result----------------------------
Training loss : 0.21027088165283203 | F1_score : 0.8368522072936659
Confusion matrix :
[[30445  1356]
 [ 2214  9156]]
Classification report :
              precision    recall  f1-score   support

           0     0.9322    0.9574    0.9446     31801
           1     0.8710    0.8053    0.8369     11370

    accuracy                         0.9173     43171
   macro avg     0.9016    0.8813    0.8907     43171
weighted avg     0.9161    0.9173    0.9162     43171



100%|██████████| 3/3 [05:56<00:00, 118.92s/it]

------------------Test Result----------------------------
Testing loss : 0.391086608171463 | F1_score : 0.7038012796386902
Confusion matrix :
[[3660  278]
 [ 509  935]]
Classification report :
              precision    recall  f1-score   support

           0     0.8779    0.9294    0.9029      3938
           1     0.7708    0.6475    0.7038      1444

    accuracy                         0.8538      5382
   macro avg     0.8244    0.7885    0.8034      5382
weighted avg     0.8492    0.8538    0.8495      5382

---------------------------------------------------------





In [8]:
model = BCNN().to(device)
model.load_state_dict(torch.load('best_LSTMmodel.pth'))
test_step(model,test_dataloader,BCE_loss,Adam_optimizer,device = device)

  model.load_state_dict(torch.load('best_LSTMmodel.pth'))


------------------Test Result----------------------------
Testing loss : 0.3403221368789673 | F1_score : 0.7262206148282097
Update new best model to : /kaggle/working/best_LSTMmodel.pth
Confusion matrix :
[[3621  331]
 [ 426 1004]]
Classification report :
              precision    recall  f1-score   support

           0     0.8947    0.9162    0.9054      3952
           1     0.7521    0.7021    0.7262      1430

    accuracy                         0.8593      5382
   macro avg     0.8234    0.8092    0.8158      5382
weighted avg     0.8568    0.8593    0.8578      5382

---------------------------------------------------------
