In [None]:
import torch
from torchtext.vocab import build_vocab_from_iterator
import pandas as pd
from nltk.tokenize import WhitespaceTokenizer
from google.colab import drive
from torch.utils.data import Dataset, DataLoader
drive.mount('/content/drive/')
import string
import torch.nn.functional as F
from tqdm import tqdm
from sklearn.metrics import precision_score, recall_score, f1_score
import copy

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Sanity check: list the validation files and the Hindi test file on the mounted Drive.
!ls drive/MyDrive/others/Data/Validation/
!ls drive/MyDrive/others/Test/hindi/test.csv

valid_Bangla.csv  valid_Hindi.csv  valid_Magahi.csv
drive/MyDrive/others/Test/hindi/test.csv


## Data Loading And Preprocessing

In [None]:
# Load the Hindi train / validation / test splits and lower-case the text column.
train_df = pd.read_csv('drive/MyDrive/others/Data/Training/train_Hindi.csv')
train_df['sentence'] = train_df['sentence'].str.lower()
valid_df = pd.read_csv('drive/MyDrive/others/Data/Validation/valid_Hindi.csv')
valid_df['sentence'] = valid_df['sentence'].str.lower()
test_df = pd.read_csv('drive/MyDrive/others/Test/hindi/test.csv')
# Lower-case the test sentences themselves. Assigning from valid_df here would
# replace the test text with (index-aligned) validation text.
test_df['sentence'] = test_df['sentence'].str.lower()

# Splits sentences on whitespace only; punctuation is stripped separately.
tokenizer = WhitespaceTokenizer()

# Characters stripped from every sentence (string.punctuation, i.e. ASCII punctuation only).
punct = string.punctuation

def remove_punct_one(txt):
  """Return `txt` with every character listed in `punct` removed."""
  # One translate pass deletes all listed characters at once.
  return txt.translate(str.maketrans("", "", punct))

# Strip punctuation from the sentence column of every split.
for split_df in (train_df, valid_df, test_df):
  split_df['sentence'] = split_df['sentence'].apply(remove_punct_one)

### Vocab implementation on pre-processed dataset

In [None]:
def prepare_datasets_and_vocab(data_frame):
    """Build a torchtext vocabulary from the `sentence` column of `data_frame`.

    Each sentence is split with the module-level `tokenizer`. Tokens seen fewer
    than two times are left out of the vocabulary, and any out-of-vocabulary
    token is mapped to the "<unk>" entry.

    Returns:
        The vocabulary object produced by `build_vocab_from_iterator`.
    """
    tokenized = [tokenizer.tokenize(text) for text in data_frame['sentence']]
    vocab = build_vocab_from_iterator(tokenized, specials=["<unk>"], min_freq=2)
    # Unknown tokens resolve to the "<unk>" index instead of raising.
    unk_index = vocab['<unk>']
    vocab.set_default_index(unk_index)
    return vocab

In [None]:
# The vocabulary is built from the training split only.
vocab = prepare_datasets_and_vocab(train_df)
print(f"Unique Words: {len(vocab)}")

Unique Words: 3332


In [None]:
# Show how many training examples carry each sentiment label.
print('Senitment Labels:')
train_df['sentiment'].value_counts()

Senitment Labels:


positive    1898
negative     419
mix          113
neutral       77
Name: sentiment, dtype: int64

## Model Architecture
Embedding size: 100

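BI-LSTM: 2 layers, hidden size 64 per direction (64*2 = 128 output features)

Fully Connected: 128 -> 64

Fully Connected: 64 -> 4 (negative, neutral, positive, mix)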

In [None]:
class SentimentModel(torch.nn.Module):
    """Bidirectional-LSTM sentence classifier over learned word embeddings.

    Pipeline: embedding -> dropout -> LayerNorm -> 2-layer bidirectional LSTM
    (hidden size 64 per direction) -> LayerNorm -> Linear(128, 64) ->
    Linear(64, 4).
    """

    def __init__(self, vocab_size, emb_size):
        '''Create the layers.

        Args:
            vocab_size: number of rows in the embedding table.
            emb_size: dimensionality of each word embedding.
        '''
        super(SentimentModel, self).__init__()

        self.gen_embedding = torch.nn.Embedding(vocab_size, emb_size)
        self.dropout = torch.nn.Dropout(p=0.1)

        # LSTM input size is emb_size; its output has 2 * 64 = 128 features
        # because the two directions are concatenated.
        self.norm1 = torch.nn.LayerNorm(emb_size)
        self.bilstm = torch.nn.LSTM(emb_size, 64,
                                    num_layers=2, batch_first=True, bidirectional=True)
        self.norm2 = torch.nn.LayerNorm(128)
        self.fc1 = torch.nn.Linear(128, 64)

        # One logit per sentiment class.
        self.class_layer = torch.nn.Linear(64, 4)

    def forward(self, X):
        '''Return class logits for a batch of token-id sequences.

        Args:
            X: integer tensor of token ids, indexed as (batch, time) since the
               LSTM is created with batch_first=True.

        Returns:
            Tensor of shape (batch, 4) with unnormalised class scores.
        '''
        emb = self.dropout(self.gen_embedding(X))

        # Feed the layer-normalised embeddings to the LSTM. Passing `emb`
        # directly would leave norm1 without any effect on the output.
        normed = self.norm1(emb)
        output, _ = self.bilstm(normed)

        # NOTE(review): this reads the LSTM output at the last time step. With
        # right-padded inputs that position is padding for short sentences;
        # consider pooling or packed sequences.
        last_step = output[:, -1, :]
        fc1 = self.fc1(self.norm2(last_step))
        classout = self.class_layer(fc1)
        return classout



In [None]:
class SentDataset(Dataset):
  """Map-style dataset of (token-id tensor, label tensor) pairs.

  Every sentence is tokenized, converted to vocabulary ids, truncated to
  `maxlen` tokens and right-padded with 0 up to `maxlen`. All examples are
  encoded once, at construction time.

  Args:
    data_df: DataFrame whose rows unpack as (sentence, label).
    tokenizer: object exposing `tokenize(str) -> list of tokens`.
    vocab: callable mapping a list of tokens to a list of integer ids.
    maxlen: fixed length of every encoded sequence.

  Raises:
    KeyError: if a label is not present in `label_encoding`.
  """
  def __init__(self, data_df, tokenizer, vocab, maxlen=128):
    self.tokenizer = tokenizer
    self.vocab = vocab
    self.data_df = data_df
    self.maxlen = maxlen
    self.label_encoding = { # add more if more sentiment labels are present
        "negative": 0,
        "neutral": 1,
        "positive":2,
        "mix":3,
    }
    self.punct = string.punctuation
    self.encoded_data = []
    self._build()

  def _build(self):
    """Encode every row of `data_df` into fixed-length tensors."""
    for sentence, label in self.data_df.values:
      tokens = self.tokenizer.tokenize(sentence)
      # Truncate and pad against self.maxlen (not a hard-coded 128) so that
      # every sequence has exactly `maxlen` entries for any maxlen.
      token_ids = list(self.vocab(tokens))[:self.maxlen]
      enc_tokens = token_ids + [0] * (self.maxlen - len(token_ids))
      lab = self.label_encoding[label]
      self.encoded_data.append([torch.tensor(enc_tokens),  torch.tensor(lab)])

  def __getitem__(self, index):
    """Return the [token_ids, label] pair stored at `index`."""
    return self.encoded_data[index]

  def __len__(self):
    """Return the number of encoded examples."""
    return len(self.encoded_data)


In [None]:
# Encode each split with the vocabulary built from the training data.
# NOTE(review): SentDataset unpacks every row as (sentence, label), so all
# three frames are assumed to have exactly those two columns — confirm for test_df.
train_dataset = SentDataset(train_df, tokenizer, vocab)
valid_dataset = SentDataset(valid_df, tokenizer, vocab)
test_dataset = SentDataset(test_df, tokenizer, vocab)

In [None]:
# Inverse-frequency class weights, ordered like SentDataset.label_encoding:
# negative=0, neutral=1, positive=2, mix=3.
vals = dict(train_df['sentiment'].value_counts())
tot = sum(vals.values())
w_neg = 1/vals['negative']
w_neu = 1/vals['neutral']
w_pos = 1/vals['positive']
w_mix = 1/vals['mix']
sent_weights = torch.tensor([w_neg, w_neu, w_pos, w_mix], dtype=torch.float)
# Tensor.to() returns a new tensor rather than moving in place, so the result
# must be assigned back for sent_weights to actually live on `device`.
sent_weights = sent_weights.to(device)
sent_weights

tensor([0.0024, 0.0130, 0.0005, 0.0088], device='cuda:0')

In [None]:
# Shuffle the training examples every epoch so batches are not always made of
# the same rows in file order; validation order is kept fixed.
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
valid_dataloader = DataLoader(valid_dataset, batch_size=32, shuffle=False)
model = SentimentModel(len(vocab), 100)
# Class-weighted loss to counter the label imbalance in the training set.
loss_fn = torch.nn.CrossEntropyLoss(weight=sent_weights)
loss_fn.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)#, weight_decay=0.001)
# Filled in by the training loop with a copy of the best-scoring model.
best_model = None
model.to(device)


SentimentModel(
  (gen_embedding): Embedding(3332, 100)
  (dropout): Dropout(p=0.1, inplace=False)
  (norm1): LayerNorm((100,), eps=1e-05, elementwise_affine=True)
  (bilstm): LSTM(100, 64, num_layers=2, batch_first=True, bidirectional=True)
  (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (fc1): Linear(in_features=128, out_features=64, bias=True)
  (class_layer): Linear(in_features=64, out_features=4, bias=True)
)

In [None]:
def eval_loop(pred_model, dataloader):
  """Run `pred_model` over `dataloader` and compute macro-averaged metrics.

  The model is moved to the module-level `device` and switched to eval mode;
  it is left in eval mode on return.

  Args:
    pred_model: model returning per-class scores for a batch of inputs.
    dataloader: iterable yielding (inputs, gold_labels) batches.

  Returns:
    A tuple (preds, metrics): `preds` is the list of predicted class ids in
    dataloader order, and `metrics` is a dict with keys 'macro_precision',
    'macro_recall' and 'macro_f1'.
  """
  dload = tqdm(dataloader)
  preds = []
  golds = []
  pred_model.to(device)
  pred_model.eval()
  with torch.no_grad():
    for x, y in dload:
      pred_out = pred_model(x.to(device))
      preds.extend(pred_out.argmax(dim=-1).tolist())
      golds.extend(y.tolist())

  # zero_division=0 matches the training loop: a class that is never predicted
  # scores 0 instead of triggering an undefined-metric warning.
  precise_macro = precision_score(golds, preds, average='macro', zero_division=0)
  recall_macro = recall_score(golds, preds, average='macro', zero_division=0)
  f1_macro = f1_score(golds, preds, average='macro', zero_division=0)

  return preds, {
      'macro_precision': precise_macro,
      'macro_recall': recall_macro,
      'macro_f1': f1_macro
  }

In [None]:
N_EPOCHS = 80
best_f1 = -1
for epoch in range(N_EPOCHS):
  # ---- Training pass ----
  train_epoch_loss = 0
  model.to(device)
  model.train()
  train_ite = tqdm(train_dataloader)
  for batch in train_ite:
    x, l = batch
    x = x.to(device)
    l = l.to(device)
    output = model(x)

    loss = loss_fn(output, l)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    train_ite.set_postfix({'training loss': loss.item()})
    train_epoch_loss += loss.item()

  # ---- Validation pass ----
  validation_epoch_loss = 0
  valid_ite = tqdm(valid_dataloader)
  preds = []
  golds = []
  model.eval()
  with torch.no_grad():
    for batch in valid_ite:
      x, y = batch
      pred_out = model(x.to(device))
      preds += pred_out.argmax(dim=-1).tolist()
      golds += y.tolist()
      val_loss = loss_fn(pred_out, y.to(device))
      valid_ite.set_postfix({'validation loss': val_loss.item()})
      # Accumulate over all batches; plain assignment would keep only the
      # loss of the last validation batch.
      validation_epoch_loss += val_loss.item()

  # loss_fn returns a per-batch mean, so the epoch average is the summed loss
  # divided by the number of batches, not by the number of samples.
  avg_train_loss = train_epoch_loss / max(len(train_dataloader), 1)
  avg_valid_loss = validation_epoch_loss / max(len(valid_dataloader), 1)

  print()
  print('--'*20, ' Validation Scores ', '--'*20)
  precise_macro = precision_score(golds, preds, average='macro', zero_division=0)
  recall_macro = recall_score(golds, preds, average='macro', zero_division=0)
  f1_macro = f1_score(golds, preds, average='macro', zero_division=0)
  # Keep a snapshot of the model with the best validation macro-F1 so far.
  if f1_macro > best_f1:
    best_model = copy.deepcopy(model)
    best_f1 = f1_macro
  print(f'EPOCH: {epoch}')
  print(f'avg training loss: {avg_train_loss}, avg validation loss:{avg_valid_loss}')
  print(f'precision:{precise_macro}, recall:{recall_macro}, f1:{f1_macro}')
  print('--'*50)



100%|██████████| 20/20 [00:00<00:00, 60.74it/s, training loss=1.41]
100%|██████████| 17/17 [00:00<00:00, 268.33it/s, validation loss=1.4]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 0
avg training loss: 0.011946265470187503, avg validation loss:0.002605934568497328
precision:0.007434944237918215, recall:0.25, f1:0.014440433212996389
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.44it/s, training loss=1.4]
100%|██████████| 17/17 [00:00<00:00, 252.25it/s, validation loss=1.48]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 1
avg training loss: 0.011144040826311328, avg validation loss:0.0027590159146759145
precision:0.007434944237918215, recall:0.25, f1:0.014440433212996389
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.21it/s, training loss=1.4]
100%|██████████| 17/17 [00:00<00:00, 254.30it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 2
avg training loss: 0.011153059743722216, avg validation loss:0.0025368651936045366
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.90it/s, training loss=1.4]
100%|██████████| 17/17 [00:00<00:00, 240.22it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 3
avg training loss: 0.011085310874348577, avg validation loss:0.002527151852292199
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.52it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 270.65it/s, validation loss=1.4]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 4
avg training loss: 0.011023697265363282, avg validation loss:0.0025964312837026375
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.64it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 262.81it/s, validation loss=1.41]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 5
avg training loss: 0.011010419908728216, avg validation loss:0.0026265196640695336
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.71it/s, training loss=1.4]
100%|██████████| 17/17 [00:00<00:00, 263.79it/s, validation loss=1.41]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 6
avg training loss: 0.011017295198703031, avg validation loss:0.002614932432493756
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.41it/s, training loss=1.41]
100%|██████████| 17/17 [00:00<00:00, 259.07it/s, validation loss=1.39]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 7
avg training loss: 0.011018814439549121, avg validation loss:0.002591462383482979
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.88it/s, training loss=1.4]
100%|██████████| 17/17 [00:00<00:00, 255.28it/s, validation loss=1.44]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 8
avg training loss: 0.011004356164403489, avg validation loss:0.002672557050853857
precision:0.007434944237918215, recall:0.25, f1:0.014440433212996389
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.08it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 270.12it/s, validation loss=1.41]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 9
avg training loss: 0.011021092492458494, avg validation loss:0.002623176042918379
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.45it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 273.21it/s, validation loss=1.35]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 10
avg training loss: 0.011018553244244972, avg validation loss:0.0025174637709408443
precision:0.18912639405204462, recall:0.25, f1:0.21534391534391537
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.96it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 271.12it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 11
avg training loss: 0.010992148457935334, avg validation loss:0.0025231432737471003
precision:0.18912639405204462, recall:0.25, f1:0.21534391534391537
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 94.33it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 248.80it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 12
avg training loss: 0.01098358041506913, avg validation loss:0.0025360683969405506
precision:0.18912639405204462, recall:0.25, f1:0.21534391534391537
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.03it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 240.93it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 13
avg training loss: 0.010983362776090959, avg validation loss:0.002530344578413272
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.96it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 263.12it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 14
avg training loss: 0.01098065515129415, avg validation loss:0.002528593221118459
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 94.18it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 252.87it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 15
avg training loss: 0.010978262025995182, avg validation loss:0.0025314225583271464
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.70it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 238.06it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 16
avg training loss: 0.010977923418356215, avg validation loss:0.0025300197441781762
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 87.84it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 247.82it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 17
avg training loss: 0.010976958094149126, avg validation loss:0.0025304928145000924
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.49it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 244.34it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 18
avg training loss: 0.010976229001194725, avg validation loss:0.002529727260419427
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.33it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 239.51it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 19
avg training loss: 0.010975063298867906, avg validation loss:0.002532472397758172
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 85.87it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 203.51it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 20
avg training loss: 0.010975202669600923, avg validation loss:0.002532274084906596
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.98it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 270.27it/s, validation loss=1.36]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 21
avg training loss: 0.010975197201284889, avg validation loss:0.0025370194123136955
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.78it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 270.37it/s, validation loss=1.37]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 22
avg training loss: 0.010976590242907019, avg validation loss:0.002541021786657851
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 86.76it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 210.23it/s, validation loss=1.38]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 23
avg training loss: 0.010978953221141936, avg validation loss:0.0025622966564277736
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.41it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 233.94it/s, validation loss=1.41]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 24
avg training loss: 0.010986127794430842, avg validation loss:0.0026144442948266918
precision:0.007434944237918215, recall:0.25, f1:0.014440433212996389
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.02it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 237.65it/s, validation loss=1.49]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 25
avg training loss: 0.010994546148089427, avg validation loss:0.002761392123637146
precision:0.007434944237918215, recall:0.25, f1:0.014440433212996389
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.28it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 227.91it/s, validation loss=1.56]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 26
avg training loss: 0.011004726250522585, avg validation loss:0.0028909438161601807
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.93it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 277.44it/s, validation loss=1.56]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 27
avg training loss: 0.011014914484103933, avg validation loss:0.0028991513092721704
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.88it/s, training loss=1.38]
100%|██████████| 17/17 [00:00<00:00, 229.62it/s, validation loss=1.52]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 28
avg training loss: 0.011012439999772488, avg validation loss:0.00282793550243165
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 82.85it/s, training loss=1.38]
100%|██████████| 17/17 [00:00<00:00, 197.78it/s, validation loss=1.44]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 29
avg training loss: 0.011038269091280133, avg validation loss:0.0026792482815710585
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 87.33it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 227.28it/s, validation loss=1.39]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 30
avg training loss: 0.011035761226445591, avg validation loss:0.0025789177107545078
precision:0.0111731843575419, recall:0.24, f1:0.021352313167259787
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 84.96it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 226.27it/s, validation loss=1.45]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 31
avg training loss: 0.011158812317089298, avg validation loss:0.002690388589100324
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.22it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 221.50it/s, validation loss=1.38]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 32
avg training loss: 0.011073556895459559, avg validation loss:0.0025657384369009934
precision:0.18912639405204462, recall:0.25, f1:0.21534391534391537
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 86.59it/s, training loss=1.39]
100%|██████████| 17/17 [00:00<00:00, 194.60it/s, validation loss=1.35]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 33
avg training loss: 0.01099288958118501, avg validation loss:0.0025064353606071613
precision:0.041821561338289966, recall:0.25, f1:0.07165605095541402
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.11it/s, training loss=1.33]
100%|██████████| 17/17 [00:00<00:00, 263.77it/s, validation loss=1.52]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 34
avg training loss: 0.010915226321986913, avg validation loss:0.0028258462377640393
precision:0.011617100371747211, recall:0.25, f1:0.02220248667850799
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.80it/s, training loss=1.1]
100%|██████████| 17/17 [00:00<00:00, 241.85it/s, validation loss=1.58]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 35
avg training loss: 0.010626610551262738, avg validation loss:0.00293155008975458
precision:0.028404907975460122, recall:0.354375, f1:0.05109647929477063
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.65it/s, training loss=0.942]
100%|██████████| 17/17 [00:00<00:00, 243.43it/s, validation loss=1.29]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 36
avg training loss: 0.008920440718525664, avg validation loss:0.0023923228221311888
precision:0.2562105079894454, recall:0.3240615615615615, f1:0.11561416553074129
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.14it/s, training loss=0.856]
100%|██████████| 17/17 [00:00<00:00, 246.00it/s, validation loss=1.25]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 37
avg training loss: 0.0076036838168968566, avg validation loss:0.0023269808425336077
precision:0.23047391060924782, recall:0.3373326167076167, f1:0.2025664695940213
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.71it/s, training loss=0.667]
100%|██████████| 17/17 [00:00<00:00, 236.80it/s, validation loss=1.07]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 38
avg training loss: 0.00634256833855926, avg validation loss:0.0019859361382665243
precision:0.25807186798793924, recall:0.36710124897624896, f1:0.2095638707914987
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.77it/s, training loss=0.59]
100%|██████████| 17/17 [00:00<00:00, 217.15it/s, validation loss=0.961]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 39
avg training loss: 0.005098132900284447, avg validation loss:0.0017856528989444434
precision:0.07674310983134514, recall:0.3665277777777778, f1:0.11030870791112138
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.23it/s, training loss=0.606]
100%|██████████| 17/17 [00:00<00:00, 250.63it/s, validation loss=0.872]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 40
avg training loss: 0.004778383842825081, avg validation loss:0.0016216594711998581
precision:0.2608085466595378, recall:0.3278228228228228, f1:0.24789746883766953
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.43it/s, training loss=0.504]
100%|██████████| 17/17 [00:00<00:00, 215.80it/s, validation loss=1.04]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 41
avg training loss: 0.004226397923563503, avg validation loss:0.0019350754284061021
precision:0.251871518343615, recall:0.3287878787878788, f1:0.19025040702064952
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 86.28it/s, training loss=0.377]
100%|██████████| 17/17 [00:00<00:00, 236.58it/s, validation loss=0.866]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 42
avg training loss: 0.0035821955827873542, avg validation loss:0.0016092234827771949
precision:0.2965167627383443, recall:0.39744693557193556, f1:0.2150121322052621
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.71it/s, training loss=0.402]
100%|██████████| 17/17 [00:00<00:00, 258.23it/s, validation loss=0.887]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 43
avg training loss: 0.0034898583233760657, avg validation loss:0.001648029651783656
precision:0.3091722632254782, recall:0.40126228501228506, f1:0.20981732720863155
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.81it/s, training loss=0.381]
100%|██████████| 17/17 [00:00<00:00, 246.20it/s, validation loss=0.912]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 44
avg training loss: 0.0034163779930902944, avg validation loss:0.0016955859599060285
precision:0.297842443365808, recall:0.40309002184002185, f1:0.25717059490018357
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.16it/s, training loss=0.332]
100%|██████████| 17/17 [00:00<00:00, 250.75it/s, validation loss=0.889]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 45
avg training loss: 0.0032315430861808983, avg validation loss:0.001651910047105697
precision:0.2931731274680753, recall:0.37822123259623264, f1:0.29256511783498473
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.24it/s, training loss=0.386]
100%|██████████| 17/17 [00:00<00:00, 240.36it/s, validation loss=0.944]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 46
avg training loss: 0.0031234382703383226, avg validation loss:0.001754800958704328
precision:0.3040036515908889, recall:0.4047186390936391, f1:0.26863081719307647
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.16it/s, training loss=0.388]
100%|██████████| 17/17 [00:00<00:00, 253.30it/s, validation loss=1.6]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 47
avg training loss: 0.003115239820254006, avg validation loss:0.0029782156518843982
precision:0.2963058476907353, recall:0.37106606606606607, f1:0.28728544020429864
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.96it/s, training loss=0.406]
100%|██████████| 17/17 [00:00<00:00, 239.60it/s, validation loss=1.85]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 48
avg training loss: 0.003243413665021707, avg validation loss:0.003444717054473423
precision:0.31339285714285714, recall:0.33730770543270544, f1:0.30863376157493805
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.60it/s, training loss=0.53]
100%|██████████| 17/17 [00:00<00:00, 254.50it/s, validation loss=1.72]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 49
avg training loss: 0.0036541790209969156, avg validation loss:0.0031911901381822326
precision:0.30955091506713495, recall:0.36677484302484303, f1:0.21667885206191656
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.96it/s, training loss=0.672]
100%|██████████| 17/17 [00:00<00:00, 257.94it/s, validation loss=0.888]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 50
avg training loss: 0.003977914581890357, avg validation loss:0.001650575146799194
precision:0.29527914614121514, recall:0.3386936936936937, f1:0.18725168465091682
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 93.60it/s, training loss=0.666]
100%|██████████| 17/17 [00:00<00:00, 261.63it/s, validation loss=1.86]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 51
avg training loss: 0.00417544182477646, avg validation loss:0.0034488658922755587
precision:0.3195572214545383, recall:0.31968041905541905, f1:0.289544079792234
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.17it/s, training loss=0.439]
100%|██████████| 17/17 [00:00<00:00, 247.85it/s, validation loss=1.33]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 52
avg training loss: 0.003379205058283857, avg validation loss:0.002471450979381689
precision:0.30846413790066923, recall:0.3154789448539449, f1:0.30219026052745945
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.58it/s, training loss=0.479]
100%|██████████| 17/17 [00:00<00:00, 266.80it/s, validation loss=2.22]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 53
avg training loss: 0.0027962235468623644, avg validation loss:0.004126156129801583
precision:0.3690160333219217, recall:0.31919993857493856, f1:0.31746357605473025
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.03it/s, training loss=0.389]
100%|██████████| 17/17 [00:00<00:00, 257.92it/s, validation loss=2.53]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 54
avg training loss: 0.0026119795747139942, avg validation loss:0.0047048064412680704
precision:0.3400267923133777, recall:0.3119231162981163, f1:0.31681224101227595
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.67it/s, training loss=0.368]
100%|██████████| 17/17 [00:00<00:00, 248.50it/s, validation loss=3.31]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 55
avg training loss: 0.002113344603246743, avg validation loss:0.006148750454076604
precision:0.35179507113609176, recall:0.3160467854217854, f1:0.316892328854202
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.19it/s, training loss=0.361]
100%|██████████| 17/17 [00:00<00:00, 234.65it/s, validation loss=3.52]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 56
avg training loss: 0.0021856788517613784, avg validation loss:0.006538845349421732
precision:0.38329583802024747, recall:0.33949341386841386, f1:0.33019944613169094
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.62it/s, training loss=0.258]
100%|██████████| 17/17 [00:00<00:00, 245.05it/s, validation loss=3.25]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 57
avg training loss: 0.0021633853950869483, avg validation loss:0.006036336094030217
precision:0.32239549330326234, recall:0.3298646942396943, f1:0.3056519474607342
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.66it/s, training loss=0.289]
100%|██████████| 17/17 [00:00<00:00, 242.93it/s, validation loss=2.06]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 58
avg training loss: 0.0022278548261774078, avg validation loss:0.0038332190212263936
precision:0.34028091792054116, recall:0.4059833128583128, f1:0.3423392403689411
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.56it/s, training loss=0.199]
100%|██████████| 17/17 [00:00<00:00, 227.10it/s, validation loss=4.18]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 59
avg training loss: 0.0018781277172395228, avg validation loss:0.007769474752773582
precision:0.33495959393575103, recall:0.39072942260442256, f1:0.3373444369770551
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 92.73it/s, training loss=0.205]
100%|██████████| 17/17 [00:00<00:00, 238.22it/s, validation loss=3.72]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 60
avg training loss: 0.002275899795597656, avg validation loss:0.006910276235700983
precision:0.3288579212430602, recall:0.37004572754572757, f1:0.3299558741599662
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.38it/s, training loss=0.234]
100%|██████████| 17/17 [00:00<00:00, 236.66it/s, validation loss=2.92]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 61
avg training loss: 0.0018881982631496952, avg validation loss:0.00542908884778785
precision:0.3364466902946095, recall:0.3410848348348348, f1:0.33617602503169797
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.83it/s, training loss=0.304]
100%|██████████| 17/17 [00:00<00:00, 229.78it/s, validation loss=2.78]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 62
avg training loss: 0.0019513163485277874, avg validation loss:0.005165925699538901
precision:0.36479529184063225, recall:0.3415543953043953, f1:0.3477526694438459
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.28it/s, training loss=0.343]
100%|██████████| 17/17 [00:00<00:00, 250.14it/s, validation loss=2.23]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 63
avg training loss: 0.0018231874539122717, avg validation loss:0.004152353368284091
precision:0.3875127389197396, recall:0.34361298798798795, f1:0.337138300296195
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.61it/s, training loss=0.226]
100%|██████████| 17/17 [00:00<00:00, 252.54it/s, validation loss=2.72]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 64
avg training loss: 0.0016167590942190709, avg validation loss:0.005063435845215524
precision:0.425692742176884, recall:0.36072856947856946, f1:0.3590100827039556
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.60it/s, training loss=0.215]
100%|██████████| 17/17 [00:00<00:00, 198.97it/s, validation loss=2.34]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 65
avg training loss: 0.0014482355380514775, avg validation loss:0.004349020333981425
precision:0.363116039651402, recall:0.39972495222495225, f1:0.3735983961086937
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 85.79it/s, training loss=0.191]
100%|██████████| 17/17 [00:00<00:00, 246.31it/s, validation loss=2.46]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 66
avg training loss: 0.0014071469145036095, avg validation loss:0.004576778766391003
precision:0.32777651166436955, recall:0.3713875238875239, f1:0.3354006471754048
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.10it/s, training loss=0.238]
100%|██████████| 17/17 [00:00<00:00, 201.22it/s, validation loss=2.44]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 67
avg training loss: 0.0014125908281777454, avg validation loss:0.004537412225092211
precision:0.3417983058608059, recall:0.39849901037401037, f1:0.34492671532149755
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.33it/s, training loss=0.251]
100%|██████████| 17/17 [00:00<00:00, 274.35it/s, validation loss=2.23]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 68
avg training loss: 0.0014396629184425043, avg validation loss:0.004147338601293174
precision:0.3483708456277591, recall:0.4225962325962326, f1:0.34825159848962645
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.96it/s, training loss=0.272]
100%|██████████| 17/17 [00:00<00:00, 261.27it/s, validation loss=1.7]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 69
avg training loss: 0.0014704573835611439, avg validation loss:0.0031559662304846327
precision:0.33115046059780406, recall:0.38585193147693153, f1:0.31646530126173894
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 89.37it/s, training loss=0.22]
100%|██████████| 17/17 [00:00<00:00, 255.96it/s, validation loss=2.05]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 70
avg training loss: 0.0017278233805309504, avg validation loss:0.0038192635575191682
precision:0.3506395545232156, recall:0.4190050846300846, f1:0.33476262541337365
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 86.43it/s, training loss=0.228]
100%|██████████| 17/17 [00:00<00:00, 208.39it/s, validation loss=2.26]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 71
avg training loss: 0.001682614285749984, avg validation loss:0.004195282893553099
precision:0.3584943783900284, recall:0.39831217581217576, f1:0.3578514213351726
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 88.39it/s, training loss=0.364]
100%|██████████| 17/17 [00:00<00:00, 215.66it/s, validation loss=3.86]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 72
avg training loss: 0.0016876305057843652, avg validation loss:0.007175329449451546
precision:0.36225915942897075, recall:0.34791274228774227, f1:0.34208825735818016
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.05it/s, training loss=0.45]
100%|██████████| 17/17 [00:00<00:00, 241.54it/s, validation loss=4.19]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 73
avg training loss: 0.001749147162120752, avg validation loss:0.0077866763430457136
precision:0.42863590657708306, recall:0.34680163117663115, f1:0.3402399464861191
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 91.48it/s, training loss=0.167]
100%|██████████| 17/17 [00:00<00:00, 269.77it/s, validation loss=3.89]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 74
avg training loss: 0.00139847072380018, avg validation loss:0.007230664274506409
precision:0.35329501773175, recall:0.3737303781053781, f1:0.3479975855633537
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 94.02it/s, training loss=0.102]
100%|██████████| 17/17 [00:00<00:00, 227.08it/s, validation loss=3.1]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 75
avg training loss: 0.0010385666537389463, avg validation loss:0.005760727761846492
precision:0.38037623223658945, recall:0.4448324460824461, f1:0.3920983778126635
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 86.05it/s, training loss=0.0835]
100%|██████████| 17/17 [00:00<00:00, 246.13it/s, validation loss=2.81]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 76
avg training loss: 0.0009762771341445963, avg validation loss:0.005232012847985476
precision:0.36567522757397797, recall:0.4258154176904177, f1:0.37713655155411424
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 82.16it/s, training loss=0.114]
100%|██████████| 17/17 [00:00<00:00, 257.88it/s, validation loss=3.08]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 77
avg training loss: 0.0009223391403346409, avg validation loss:0.005733383189346711
precision:0.35607755581668626, recall:0.38424293611793614, f1:0.36502641153803944
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 90.95it/s, training loss=0.0785]
100%|██████████| 17/17 [00:00<00:00, 277.35it/s, validation loss=3.63]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 78
avg training loss: 0.000611950755178762, avg validation loss:0.006744942700552674
precision:0.36945807154413446, recall:0.4048844867594868, f1:0.3812047376558103
----------------------------------------------------------------------------------------------------


100%|██████████| 20/20 [00:00<00:00, 84.05it/s, training loss=0.0828]
100%|██████████| 17/17 [00:00<00:00, 211.88it/s, validation loss=3.69]



----------------------------------------  Validation Scores  ----------------------------------------
EPOCH: 79
avg training loss: 0.0007603637663120761, avg validation loss:0.006850207605326486
precision:0.3645596786970246, recall:0.42534995222495225, f1:0.38030745207537864
----------------------------------------------------------------------------------------------------


## TESTING

In [None]:
# List the files available in the Trac1_test directory on the mounted Drive.
!ls drive/MyDrive/others/Test/Trac1_test/

bangla_test.csv        combine_test.csv     hin_test_.gsheet	mag_test.gsheet
bangla_test.gsheet     combine_test.gsheet  hin_test_w_ids.csv	mag_test_w_ids.csv
bangla_test_w_ids.csv  hin_test_.csv	    mag_test.csv


In [None]:
# Load the Hindi test split from the Trac1_test directory.
test_df = pd.read_csv('drive/MyDrive/others/Test/Trac1_test/hin_test_w_ids.csv')

# Apply the same normalisation used for the training data: lower-case the
# sentences, then strip punctuation.
# Fix: lower-case the *test* sentences. The previous code assigned
# valid_df['sentence'] here, which replaced the test text with (index-aligned)
# validation sentences.
test_df['sentence'] = test_df['sentence'].str.lower().apply(remove_punct_one)

# NOTE(review): test_dataset is not constructed in this cell, so it is whatever
# an earlier cell built. Confirm it is (re)created from this test_df before
# evaluation; otherwise the loader iterates over stale data.
# shuffle=False keeps batch order fixed so predictions line up with dataset order.
test_dataloader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [None]:
# Run the evaluation helper with best_model on the test loader. It returns two
# values: `preds` (decoded to label names further down) and `stats` (displayed
# in the next cell).
preds, stats = eval_loop(best_model, test_dataloader)

100%|██████████| 34/34 [00:00<00:00, 474.84it/s]


In [None]:
# Display the evaluation statistics returned by eval_loop for the test set.
stats

{'macro_precision': 0.25320348173515983,
 'macro_recall': 0.244972692304153,
 'macro_f1': 0.2416986809554397}

In [None]:
# Invert the dataset's label encoding so that an encoded value maps back to
# its original label.
key_to_val = dict(
    (encoded, label) for label, encoded in test_dataset.label_encoding.items()
)
# Translate every prediction back to its label, preserving prediction order.
decoded_pred = list(map(key_to_val.__getitem__, preds))

In [None]:
# Store the decoded predictions as a new 'pred' column on the test frame.
# NOTE(review): assumes decoded_pred has one entry per row of test_df, in the
# same order — confirm test_dataset was built from this test_df.
test_df['pred'] = decoded_pred

In [None]:
# Keep only the rows where the predicted label equals the 'sentiment' column.
correct = test_df.loc[test_df['sentiment'].eq(test_df['pred'])]

In [None]:
# Count the correctly predicted rows per sentiment label.
correct['sentiment'].value_counts()


positive    246
negative     26
mix           2
Name: sentiment, dtype: int64