In [None]:
colab = True

In [None]:
if colab:
  from google.colab import drive
  drive.mount('/content/drive/')

  import sys
  sys.path.append('/content/drive/My\ Drive/dubuce/lab3/')
  %cd /content/drive/My\ Drive/dubuce/lab3/


Mounted at /content/drive/
/content/drive/My Drive/dubuce/lab3


#Zadatak 1. Učitavanje podataka (25% bodova)

In [None]:
import torch
import torch.nn as nn
import pandas as pd
import sklearn
from torch.utils.data import DataLoader, Dataset
from sklearn.metrics import confusion_matrix
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
from dataclasses import dataclass
from typing import *


TRAIN_PATH = 'sst_train_raw.csv'
VAL_PATH = 'sst_valid_raw.csv'
TEST_PATH = 'sst_test_raw.csv'

@dataclass
class Instance:
  df = None

  def __len__(self):
    return len(self.df)

  def __getitem__(self, index):
    return self.df.iloc[index, 0].strip().split(), self.df.iloc[index, 1]


class Vocab:
  """
  vokabular se izgrađuje samo na train skupu podataka
  Jednom izgrađeni vokabular na train skupu postavljate kao
  vokabular testnog i validacijskog skupa podataka
  smatra najkorektniji u analizi teksta jer kroz izgradnju vokabulara na
  testnom i validacijskom skupu imamo curenje informacija u treniranje modela
  """
  itos = {} # index-to-string
  stoi = {} # string-to-index

  def __init__(self, frequencies, max_size, min_freq, text_vocab=True):
    if text_vocab:
      self.itos = {0: "<PAD>", 1: "<UNK>"}
      self.stoi = {"<PAD>": 0, "<UNK>": 1}

      cnt = len(self.stoi)
      for key, value in sorted(frequencies.items(), key=lambda item: item[1], reverse=True):
        if max_size == -1:
          if value > min_freq:
            self.stoi[key] = cnt
            self.itos[cnt] = key
        elif len(self.stoi) > max_size:
          break
        cnt += 1
    else:
      self.itos = {0: "positive", 1: "negative"}
      self.stoi = {"positive": 0, "negative": 1}

  def encode(self, sequence):
    encoded_sequence = []

    if isinstance(sequence, str):
      return torch.tensor(self.stoi[sequence.strip()])

    for word in sequence:
      token_for_word = self.stoi.get(word)
      if token_for_word is None:
        encoded_sequence.append(self.stoi.get('<UNK>')) #JEL OVO OK?
      else:
        encoded_sequence.append(token_for_word)

    return torch.tensor(encoded_sequence)


class NLPDataset(Dataset):

  instances = Instance()
  text_vocab = None
  label_vocab = None
  def __init__(self, csv_file):
    self.instances.df = pd.read_csv(csv_file, header=None)

  def __len__(self):
    return len(self.instances)

  def __getitem__(self, index):
    instance_text, instance_label = self.instances[index]
    return self.text_vocab.encode(instance_text), self.label_vocab.encode(instance_label)



def count_frequency(path):
  df = pd.read_csv(path, header=None)
  frequencies = {}
  for row in range(len(df)):
    for word in df.iloc[row, 0].split():
      if word not in frequencies.keys():
        frequencies[word] = 1
      else:
        frequencies[word] += 1

  return frequencies


def generate_word_embeddings(vocab: Vocab, which='normal', filename=None):
  embeddings = dict(zip(vocab.stoi.keys(), torch.normal(0, 1, size=(len(vocab.stoi), 300))))
  embeddings['<PAD>'] = torch.zeros(300)
  if which == 'file':
    with open(filename, 'r') as file:
      lines = file.readlines()
      for line in lines:
        tmp = line.strip().split()
        embeddings[tmp[0]] = torch.tensor([float(num) for num in tmp[1:]])

  return torch.stack(list(embeddings.values()))


def pad_collate_fn(batch, pad_index=0):
  """
  Arguments:
    Batch:
      list of Instances returned by `Dataset.__getitem__`.
  Returns:
    A tensor representing the input batch.
  """
  texts, labels = zip(*batch) # Assuming the instance is in tuple-like form
  lengths = torch.tensor([len(text) for text in texts]) # Needed for later

  return pad_sequence([text for text in texts], batch_first=True, padding_value=pad_index), torch.tensor(list(labels)), lengths



In [None]:
if __name__ == '__main__':
  train_dataset = NLPDataset(TRAIN_PATH)
  instance_text, instance_label = train_dataset.instances[3]
  print(f"Text: {instance_text}")
  print(f"Label: {instance_label}")

  frequencies = count_frequency(TRAIN_PATH)

  text_vocab = Vocab(frequencies, max_size=-1, min_freq=0)
  label_vocab = Vocab(None, None, None, False)

  train_dataset.text_vocab = text_vocab
  train_dataset.label_vocab = label_vocab

  numericalized_text, numericalized_label = train_dataset[3]
  print(f"Numericalized text: {numericalized_text}")
  print(f"Numericalized label: {numericalized_label}")

  normal_embeddings = generate_word_embeddings(text_vocab)
  embeddings_from_txt = generate_word_embeddings(text_vocab, which='file', filename='sst_glove_6b_300d.txt')

  normal_embeddings = nn.Embedding.from_pretrained(normal_embeddings, freeze=False)
  embeddings_from_txt = nn.Embedding.from_pretrained(embeddings_from_txt, freeze=True)
  # print(normal_embeddings(torch.LongTensor([10])))
  # print(embeddings_from_txt(torch.LongTensor([10])))

  batch_size = 2 # Only for demonstrative purposes
  shuffle = False # Only for demonstrative purposes

  train_dataloader = DataLoader(dataset=train_dataset, batch_size=batch_size,
                                shuffle=shuffle, collate_fn=pad_collate_fn)

  texts, labels, lengths = next(iter(train_dataloader))
  print(f"Texts: {texts}")
  print(f"Labels: {labels}")
  print(f"Lengths: {lengths}")

Text: ['yet', 'the', 'act', 'is', 'still', 'charming', 'here']
Label:  positive
Numericalized text: tensor([189,   2, 674,   7, 129, 348, 143])
Numericalized label: 0
Texts: tensor([[   2,  554,    7, 2872,    6,   22,    2, 2873, 1236,    8,   96, 4800,
            4,   10,   72,    8,  242,    6,   75,    3, 3576,   56, 3577,   34,
         2022, 2874, 7123, 3578, 7124,   42,  779, 7125,    0,    0],
        [   2, 2875, 2023, 4801,    5,    2, 3579,    5,    2, 2876, 4802,    7,
           40,  829,   10,    3, 4803,    5,  627,   62,   27, 2877, 2024, 4804,
          962,  715,    8, 7126,  555,    5, 7127, 4805,    8, 7128]])
Labels: tensor([0, 0])
Lengths: tensor([32, 34])


#Zadatak 2. Implementacija baseline modela (25% bodova)

In [None]:
import torch
import numpy as np
import torch.nn as nn
import pandas as pd
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader, Dataset
from dataclasses import dataclass
from typing import *
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

TRAIN_PATH = 'sst_train_raw.csv'
VAL_PATH = 'sst_valid_raw.csv'
TEST_PATH = 'sst_test_raw.csv'


class BaselineModel(nn.Module):
  """
  avg_pool() -> fc(300, 150) -> ReLU() -> fc(150, 150) -> ReLU() -> fc(150,1)
  """
  def __init__(self, embedding):
    super().__init__()
    self.embedding = embedding
    self.fc1 = nn.Linear(300, 150)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(150, 150)
    self.relu2 = nn.ReLU()
    self.fc3 = nn.Linear(150, 1)

  def forward(self, x):
    x = self.embedding(x)
    x = torch.mean(x, dim=1)
    x = self.fc1(x)
    x = self.relu1(x)
    x = self.fc2(x)
    x = self.relu2(x)
    x = self.fc3(x)

    return x


def train(model, data, optimizer, criterion, args):
  model.train()
  epoch_loss = []
  for batch_num, batch in enumerate(data):
    model.zero_grad()
    x, y, x_len = batch
    logits = model(x)
    loss = criterion(logits, y.float().unsqueeze(1))
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), args['clip'])
    optimizer.step()
    epoch_loss.append(loss.detach().item())

  return sum(epoch_loss) / (len(data))


def evaluate(model, data, criterion, args):
  model.eval()
  y_ = []
  y_pred = []
  with torch.no_grad():
    for batch_num, batch in enumerate(data):
      x, y, x_len = batch
      logits = model(x)
      loss = criterion(logits, y.float().unsqueeze(1))
      y_pred.extend([round(torch.sigmoid(l).item()) for l in logits.squeeze(1)])
      y_.extend([y_i.item() for y_i in y])

  return eval_perf_binary(np.array(y_), np.array(y_pred))


def eval_perf_binary(y_true, y_pred):
  accuracy = accuracy_score(y_true, y_pred)
  precision = precision_score(y_true, y_pred)
  recall = recall_score(y_true, y_pred)
  f1 = f1_score(y_true, y_pred)
  M = confusion_matrix(y_true, y_pred)

  return accuracy, precision, recall, f1, M


def display_measurements(accuracy, recall, precision, f1_score, M, epoch=0, which='valid'):
  if epoch != 0:
    print(f'Epoch {epoch}:')
  print(f'{which} accuracy = {accuracy*100:.3f}%')
  print(f'{which} recall = {recall*100:.3f}%')
  print(f'{which} precision = {precision*100:.3f}%')
  print(f'{which} f1_score = {f1_score*100:.3f}%')
  print(f'{which} confusion matrix: \n{M}')

  return


def load_dataset(args):
  return NLPDataset(args['TRAIN_PATH']), NLPDataset(args['VAL_PATH']), NLPDataset(args['TEST_PATH'])


def initialize_model(embedding, which_model='baseline'):
  if which_model == 'baseline':
    return BaselineModel(embedding)
  elif which_model == 'rnn':
    return RNNModel(embedding)
  elif which_model == 'gru':
    return GRUModel(embedding)
  elif which_model == 'rnn_with_attention':
    return RNNwithAttentionModel(embedding)

  return LSTMModel(embedding)


def main(args):
  seed = args['seed']
  np.random.seed(seed)
  torch.manual_seed(seed)

  train_dataset, valid_dataset, test_dataset = load_dataset(args)

  frequencies = count_frequency(args['TRAIN_PATH'])

  text_vocab = Vocab(frequencies, max_size=args['max_size'], min_freq=args['min_freq'])
  label_vocab = Vocab(None, None, None, False)

  train_dataset.text_vocab = text_vocab
  train_dataset.label_vocab = label_vocab
  valid_dataset.text_vocab = text_vocab
  valid_dataset.label_vocab = label_vocab
  test_dataset.text_vocab = text_vocab
  test_dataset.label_vocab = label_vocab

  embedding = None
  if args['which_embedding'] == 'normal':
    normal_embeddings = generate_word_embeddings(text_vocab)
    embedding = nn.Embedding.from_pretrained(normal_embeddings, freeze=args['freeze'])
  else:
    embeddings_from_txt = generate_word_embeddings(text_vocab, which='file', filename='sst_glove_6b_300d.txt')
    embedding = nn.Embedding.from_pretrained(embeddings_from_txt, freeze=args['freeze'])

  model = initialize_model(embedding, args['which_model'])

  criterion = nn.BCEWithLogitsLoss()
  optimizer = torch.optim.Adam(model.parameters(), lr=args['learning_rate'])

  train_batch_size = args['train_batch_size']
  valid_batch_size = args['valid_batch_size']
  test_batch_size = args['test_batch_size']

  train_dataloader = DataLoader(dataset=train_dataset, batch_size=train_batch_size,
                                shuffle=True, collate_fn=pad_collate_fn)

  valid_dataloader = DataLoader(dataset=valid_dataset, batch_size=valid_batch_size,
                                shuffle=False, collate_fn=pad_collate_fn)

  test_dataloader = DataLoader(dataset=test_dataset, batch_size=test_batch_size,
                                shuffle=False, collate_fn=pad_collate_fn)
  train_loss = None
  for epoch in range(args['epochs']):
    loss = train(model, train_dataloader, optimizer, criterion, args)
    #print(f'Train loss: {loss}')
    accuracy, recall, precision, f1, M = evaluate(model, valid_dataloader, criterion, args)
    #display_measurements(accuracy, recall, precision, f1, M, epoch+1)

  accuracy, recall, precision, f1, M = evaluate(model, test_dataloader, criterion, args)
  display_measurements(accuracy, recall, precision, f1, M, which='test')

  return train_loss, accuracy, recall, precision, f1, M


In [None]:
args = {
      'seed': 7052020,
      'TRAIN_PATH': 'sst_train_raw.csv',
      'VAL_PATH': 'sst_valid_raw.csv',
      'TEST_PATH': 'sst_test_raw.csv',
      'which_model': 'baseline',
      'shuffle': True,
      'freeze': True,
      'epochs': 10,
      'train_batch_size': 10,
      'valid_batch_size': 32,
      'test_batch_size': 32,
      'which_embedding': 'text',
      'max_size': -1,
      'min_freq': 0,
      'learning_rate': 1e-4,
      'clip': 0.5, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
      'metrics': ('accuracy','f1_score', 'confusion_matrix')
}

_, _, _, _, _, _ = main(args)

test accuracy = 76.835%
test recall = 75.450%
test precision = 78.271%
test f1_score = 76.835%
test confusion matrix: 
[[335 109]
 [ 93 335]]


Za 5 epoha:

| Seed  | Test Accuracy | Train Loss |
|:--------:|:--------:|:--------:|
|  7052020 |  71.445%   |  0.648687137121504   |
|  7052021 |  66.858%   |  0.6540305221622641   |
|  15052024 |  67.431%   |  0.6584195623343642   |
|  7042024 |  69.954%   |  0.6512394100427628   |
|  7052024 |  70.413%   |  0.6490043273026292   |

#Zadatak 3. Implementacija povratne neuronske mreže (25% bodova)

In [None]:
class RNNModel(nn.Module):
  """
  rnn(150) -> rnn(150) -> fc(150, 150) -> ReLU() -> fc(150,1)
  """
  def __init__(self, embedding, hidden_size=150, num_layers=2, dropout=0, bidirectional=False):
    super().__init__()
    self.embedding = embedding
    self.rnn1 = nn.RNN(input_size=300, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
    self.fc1 = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, hidden_size)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, 1)

  def forward(self, x): # (N, L)
    x = x.transpose(0, 1)
    x = self.embedding(x) # (N, L, D)
    x, h1 = self.rnn1(x) # (N, L, )
    x = self.fc1(x[-1]) # (N, D)
    x = self.relu1(x)
    x = self.fc2(x)

    return x

class GRUModel(nn.Module):
  """
  rnn(150) -> rnn(150) -> fc(150, 150) -> ReLU() -> fc(150,1)
  """
  def __init__(self, embedding, hidden_size=150, num_layers=2, dropout=0, bidirectional=False):
    super().__init__()
    self.embedding = embedding
    self.gru1 = nn.GRU(input_size=300, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
    self.fc1 = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, hidden_size)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, 1)

  def forward(self, x): # (N, L)
    x = x.transpose(0, 1)
    x = self.embedding(x) # (N, L, D)
    x, h1 = self.gru1(x)
    x = self.fc1(x[-1]) # (N, D)
    x = self.relu1(x)
    x = self.fc2(x)

    return x

class LSTMModel(nn.Module):
  """
  rnn(150) -> rnn(150) -> fc(150, 150) -> ReLU() -> fc(150,1)
  """
  def __init__(self, embedding, hidden_size=150, num_layers=2, dropout=0, bidirectional=False):
    super().__init__()
    self.embedding = embedding
    self.lstm1 = nn.LSTM(input_size=300, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
    self.fc1 = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, hidden_size)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, 1)

  def forward(self, x): # (N, L)
    x = x.transpose(0, 1)
    x = self.embedding(x) # (N, L, D)
    x, h1 = self.lstm1(x) # (N, L, D * Hout)
    x = self.fc1(x[-1]) # (L, D * Hout)
    x = self.relu1(x)
    x = self.fc2(x)

    return x


**RNN**

In [None]:
args = {
      'seed': 7052020,
      'TRAIN_PATH': 'sst_train_raw.csv',
      'VAL_PATH': 'sst_valid_raw.csv',
      'TEST_PATH': 'sst_test_raw.csv',
      'which_model': 'rnn',
      'hidden_size': 150,
      'num_layers': 2,
      'dropout': 0,
      'bidirectional': False,
      'freeze': True,
      'epochs': 10,
      'train_batch_size': 10,
      'valid_batch_size': 32,
      'test_batch_size': 32,
      'which_embedding': 'text',
      'max_size': -1,
      'min_freq': 0,
      'learning_rate': 1e-4,
      'clip': 0.25, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
      'metrics': ('accuracy','f1_score', 'confusion_matrix')
}

_, _, _, _, _, _ = main(args)

test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]


**GRU**

In [None]:
args = {
      'seed': 7052020,
      'TRAIN_PATH': 'sst_train_raw.csv',
      'VAL_PATH': 'sst_valid_raw.csv',
      'TEST_PATH': 'sst_test_raw.csv',
      'which_model': 'gru',
      'hidden_size': 150,
      'num_layers': 2,
      'dropout': 0,
      'bidirectional': False,
      'freeze': True,
      'epochs': 10,
      'train_batch_size': 10,
      'valid_batch_size': 32,
      'test_batch_size': 32,
      'which_embedding': 'text',
      'learning_rate': 1e-4,
      'max_size': -1,
      'min_freq': 0,
      'clip': 0.25, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
      'metrics': ('accuracy','f1_score', 'confusion_matrix')
}

_, _, _, _, _, _ = main(args)

test accuracy = 85.092%
test recall = 82.533%
test precision = 88.318%
test f1_score = 85.327%
test confusion matrix: 
[[364  80]
 [ 50 378]]


**LSTM**

In [None]:
args = {
      'seed': 7052020,
      'TRAIN_PATH': 'sst_train_raw.csv',
      'VAL_PATH': 'sst_valid_raw.csv',
      'TEST_PATH': 'sst_test_raw.csv',
      'which_model': 'lstm',
      'hidden_size': 150,
      'num_layers': 2,
      'dropout': 0,
      'bidirectional': False,
      'freeze': True,
      'epochs': 10,
      'train_batch_size': 10,
      'valid_batch_size': 32,
      'test_batch_size': 32,
      'which_embedding': 'text',
      'learning_rate': 1e-4,
      'max_size': -1,
      'min_freq': 0,
      'clip': 0.25, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
      'metrics': ('accuracy','f1_score', 'confusion_matrix')
}

_, _, _, _, _, _ = main(args)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


#Zadatak 4. Usporedba modela i pretraga hiperparametara (25% bodova)

Vanilla RNN - Test Accuracy: 79.702%

GRU - Test Accuracy: 85.092%

LSTM - Test Accuracy: 83.486%

In [None]:
which_models = ['rnn', 'gru', 'lstm']
hidden_layers = [100, 150, 200]
num_layers = [1, 2, 3]
dropout = [0, 0.25, 0.5]
bidirectional = [True, False]

In [None]:
best_f1_score = None
best_hiperparams = None

In [None]:
def write_into_file(filename, metrics, hiperparams):
  with open(filename, 'a') as file:
    file.write(f'Model: {hiperparams[0]}\n')
    file.write(f'Hiperparams:\n')
    file.write(f'\t hidden_layers: {hiperparams[1]}\n')
    file.write(f'\t num_layers: {hiperparams[2]}\n')
    file.write(f'\t dropout: {hiperparams[3]}\n')
    file.write(f'\t bidirectional: {hiperparams[4]}\n')
    file.write(f'Metrics:\n')
    file.write(f'\t accuracy: {metrics[1]:.2f}\n')
    file.write(f'\t recall: {metrics[2]:.2f}\n')
    file.write(f'\t precision: {metrics[3]:.2f}\n')
    file.write(f'\t f1: {metrics[4]:.2f}\n')
    file.write('______________________________________________________\n')

  return

In [None]:
for wm in which_models:
  for hs in hidden_layers:
    for nl in num_layers:
      for d in dropout:
        for bd in bidirectional:
          args = {
                'seed': 7052020,
                'TRAIN_PATH': 'sst_train_raw.csv',
                'VAL_PATH': 'sst_valid_raw.csv',
                'TEST_PATH': 'sst_test_raw.csv',
                'which_model': wm,
                'hidden_size': hs,
                'num_layers': nl,
                'dropout': d,
                'bidirectional': bd,
                'freeze': True,
                'epochs': 10,
                'train_batch_size': 10,
                'valid_batch_size': 32,
                'test_batch_size': 32,
                'which_embedding': 'text',
                'learning_rate': 1e-4,
                'clip': 0.25, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
                'metrics': ('accuracy','f1_score', 'confusion_matrix')
          }

          train_loss, accuracy, recall, precision, f1, M = main(args)
          if best_f1_score is None or best_f1_score < f1:
            best_f1_score = f1
            best_hiperparams = (wm, hs, nl, d, bd)

          metrics = (train_loss, accuracy, recall, precision, f1, M)
          hiperparams = (wm, hs, nl, d, bd)
          write_into_file('hiperparam_log.txt', metrics, hiperparams)

test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test accuracy = 79.702%
test recall = 73.284%
test precision = 92.290%
test f1_score = 81.696%
test confusion matrix: 
[[300 144]
 [ 33 395]]
test a

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 83.486%
test recall = 78.287%
test precision = 91.822%
test f1_score = 84.516%
test confusion matrix: 
[[335 109]
 [ 35 393]]


In [None]:
print(best_hiperparams)
print(best_f1_score)

with open('best_model.txt', 'w') as file:
  file.write(f'Best hiperparams: {best_hiperparams}\n')
  file.write(f'Best f1_score: {best_f1_score}\n')

('gru', 100, 1, 0, True)
0.8532731376975169


najbolji set hiperparametara bez korištenja prednaučene vektorske reprezentacije

In [None]:
with open('best_model.txt', 'r') as file:
  best_hiperparams = file.readline().strip().split(': ')[1][1:-1].split(', ')

In [None]:
  args = {
        'seed': 7052020,
        'TRAIN_PATH': 'sst_train_raw.csv',
        'VAL_PATH': 'sst_valid_raw.csv',
        'TEST_PATH': 'sst_test_raw.csv',
        'which_model': best_hiperparams[0],
        'hidden_size': int(best_hiperparams[1]),
        'num_layers': int(best_hiperparams[2]),
        'dropout': int(best_hiperparams[3]),
        'bidirectional': bool(best_hiperparams[4]),
        'freeze': False,
        'epochs': 10,
        'train_batch_size': 10,
        'valid_batch_size': 32,
        'test_batch_size': 32,
        'which_embedding': 'normal',
        'max_size': -1,
        'min_freq': 0,
        'learning_rate': 1e-4,
        'clip': 0.25, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
        'metrics': ('accuracy','f1_score', 'confusion_matrix')
  }
  train_loss, accuracy, recall, precision, f1, M = main(args)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 94.381%
test recall = 90.405%
test precision = 99.065%
test f1_score = 94.537%
test confusion matrix: 
[[399  45]
 [  4 424]]


1. Veličina vokabulara V
2. Veličina batcha
3. Dropout
4. Dimenzionalnost skrivenih slojeva
5. Iznos na koji se podrezuju vrijednosti gradijenata

In [None]:
max_size = [1000, 2000, -1]
batch_size = [100, 20, 10]
dropout = [0.5, 0.25, 0]
hidden_size = [50, 100, 200]
clip = [1, 0.5, 0.25]

for i in range(3):
  args = {
        'seed': 7052020,
        'TRAIN_PATH': 'sst_train_raw.csv',
        'VAL_PATH': 'sst_valid_raw.csv',
        'TEST_PATH': 'sst_test_raw.csv',
        'which_model': 'gru',
        'hidden_size': hidden_size[i],
        'num_layers': 2,
        'dropout': dropout[i],
        'bidirectional': True,
        'freeze': True,
        'epochs': 10,
        'train_batch_size': batch_size[i],
        'valid_batch_size': 32,
        'test_batch_size': 32,
        'which_embedding': 'text',
        'max_size': max_size[i],
        'min_freq': 0,
        'learning_rate': 1e-4,
        'clip': clip[i], # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
        'metrics': ('accuracy','f1_score', 'confusion_matrix')
  }

  train_loss, accuracy, recall, precision, f1, M = main(args)

  metrics = (train_loss, accuracy, recall, precision, f1, M)
  hiperparams = (max_size[i], batch_size[i], dropout[i], hidden_size[i], clip[i])
  write_into_file('hiperparam_log_gru.txt', metrics, hiperparams)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 49.771%
test recall = 44.318%
test precision = 9.112%
test f1_score = 15.116%
test confusion matrix: 
[[395  49]
 [389  39]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


test accuracy = 50.459%
test recall = 44.444%
test precision = 3.738%
test f1_score = 6.897%
test confusion matrix: 
[[424  20]
 [412  16]]
test accuracy = 85.092%
test recall = 82.533%
test precision = 88.318%
test f1_score = 85.327%
test confusion matrix: 
[[364  80]
 [ 50 378]]


In [None]:
max_size = [1000, 2000, -1]
batch_size = [100, 20, 10]
dropout = [0.5, 0.25, 0]
hidden_size = [50, 100, 200]
clip = [1, 0.5, 0.25]

for i in range(3):
  args = {
        'seed': 7052020,
        'TRAIN_PATH': 'sst_train_raw.csv',
        'VAL_PATH': 'sst_valid_raw.csv',
        'TEST_PATH': 'sst_test_raw.csv',
        'which_model': 'baseline',
        'hidden_size': hidden_size[i],
        'num_layers': 2,
        'dropout': dropout[i],
        'bidirectional': True,
        'freeze': True,
        'epochs': 10,
        'train_batch_size': batch_size[i],
        'valid_batch_size': 32,
        'test_batch_size': 32,
        'which_embedding': 'text',
        'max_size': max_size[i],
        'min_freq': 0,
        'learning_rate': 1e-4,
        'clip': clip[i], # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
        'metrics': ('accuracy','f1_score', 'confusion_matrix')
  }

  train_loss, accuracy, recall, precision, f1, M = main(args)

  metrics = (train_loss, accuracy, recall, precision, f1, M)
  hiperparams = (max_size[i], batch_size[i], dropout[i], hidden_size[i], clip[i])
  write_into_file('hiperparam_log_baseline.txt', metrics, hiperparams)

test accuracy = 52.179%
test recall = 54.264%
test precision = 16.355%
test f1_score = 25.135%
test confusion matrix: 
[[385  59]
 [358  70]]
test accuracy = 53.211%
test recall = 54.065%
test precision = 31.075%
test f1_score = 39.466%
test confusion matrix: 
[[331 113]
 [295 133]]
test accuracy = 76.376%
test recall = 74.888%
test precision = 78.037%
test f1_score = 76.430%
test confusion matrix: 
[[332 112]
 [ 94 334]]


Veličina vokabulara, iznos na koji se podrezuju gradijenti, bidirectional...

#Bonus zadatak: pozornost (max 20% bodova)

In [None]:
class RNNwithAttentionModel(nn.Module):
  """
  rnn(150) -> rnn(150) -> fc(150, 150) -> ReLU() -> fc(150,1)
  """
  def __init__(self, embedding, hidden_size=150, num_layers=2, dropout=0, bidirectional=False):
    super().__init__()
    self.embedding = embedding
    self.rnn1 = nn.RNN(input_size=300, hidden_size=hidden_size, num_layers=num_layers, dropout=dropout, bidirectional=bidirectional)
    self.fc1 = nn.Linear(hidden_size * 2 if bidirectional else hidden_size, hidden_size)
    self.w2 = torch.zeros(1)
    self.relu1 = nn.ReLU()
    self.fc2 = nn.Linear(hidden_size, 1)

  def forward(self, x): # (N, L)
    x = x.transpose(0, 1)
    x = self.embedding(x) # (N, L, D)
    x, h1 = self.rnn1(x) # (N, L, )
    print("h1_shape", h1.shape) # (D, N, H_out)
    a_t = self.w2 * torch.tanh(self.fc1(h1[-1])) # (N, H)
    print("a_t", a_t.shape)
    alpha = torch.softmax(a_t, dim=1) # (N, H)
    print("alpha_t", alpha.shape)
    out_attn = torch.sum()
    x = self.relu1(x)
    x = self.fc2(x)

    return x

In [None]:
args = {
        'seed': 7052020,
        'TRAIN_PATH': 'sst_train_raw.csv',
        'VAL_PATH': 'sst_valid_raw.csv',
        'TEST_PATH': 'sst_test_raw.csv',
        'which_model': 'rnn_with_attention',
        'hidden_size': 150,
        'num_layers': 2,
        'dropout': 0,
        'bidirectional': False,
        'freeze': True,
        'epochs': 10,
        'train_batch_size': 10,
        'valid_batch_size': 32,
        'test_batch_size': 32,
        'which_embedding': 'text',
        'max_size': -1,
        'min_freq': 0,
        'learning_rate': 1e-4,
        'clip': 0.25, # https://neptune.ai/blog/understanding-gradient-clipping-and-how-it-can-fix-exploding-gradients-problem
        'metrics': ('accuracy','f1_score', 'confusion_matrix')
}
#train_loss, accuracy, recall, precision, f1, M = main(args)