In [1]:
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /gdrive


In [2]:
! pip install transformers -q
! pip install tokenizers -q

[K     |████████████████████████████████| 778kB 4.7MB/s 
[K     |████████████████████████████████| 3.0MB 23.3MB/s 
[K     |████████████████████████████████| 890kB 42.9MB/s 
[K     |████████████████████████████████| 1.1MB 45.1MB/s 
[?25h  Building wheel for sacremoses (setup.py) ... [?25l[?25hdone


In [3]:
import re
import pandas as pd
from pathlib import Path
import matplotlib.cm as cm
import numpy as np
import pandas as pd
from typing import *
from tqdm.notebook import tqdm
from sklearn.utils.extmath import softmax
from sklearn import model_selection
from sklearn.metrics import classification_report, f1_score, roc_auc_score

In [4]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
import transformers
from transformers import AdamW
import tokenizers

In [5]:
def seed_all(seed = 42):
  """
  Fix seed for reproducibility
  """
  # python RNG
  import random
  random.seed(seed)

  # pytorch RNGs
  import torch
  torch.manual_seed(seed)
  torch.backends.cudnn.deterministic = True
  if torch.cuda.is_available(): torch.cuda.manual_seed_all(seed)

  # numpy RNG
  import numpy as np
  np.random.seed(seed)

In [6]:
import os
os.chdir('/gdrive/My Drive/Sentihood/')

In [7]:
class config:
  SEED = 42
  KFOLD = 5
  TRAIN_FILE = 'train_multi.csv'
  VAL_FILE = 'dev_multi.csv'
  TEST_FILE = 'test_multi.csv'
  PSEUDO_FILE = os.path.join('run_roberta_multi_20', 'pseudo_labels.csv')
  COMPLETE_TEST = 'complete_test.csv'
  SAVE_DIR = 'run_roberta_multi_pseudo_20'
  MAX_LEN = 192
  MODEL = './roberta-base'
  TOKENIZER = transformers.RobertaTokenizer.from_pretrained(MODEL)
  EPOCHS = 5*2
  TRAIN_BATCH_SIZE = 16
  VALID_BATCH_SIZE = 16

In [8]:
class AverageMeter:
    """
    Computes and stores the average and current value
    Source : https://www.kaggle.com/abhishek/bert-base-uncased-using-pytorch/
    """
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

In [9]:
class EarlyStopping:
    """
    Early stopping utility
    Source : https://www.kaggle.com/abhishek/bert-base-uncased-using-pytorch/
    """
    
    def __init__(self, patience=7, mode="max", delta=0.001):
        self.patience = patience
        self.counter = 0
        self.mode = mode
        self.best_score = None
        self.early_stop = False
        self.delta = delta
        if self.mode == "min":
            self.val_score = np.Inf
        else:
            self.val_score = -np.Inf

    def __call__(self, epoch_score, model, model_path):
        if self.mode == "min":
            score = -1.0 * epoch_score
        else:
            score = np.copy(epoch_score)
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print('EarlyStopping counter: {} out of {}'.format(self.counter, self.patience))
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(epoch_score, model, model_path)
            self.counter = 0

    def save_checkpoint(self, epoch_score, model, model_path):
        if epoch_score not in [-np.inf, np.inf, -np.nan, np.nan]:
            print('Validation score improved ({} --> {}). Saving model!'.format(self.val_score, epoch_score))
            torch.save(model.state_dict(), model_path)
        self.val_score = epoch_score

In [10]:
df = pd.read_csv(config.TRAIN_FILE)
list_aspects = []
for i, r in df.iterrows():
  list_aspects.extend(r['aspect'].split(' '))

aspects = sorted(set(list_aspects))

aspect2idx = {v:k for k,v in enumerate(aspects)}
idx2aspect = {v:k for k,v in aspect2idx.items()}

In [11]:
sentiment2idx = {'Negative':1, 'Positive':2, 'None':0}
idx2sentiment = {0:'None', 1:'Negative', 2:'Positive'}

In [12]:
def process_data(text, target, aspect, sentiment, tokenizer, max_len):

  text = str(text).strip()
  target = str(target).strip()
 
  question = f'find about {target}'

  tokens = tokenizer(text = question, text_pair=text)

  aspect = aspect.split(' ')
  sentiment = sentiment.split(' ')

  aspect_class = [0]*len(aspect2idx)
  for i in aspect:
    aspect_class[aspect2idx[i]] = 1

  sentiment_class = [0]*len(aspect2idx)
  for i, j in zip(aspect, sentiment):
    sentiment_class[aspect2idx[i]] = sentiment2idx[j]


  token_ids = tokens.input_ids
  mask = [1] * len(token_ids)  

  padding = max_len - len(token_ids)
  
  if padding>=0:
    token_ids = token_ids + ([0] * padding)
    mask = mask + ([0] * padding)
    
  else:
    token_ids = token_ids[0:max_len]
    mask = mask[0:max_len]
    
  
  assert len(token_ids)==max_len
  assert len(mask)==max_len
  
  
  return {'text': text,
          'ids': token_ids,
          'mask': mask,
          'aspect': aspect_class,
          'sentiment': sentiment_class,
        }

In [13]:
class Dataset:
    def __init__(self, text, target, aspect, sentiment):
        self.text = text
        self.target = target
        self.aspect = aspect
        self.sentiment = sentiment
        self.tokenizer = config.TOKENIZER
        self.max_len = config.MAX_LEN
    
    def __len__(self):
        return len(self.text)

    def __getitem__(self, item):
        data = process_data(
            self.text[item],
            self.target[item],
            self.aspect[item], 
            self.sentiment[item],
            self.tokenizer,
            self.max_len,
        )

        return {
            'ids': torch.tensor(data["ids"], dtype=torch.long),
            'mask': torch.tensor(data["mask"], dtype=torch.long),
            'aspect': torch.tensor(data['aspect'], dtype=torch.float32),
            'sentiment': torch.tensor(data['sentiment'], dtype=torch.long),
        }

In [14]:
def get_loss(aspect, aspect_logits, sentiment, sentiment_logits):
  loss_fn1 = nn.BCEWithLogitsLoss()
  loss_fn2 = nn.CrossEntropyLoss()

  aspect_loss = loss_fn1(aspect_logits, aspect)
  
  # adjusting dimensionality for CrossEntropy loss
  sentiment_logits = sentiment_logits.view(-1, len(sentiment2idx))
  sentiment = sentiment.view(-1)

  sentiment_loss = loss_fn2(sentiment_logits, sentiment)

  loss = aspect_loss + sentiment_loss
  return loss


In [15]:
class RobertaM(nn.Module):
  def __init__(self):
    super(RobertaM, self).__init__()
    self.bert = transformers.RobertaModel.from_pretrained(config.MODEL, output_hidden_states=False)
    self.aspect_cls =  nn.Linear(768, len(aspect2idx))
    self.sentiment_cls = nn.Linear(768, len(sentiment2idx)*len(aspect2idx))
    self.drop = nn.Dropout(0.1)
    

  def forward(self, ids, mask, aspect=None, sentiment=None):
    
    output = self.bert(input_ids = ids, attention_mask = mask)
    out = self.drop(output[1])

    aspect_logits = self.aspect_cls(out)
    sentiment_logits = self.sentiment_cls(out).view(-1, len(aspect2idx), len(sentiment2idx)) 

    loss = get_loss(aspect, aspect_logits, sentiment, sentiment_logits)    

    return loss, aspect_logits, sentiment_logits

In [16]:
def train_fn(data_loader, model, optimizer, device):
  model.train()
  losses = AverageMeter()
  tk0 = tqdm(data_loader, total=len(data_loader))
  
  for bi, d in enumerate(tk0):
    
    ids = d['ids'].to(device, dtype=torch.long)
    mask = d['mask'].to(device, dtype=torch.long)
    aspect = d['aspect'].to(device, dtype=torch.float32)
    sentiment = d['sentiment'].to(device, dtype=torch.long)
    

    model.zero_grad()
    loss, aspect_logits, sentiment_logits = model(ids, mask, aspect, sentiment)

    loss.backward()
    optimizer.step()

    losses.update(loss.item(), ids.size(0))
    tk0.set_postfix(loss=losses.avg)


In [17]:
def one_hot(a, num_classes):
  return np.squeeze(np.eye(num_classes)[a.reshape(-1)])

def sigmoid(x):
  return 1 / (1 + np.exp(-x))

def softmax_custom(x):
  return np.exp(x)/np.sum(np.exp(x), axis=-1)

In [18]:
def eval_fn(data_loader, model, device):
  model.eval()
  losses = AverageMeter()
  
  tk0 = tqdm(data_loader, total=len(data_loader))
  
  a_yt, a_ypb, a_yp = [], [], []
  s_yt, s_ypb, s_yp = [], [], []
  
  
  for bi, d in enumerate(tk0):
    ids = d['ids'].to(device, dtype=torch.long)
    mask = d['mask'].to(device, dtype=torch.long)
    aspect = d['aspect'].to(device, dtype=torch.float32)
    sentiment = d['sentiment'].to(device, dtype=torch.long)
    
    with torch.no_grad():
      loss, aspect_logits, sentiment_logits = model(ids, mask, aspect, sentiment)
  

    aspect_logits = aspect_logits.detach().cpu().numpy()
    sentiment_logits = sentiment_logits.detach().cpu().numpy()

    aspect_prob = sigmoid(aspect_logits)

    sentiment_prob = np.zeros(sentiment_logits.shape)

    for i in range(sentiment_prob.shape[0]):
      sentiment_prob[i] = softmax(sentiment_logits[i])
    
    aspect_pred = np.where(aspect_prob>=0.5, 1, 0)  
    sentiment_pred = np.argmax(sentiment_prob, axis=-1)

    aspect_truth = d['aspect'].to('cpu').numpy()
    sentiment_truth = d['sentiment'].to('cpu').numpy()

    aspect_truth = aspect_truth.flatten()
    sentiment_truth = sentiment_truth.flatten()

    aspect_prob = aspect_prob.flatten()
    sentiment_prob = sentiment_prob.reshape(-1, len(sentiment2idx))
    
    aspect_pred = aspect_pred.flatten()
    sentiment_pred = sentiment_pred.flatten()
    

    a_yt = a_yt + aspect_truth.tolist()
    a_ypb = a_ypb + aspect_prob.tolist()
    a_yp = a_yp + aspect_pred.tolist()

    s_yt = s_yt + sentiment_truth.tolist()
    s_ypb = s_ypb + sentiment_prob.tolist()
    s_yp = s_yp + sentiment_pred.tolist()
      
    losses.update(loss.item(), ids.size(0))

    tk0.set_postfix(loss=losses.avg)

  a_yt = np.array(a_yt)
  s_yt = np.array(s_yt)

  a_yp = np.array(a_yp)
  s_yp = np.array(s_yp)

  a_ypb = np.vstack(a_ypb).flatten()
  s_ypb = np.vstack(s_ypb).reshape(-1, len(sentiment2idx))
  

  print(f'Aspect : F1 score = {f1_score(a_yt, a_yp, average="macro")} | AUC-ROC = {roc_auc_score(a_yt, a_ypb, average="macro")} ')
  print(f'Sentiment : F1 score = {f1_score(s_yt, s_yp, average="macro")} | AUC-ROC = {roc_auc_score(one_hot(s_yt, len(sentiment2idx)), s_ypb, average="macro")} ')
  
  return losses.avg 
  

In [19]:
def run(df_train, df_val, fold=None):
  train_dataset = Dataset(
        text = df_train.text.values,
        target = df_train.target.values,
        aspect = df_train.aspect.values,
        sentiment = df_train.sentiment.values,
    )
  
  valid_dataset = Dataset(
        text = df_val.text.values,
        target = df_val.target.values,
        aspect = df_val.aspect.values,
        sentiment = df_val.sentiment.values,
    )
  
    
  train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=config.TRAIN_BATCH_SIZE,
        num_workers=4
    )

  valid_data_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=2
    )

  model = RobertaM()
  device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")
  model.to(device)
  # torch.save(model.state_dict(), os.path.join(config.SAVE_DIR, f"model_{fold}.bin"))

  lr = 5e-5
  param_optimizer = list(model.named_parameters())
  no_decay = ['bias', 'gamma', 'beta']
  optimizer_grouped_parameters = [
      {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
      'weight_decay_rate': 0.01},
      {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
      'weight_decay_rate': 0.0}
  ]
  optimizer = AdamW(optimizer_grouped_parameters, lr=lr)

  es = EarlyStopping(patience=3, mode="min")

  print('Starting training....')
  for epoch in range(config.EPOCHS):
    train_fn(train_data_loader, model, optimizer, device)
    valid_loss = eval_fn(valid_data_loader, model, device)
    print(f'Epoch : {epoch + 1} | Validation Score :{valid_loss}')
    if fold is None:
      es(valid_loss, model, model_path=os.path.join(config.SAVE_DIR, "model.bin"))
    else:
      es(valid_loss, model, model_path=os.path.join(config.SAVE_DIR, f"model_{fold}.bin"))
    if es.early_stop:
      print("Early stopping")
      break

  return

In [20]:
def run_k_fold():
  '''
    Perform k-fold cross-validation
  '''

  seed_all()
  
  df_train = pd.read_csv(config.TRAIN_FILE)
  df_val = pd.read_csv(config.VAL_FILE)
  
  df_extra = pd.read_csv(config.PSEUDO_FILE)
  df_extra.dropna(inplace=True)
  
  df_pseudo = pd.DataFrame({'id': df_extra['id'].values,
                            'text': df_extra['text'].values,
                            'aspect': df_extra['pred_aspect'].values,
                            'sentiment': df_extra['pred_sentiment'].values,
                            })


  # concatenating train and validation set
  train = pd.concat([df_train, df_val, df_pseudo]).reset_index()

  # dividing folds
  kf = model_selection.StratifiedKFold(n_splits=config.KFOLD, shuffle=False, random_state=config.SEED)
  for fold, (train_idx, val_idx) in enumerate(kf.split(X=train, y=train.aspect.values)):
      train.loc[val_idx, 'kfold'] = fold

  
  aspect, sentiment = [], []

  for i in range(config.KFOLD):
    print(f'################################################ Fold {i} ##################################################')
    df_train = train[train.kfold!=i]
    df_val = train[train.kfold==i]
    run(df_train, df_val, i)
  

In [21]:
def test_fn(data_loader, model, device, cal_metric=True):
  model.eval()
  losses = AverageMeter()
  
  tk0 = tqdm(data_loader, total=len(data_loader))
  
  a_yt, a_ypb, a_yp = [], [], []
  s_yt, s_ypb, s_yp = [], [], []
  
  for bi, d in enumerate(tk0):
    ids = d['ids'].to(device, dtype=torch.long)
    mask = d['mask'].to(device, dtype=torch.long)
    aspect = d['aspect'].to(device, dtype=torch.float32)
    sentiment = d['sentiment'].to(device, dtype=torch.long)
    
    with torch.no_grad():
      loss, aspect_logits, sentiment_logits = model(ids, mask, aspect, sentiment)
  

    aspect_logits = aspect_logits.detach().cpu().numpy()
    sentiment_logits = sentiment_logits.detach().cpu().numpy()

    aspect_prob = sigmoid(aspect_logits) # (bs, a)
    sentiment_prob = np.zeros(sentiment_logits.shape) # (bs, a, s) 

    
    for i in range(sentiment_prob.shape[0]):
      sentiment_prob[i] = softmax(sentiment_logits[i])
    
    aspect_pred = np.where(aspect_prob>=0.5, 1, 0)  # (bs, a)
    sentiment_pred = np.argmax(sentiment_prob, axis=-1) # (bs, a)


    aspect_truth = d['aspect'].to('cpu').numpy() # (bs, a)
    sentiment_truth = d['sentiment'].to('cpu').numpy() # (bs, a)

    aspect_truth = aspect_truth.flatten()
    sentiment_truth = sentiment_truth.flatten()

    aspect_prob = aspect_prob.flatten()
    sentiment_prob = sentiment_prob.reshape(-1, len(sentiment2idx))
    
    aspect_pred = aspect_pred.flatten()
    sentiment_pred = sentiment_pred.flatten()
    

    a_yt = a_yt + aspect_truth.tolist()
    a_ypb = a_ypb + aspect_prob.tolist()
    a_yp = a_yp + aspect_pred.tolist()

    s_yt = s_yt + sentiment_truth.tolist()
    s_ypb = s_ypb + sentiment_prob.tolist()
    s_yp = s_yp + sentiment_pred.tolist()
      
    losses.update(loss.item(), ids.size(0))

    tk0.set_postfix(loss=losses.avg)
    

  a_yt = np.array(a_yt)
  s_yt = np.array(s_yt)

  a_yp = np.array(a_yp)
  s_yp = np.array(s_yp)

  a_ypb = np.vstack(a_ypb).flatten()
  s_ypb = np.vstack(s_ypb).reshape(-1, len(sentiment2idx))
  
  if cal_metric:
    print(f'Aspect : F1 score = {f1_score(a_yt, a_yp, average="macro")} | AUC-ROC = {roc_auc_score(a_yt, a_ypb, average="macro")} ')
    print(f'Sentiment : F1 score = {f1_score(s_yt, s_yp, average="macro")} | AUC-ROC = {roc_auc_score(one_hot(s_yt, len(sentiment2idx)), s_ypb, average="macro")} ')
  
  # changing to original shape
  a_ypb = a_ypb.reshape(-1, len(aspect2idx))
  s_ypb = s_ypb.reshape(-1, len(aspect2idx), len(sentiment2idx))

  return a_ypb, s_ypb
  

In [22]:
def generate_results(df_test, cal_metric=False):
  test_dataset = Dataset(
        text = df_test.text.values,
        target = df_test.target.values,
        aspect = df_test.aspect.values,
        sentiment = df_test.sentiment.values,
    )
  test_data_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=config.VALID_BATCH_SIZE,
        num_workers=2
    )

  model = RobertaM()
  sentiment, aspect = [], []
  device = torch.device("cuda:0" if (torch.cuda.is_available()) else "cpu")

  for fold in range(config.KFOLD):
    model.load_state_dict(torch.load(os.path.join(config.SAVE_DIR, f"model_{fold}.bin")))
    model.to(device)
    ap, sp = test_fn(test_data_loader, model, device, cal_metric)
    aspect.append(ap)
    sentiment.append(sp)

  # avg probabilities
  avg_aspect = sum(aspect)/config.KFOLD # (bs, a)
  avg_sentiment = sum(sentiment)/config.KFOLD # (bs, a, s)

  aspect_pred = np.where(avg_aspect>=0.5, 1, 0)  # (bs, a)
  sentiment_pred = np.argmax(avg_sentiment, axis=-1) # (bs, a)

  predicted_aspect, predicted_sentiment = [], []
    
  for i in range(aspect_pred.shape[0]):
    a, s = [], []
    for j in range(aspect_pred.shape[1]):
      ## aspect is present and sentiment is not "None"
      if aspect_pred[i][j]==1 and sentiment_pred[i][j]!=0:
        a.append(idx2aspect[j])
        s.append(idx2sentiment[sentiment_pred[i][j]])
    predicted_aspect.append(' '.join(a))
    predicted_sentiment.append(' '.join(s))

  df_test['pred_aspect'] = predicted_aspect
  df_test['pred_sentiment'] = predicted_sentiment

  a_yt, s_yt = [], []

  for d in test_data_loader:
    a_yt.append(d['aspect'])
    s_yt.append(d['sentiment'])

  a_yt = np.vstack(a_yt).flatten()
  s_yt = np.vstack(s_yt).flatten()

  avg_aspect = avg_aspect.reshape(-1)
  avg_sentiment = avg_sentiment.reshape(-1, len(sentiment2idx))

  aspect_pred = aspect_pred.flatten()
  sentiment_pred = sentiment_pred.flatten()
  
  if cal_metric:
    print('Final Results : ')
    print(f'Aspect : F1 score = {f1_score(a_yt, aspect_pred, average="macro")}\
    | AUC-ROC = {roc_auc_score(a_yt, avg_aspect, average="macro")} ')
    
    print(f'Sentiment : F1 score = {f1_score(s_yt, sentiment_pred, average="macro")}\
    | AUC-ROC = {roc_auc_score(one_hot(s_yt, len(sentiment2idx)), avg_sentiment, average="macro")} ')


  return df_test

In [23]:
if __name__=='__main__':
  ! rm -rf {config.SAVE_DIR} && mkdir {config.SAVE_DIR}
  print('Starting training....')
  run_k_fold()
  
  print('Predicting for labeled test dataset....')
  df_test = pd.read_csv(config.TEST_FILE)
  df_test = generate_results(df_test, True)
  df_test.to_csv(os.path.join(config.SAVE_DIR, 'sub.csv'), index=False)

  print('Generating predictions for complete test data....')
  pseudo = pd.read_csv(config.COMPLETE_TEST)
  df = generate_results(pseudo, cal_metric=False)
  df.to_csv(os.path.join(config.SAVE_DIR, 'predictions.csv'), index=False)


Starting training....




################################################ Fold 0 ##################################################
Starting training....


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.7541248203403315 | AUC-ROC = 0.8960435914098356 
Sentiment : F1 score = 0.5389399195395644 | AUC-ROC = 0.8897336787619566 
Epoch : 1 | Validation Score :0.4806599169242673
Validation score improved (inf --> 0.4806599169242673). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8897721862826673 | AUC-ROC = 0.9579880537783019 
Sentiment : F1 score = 0.7353656110810635 | AUC-ROC = 0.9532976910239261 
Epoch : 2 | Validation Score :0.30240813813558437
Validation score improved (0.4806599169242673 --> 0.30240813813558437). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8971585089577488 | AUC-ROC = 0.9598116974917908 
Sentiment : F1 score = 0.7582421153627888 | AUC-ROC = 0.9574755039155421 
Epoch : 3 | Validation Score :0.277501025723248
Validation score improved (0.30240813813558437 --> 0.277501025723248). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9052238521723255 | AUC-ROC = 0.9785815676872658 
Sentiment : F1 score = 0.7910899566117084 | AUC-ROC = 0.9749835018830795 
Epoch : 4 | Validation Score :0.235912945648519
Validation score improved (0.277501025723248 --> 0.235912945648519). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9253544160287346 | AUC-ROC = 0.9827224242055722 
Sentiment : F1 score = 0.8159833468862615 | AUC-ROC = 0.9800789862201799 
Epoch : 5 | Validation Score :0.20896353532628315
Validation score improved (0.235912945648519 --> 0.20896353532628315). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9251185777931739 | AUC-ROC = 0.9826365306398188 
Sentiment : F1 score = 0.8212744135063818 | AUC-ROC = 0.9806687668252572 
Epoch : 6 | Validation Score :0.20842322343733252
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9302630009742232 | AUC-ROC = 0.9836020891271183 
Sentiment : F1 score = 0.8507299474923865 | AUC-ROC = 0.9817274446485774 
Epoch : 7 | Validation Score :0.20352171354177523
Validation score improved (0.20896353532628315 --> 0.20352171354177523). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9246079168788455 | AUC-ROC = 0.9825782760927485 
Sentiment : F1 score = 0.8409379010808062 | AUC-ROC = 0.9810158677598438 
Epoch : 8 | Validation Score :0.21686718042303876
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9212612059188763 | AUC-ROC = 0.980651517578788 
Sentiment : F1 score = 0.8326049354072387 | AUC-ROC = 0.9787338966739213 
Epoch : 9 | Validation Score :0.22344692131368124
EarlyStopping counter: 2 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9277601990999391 | AUC-ROC = 0.9835970928429717 
Sentiment : F1 score = 0.839286452588971 | AUC-ROC = 0.9805865783892834 
Epoch : 10 | Validation Score :0.21792104927504935
EarlyStopping counter: 3 out of 3
Early stopping
################################################ Fold 1 ##################################################
Starting training....


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.6873210620789932 | AUC-ROC = 0.8986518788570832 
Sentiment : F1 score = 0.4759333324722077 | AUC-ROC = 0.887149581688793 
Epoch : 1 | Validation Score :0.5038741949127942
Validation score improved (inf --> 0.5038741949127942). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8512756918219526 | AUC-ROC = 0.9619177329500321 
Sentiment : F1 score = 0.71153589056409 | AUC-ROC = 0.9581126470393881 
Epoch : 2 | Validation Score :0.3083861307400029
Validation score improved (0.5038741949127942 --> 0.3083861307400029). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9056497851416969 | AUC-ROC = 0.9754262136348641 
Sentiment : F1 score = 0.7651988349251347 | AUC-ROC = 0.9702395853599902 
Epoch : 3 | Validation Score :0.2504988654357631
Validation score improved (0.3083861307400029 --> 0.2504988654357631). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9187955492853902 | AUC-ROC = 0.9831620935015539 
Sentiment : F1 score = 0.7763449305834952 | AUC-ROC = 0.9785625147700235 
Epoch : 4 | Validation Score :0.22060047300850472
Validation score improved (0.2504988654357631 --> 0.22060047300850472). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9331254505058089 | AUC-ROC = 0.9857944355473429 
Sentiment : F1 score = 0.8082426471375429 | AUC-ROC = 0.9812699804081567 
Epoch : 5 | Validation Score :0.192858274535435
Validation score improved (0.22060047300850472 --> 0.192858274535435). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9249457968557766 | AUC-ROC = 0.9844971060770824 
Sentiment : F1 score = 0.8169636677020812 | AUC-ROC = 0.9822366605330769 
Epoch : 6 | Validation Score :0.20175645344141052
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.932525913444323 | AUC-ROC = 0.9860806553725426 
Sentiment : F1 score = 0.8487119519055404 | AUC-ROC = 0.9839036611963414 
Epoch : 7 | Validation Score :0.18723599569099705
Validation score improved (0.192858274535435 --> 0.18723599569099705). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9301728353333314 | AUC-ROC = 0.985018013275833 
Sentiment : F1 score = 0.8392755191335398 | AUC-ROC = 0.9829925174450317 
Epoch : 8 | Validation Score :0.1929808662068553
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9323773169656036 | AUC-ROC = 0.9853967767954523 
Sentiment : F1 score = 0.8646170537664523 | AUC-ROC = 0.9833682732767685 
Epoch : 9 | Validation Score :0.1864396323881498
EarlyStopping counter: 2 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.93522209197238 | AUC-ROC = 0.9860567679340179 
Sentiment : F1 score = 0.8749542461319374 | AUC-ROC = 0.9856328096563464 
Epoch : 10 | Validation Score :0.18871148075999283
EarlyStopping counter: 3 out of 3
Early stopping
################################################ Fold 2 ##################################################
Starting training....


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.7941426941537285 | AUC-ROC = 0.9365130474586915 
Sentiment : F1 score = 0.6480081891413788 | AUC-ROC = 0.9324715470616064 
Epoch : 1 | Validation Score :0.3890503621682888
Validation score improved (inf --> 0.3890503621682888). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.889672683066556 | AUC-ROC = 0.9635816009606033 
Sentiment : F1 score = 0.7509190670421821 | AUC-ROC = 0.959527450194238 
Epoch : 2 | Validation Score :0.28757378589816207
Validation score improved (0.3890503621682888 --> 0.28757378589816207). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9217275128192948 | AUC-ROC = 0.9837419591701053 
Sentiment : F1 score = 0.803390304652937 | AUC-ROC = 0.9790144436067231 
Epoch : 3 | Validation Score :0.21750997988189139
Validation score improved (0.28757378589816207 --> 0.21750997988189139). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9185051036528278 | AUC-ROC = 0.9822792689508996 
Sentiment : F1 score = 0.8212195355763248 | AUC-ROC = 0.9789444399726284 
Epoch : 4 | Validation Score :0.2101271505036005
Validation score improved (0.21750997988189139 --> 0.2101271505036005). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9189728587680805 | AUC-ROC = 0.9825824122175574 
Sentiment : F1 score = 0.8302306489259395 | AUC-ROC = 0.9801782486111379 
Epoch : 5 | Validation Score :0.21128876296485344
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9308219266393101 | AUC-ROC = 0.9818518656940102 
Sentiment : F1 score = 0.846867454742422 | AUC-ROC = 0.9805147882122954 
Epoch : 6 | Validation Score :0.20457904963958554
Validation score improved (0.2101271505036005 --> 0.20457904963958554). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9280486573124458 | AUC-ROC = 0.9851395912453247 
Sentiment : F1 score = 0.8446802398433298 | AUC-ROC = 0.9825069957248732 
Epoch : 7 | Validation Score :0.19657956761557882
Validation score improved (0.20457904963958554 --> 0.19657956761557882). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9303297705491307 | AUC-ROC = 0.9836683827748607 
Sentiment : F1 score = 0.8561972721199088 | AUC-ROC = 0.9827224828180724 
Epoch : 8 | Validation Score :0.20558191221661684
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9306422993704346 | AUC-ROC = 0.9818753973631114 
Sentiment : F1 score = 0.8570603975194936 | AUC-ROC = 0.980707255540934 
Epoch : 9 | Validation Score :0.211974132842407
EarlyStopping counter: 2 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9350930112212659 | AUC-ROC = 0.9852566107219868 
Sentiment : F1 score = 0.8644539716509624 | AUC-ROC = 0.9830215708672991 
Epoch : 10 | Validation Score :0.19396926669812783
Validation score improved (0.19657956761557882 --> 0.19396926669812783). Saving model!
################################################ Fold 3 ##################################################
Starting training....


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.7677600524183305 | AUC-ROC = 0.934380054304937 
Sentiment : F1 score = 0.5964853052899196 | AUC-ROC = 0.9357821873206656 
Epoch : 1 | Validation Score :0.41040660695331854
Validation score improved (inf --> 0.41040660695331854). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8872188787538606 | AUC-ROC = 0.9789398499257653 
Sentiment : F1 score = 0.743977647976484 | AUC-ROC = 0.9744113029372993 
Epoch : 2 | Validation Score :0.2591183355668696
Validation score improved (0.41040660695331854 --> 0.2591183355668696). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9003050129542464 | AUC-ROC = 0.9766442759118815 
Sentiment : F1 score = 0.789542585602636 | AUC-ROC = 0.9752965861759479 
Epoch : 3 | Validation Score :0.23766953407264338
Validation score improved (0.2591183355668696 --> 0.23766953407264338). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.92691867267174 | AUC-ROC = 0.9842002059842436 
Sentiment : F1 score = 0.8311265084826657 | AUC-ROC = 0.9820986475189923 
Epoch : 4 | Validation Score :0.19479886954877434
Validation score improved (0.23766953407264338 --> 0.19479886954877434). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9201235658891234 | AUC-ROC = 0.9837226435536295 
Sentiment : F1 score = 0.8417931306971044 | AUC-ROC = 0.9820789930087246 
Epoch : 5 | Validation Score :0.2074761142817939
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9292172125319712 | AUC-ROC = 0.9849529847651913 
Sentiment : F1 score = 0.8468731087950792 | AUC-ROC = 0.9842955638170588 
Epoch : 6 | Validation Score :0.18625856467136523
Validation score improved (0.19479886954877434 --> 0.18625856467136523). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9336108045866718 | AUC-ROC = 0.9858224522825463 
Sentiment : F1 score = 0.8537956333387183 | AUC-ROC = 0.9856194036547401 
Epoch : 7 | Validation Score :0.18544289625999405
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9323524990723737 | AUC-ROC = 0.9838895710445006 
Sentiment : F1 score = 0.8476264181011954 | AUC-ROC = 0.9833228736978984 
Epoch : 8 | Validation Score :0.1963208260332666
EarlyStopping counter: 2 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.932987303490783 | AUC-ROC = 0.9832806067172265 
Sentiment : F1 score = 0.865156867293992 | AUC-ROC = 0.9837128570609458 
Epoch : 9 | Validation Score :0.19440459412772482
EarlyStopping counter: 3 out of 3
Early stopping
################################################ Fold 4 ##################################################
Starting training....


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.6000392993730883 | AUC-ROC = 0.7986078949446488 
Sentiment : F1 score = 0.42738505998275195 | AUC-ROC = 0.751124202064641 
Epoch : 1 | Validation Score :0.6613220468400016
Validation score improved (inf --> 0.6613220468400016). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8697110105521279 | AUC-ROC = 0.9679873461486748 
Sentiment : F1 score = 0.7279112723256826 | AUC-ROC = 0.961596462267897 
Epoch : 2 | Validation Score :0.3136174109865603
Validation score improved (0.6613220468400016 --> 0.3136174109865603). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8765942686501844 | AUC-ROC = 0.965567342435114 
Sentiment : F1 score = 0.7737983969363905 | AUC-ROC = 0.9629880866510692 
Epoch : 3 | Validation Score :0.2775494522518582
Validation score improved (0.3136174109865603 --> 0.2775494522518582). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.8935453953833878 | AUC-ROC = 0.9740048084208189 
Sentiment : F1 score = 0.7780891046866155 | AUC-ROC = 0.9725714953637947 
Epoch : 4 | Validation Score :0.2478251110794198
Validation score improved (0.2775494522518582 --> 0.2478251110794198). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9293248848946472 | AUC-ROC = 0.9816208625278249 
Sentiment : F1 score = 0.8376221630950634 | AUC-ROC = 0.9792882336818415 
Epoch : 5 | Validation Score :0.18779388085116833
Validation score improved (0.2478251110794198 --> 0.18779388085116833). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9322437085092231 | AUC-ROC = 0.9866708248586606 
Sentiment : F1 score = 0.8746141731161764 | AUC-ROC = 0.9837840630843377 
Epoch : 6 | Validation Score :0.16688478190761163
Validation score improved (0.18779388085116833 --> 0.16688478190761163). Saving model!


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9263014742621668 | AUC-ROC = 0.9814021572799663 
Sentiment : F1 score = 0.8621762343876861 | AUC-ROC = 0.9790020748756239 
Epoch : 7 | Validation Score :0.19587059536441634
EarlyStopping counter: 1 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9279528903049613 | AUC-ROC = 0.9848310116464947 
Sentiment : F1 score = 0.8441312673342125 | AUC-ROC = 0.981477967293415 
Epoch : 8 | Validation Score :0.18573655018876323
EarlyStopping counter: 2 out of 3


HBox(children=(FloatProgress(value=0.0, max=205.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=52.0), HTML(value='')))


Aspect : F1 score = 0.9246360581210604 | AUC-ROC = 0.9778749148975676 
Sentiment : F1 score = 0.8427151240287766 | AUC-ROC = 0.9749438067021039 
Epoch : 9 | Validation Score :0.21939369426614927
EarlyStopping counter: 3 out of 3
Early stopping
Predicting for labeled test dataset....


HBox(children=(FloatProgress(value=0.0, max=78.0), HTML(value='')))


Aspect : F1 score = 0.919977595973938 | AUC-ROC = 0.9816645312118215 
Sentiment : F1 score = 0.8313261977456582 | AUC-ROC = 0.9788193759332019 


HBox(children=(FloatProgress(value=0.0, max=78.0), HTML(value='')))


Aspect : F1 score = 0.9169906879975698 | AUC-ROC = 0.9803696311021497 
Sentiment : F1 score = 0.8230834921869182 | AUC-ROC = 0.9772553422814351 


HBox(children=(FloatProgress(value=0.0, max=78.0), HTML(value='')))


Aspect : F1 score = 0.9220879936334976 | AUC-ROC = 0.9833754219864086 
Sentiment : F1 score = 0.8479515545707371 | AUC-ROC = 0.9817465075512161 


HBox(children=(FloatProgress(value=0.0, max=78.0), HTML(value='')))


Aspect : F1 score = 0.9207073830140259 | AUC-ROC = 0.9818792724607998 
Sentiment : F1 score = 0.8258857603978278 | AUC-ROC = 0.9795276180640832 


HBox(children=(FloatProgress(value=0.0, max=78.0), HTML(value='')))


Aspect : F1 score = 0.9215427215772308 | AUC-ROC = 0.9820084601464113 
Sentiment : F1 score = 0.8477984426920454 | AUC-ROC = 0.9802110382253929 
Final Results : 
Aspect : F1 score = 0.9285477373868725    | AUC-ROC = 0.9890571081316272 
Sentiment : F1 score = 0.8473144196838804    | AUC-ROC = 0.9876693162916941 
Generating predictions for complete test data....


HBox(children=(FloatProgress(value=0.0, max=116.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=116.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=116.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=116.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=116.0), HTML(value='')))


