# Ex05 SENT

## Import and Load

In [1]:
! pip install simpletransformers

Collecting simpletransformers
  Downloading simpletransformers-0.64.3-py3-none-any.whl (250 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/250.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.2/250.8 kB[0m [31m2.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.8/250.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from simpletransformers)
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting wandb>=0.10.32 (from simp

In [2]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import torch
import numpy as np
import pandas as pd
import nltk
import datasets

### !! NOTE: Make sure you have already put the two files in the same directory as this notebook.

In [4]:
test_set = pd.read_csv("2018-EI-reg-En-anger-test-gold.txt", sep='\t')
train_set = pd.read_csv("EI-reg-En-anger-train.txt", sep='\t')

In [5]:
test_set[:1]

Unnamed: 0,ID,Tweet,Affect Dimension,Intensity Score
0,2018-En-02328,@PageShhh1 I know you mean well but I'm offend...,anger,0.734


In [6]:
len(train_set), len(test_set)

(1701, 1002)

In [7]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
device

device(type='cuda', index=0)

In [8]:
import logging


logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

## Pre-Processing

In [9]:
import re

def preprocess_twitter_text(text):
    # Remove usernames start with "@" symbols
    text = re.sub(r'@(\w+)', '', text)

    # Remove hashtags start with "#" symbols
    text = re.sub(r'#(\w+)', '', text)

    # Remove URLs
    text = re.sub(r'http\S+|www\S+|https\S+', '', text, flags=re.MULTILINE)

    # Replace emojis
    text = re.sub(r'[\U00010000-\U0010ffff]', ' ', text)

    return text.strip()



In [10]:
test_set['Tweet'] = test_set['Tweet'].apply(preprocess_twitter_text)
train_set['Tweet'] = train_set['Tweet'].apply(preprocess_twitter_text)

In [11]:
# change column names for training (text, labels)
test_set.columns = ['ID', 'text', 'Affect Dimension', 'labels']
train_set.columns = ['ID', 'text', 'Affect Dimension', 'labels']

In [12]:
test_set[:1]

Unnamed: 0,ID,text,Affect Dimension,labels
0,2018-En-02328,I know you mean well but I'm offended. Prick.,anger,0.734


## Fine-tune

In [20]:
# config model args
model_args = ClassificationArgs(  regression = True,
                                  use_early_stopping=True,
                                  weight_decay=0.0001,
                                  output_dir=f"./results/",
                                  overwrite_output_dir=True,
                                  num_train_epochs=5,
                                  train_batch_size=8,
                                  learning_rate=5e-5,
                                  save_steps=100,
                                  logging_steps=100,
                                  evaluate_during_training=True,
                                  evaluate_during_training_steps=100,
                                  evaluate_during_training_verbose=True,
)

In [None]:
# model = torch.load("outputs/best_model")

### Model 1: RoBERTa-base

RoBERTa is a larger model compared to BERT, and has demonstrated improved performance on various natural language understanding benchmarks compared to BERT

In [22]:
model_1 = ClassificationModel(
    "roberta",
    "roberta-base",
    num_labels=1,
    args=model_args,
)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [15]:
result, model_outputs, wrong_predictions = model_1.eval_model(test_set)

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

In [16]:
result #loss before training

{'eval_loss': 0.2698360129244744}

In [24]:
model_1.train_model(train_set, eval_df=test_set)

  0%|          | 0/1701 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

(1065,
 defaultdict(list,
             {'global_step': [100,
               200,
               213,
               300,
               400,
               426,
               500,
               600,
               639,
               700,
               800,
               852,
               900,
               1000,
               1065],
              'train_loss': [0.026216039434075356,
               0.03989364206790924,
               0.007765922229737043,
               0.011544703505933285,
               0.016820421442389488,
               0.019718647003173828,
               0.029210586100816727,
               0.017947068437933922,
               0.013703646138310432,
               0.009284302592277527,
               0.01282678171992302,
               0.010440691374242306,
               0.01564740017056465,
               0.012845151126384735,
               0.00783117301762104],
              'eval_loss': [0.034427800393175514,
               0.024495978250227396,
   

In [28]:
result, model_outputs, wrong_predictions = model_1.eval_model(test_set)

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

In [29]:
result # loss after training

{'eval_loss': 0.01974705606883776}

In [33]:
from scipy import stats
stats.pearsonr(test_set['labels'], model_outputs)

PearsonRResult(statistic=0.6713500811489068, pvalue=2.9842410037492584e-132)

### Model 2: DistilBERT-base-uncased

DistilBERT is a smaller and more light weight version of BERT, which generally performs at a slightly lower level than BERT on certain downstream tasks

In [34]:
model_2 = ClassificationModel(
    "distilbert",
    "distilbert-base-uncased",
    num_labels=1,
    args=model_args
)

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['pre_classifier.bias', 'classifier.bias', 'pre_classifier.weight', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [35]:
result, model_outputs, wrong_predictions = model_2.eval_model(test_set)
result #loss before training

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

{'eval_loss': 0.134654874691651}

In [36]:
model_2.train_model(train_set, eval_df=test_set)

  0%|          | 0/1701 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

(800,
 defaultdict(list,
             {'global_step': [100,
               200,
               213,
               300,
               400,
               426,
               500,
               600,
               639,
               700,
               800],
              'train_loss': [0.009504495188593864,
               0.021258283406496048,
               0.012048902921378613,
               0.017816681414842606,
               0.017134809866547585,
               0.014592326246201992,
               0.004440532997250557,
               0.013692796230316162,
               0.010168412700295448,
               0.004877178929746151,
               0.006932693067938089],
              'eval_loss': [0.03194133304096463,
               0.026184774125881848,
               0.03103665308749658,
               0.025299931118737847,
               0.023932981873965926,
               0.02902314826358287,
               0.025782981379297635,
               0.024607030826339884,
           

In [37]:
result, model_outputs, wrong_predictions = model_2.eval_model(test_set)
result # loss after training

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

{'eval_loss': 0.024498287606836547}

In [38]:
stats.pearsonr(test_set['labels'], model_outputs)

PearsonRResult(statistic=0.5864908827990607, pvalue=1.2418882996087448e-93)

### Model 3: bert-base-cased

In [41]:
model_3 = ClassificationModel(
    "bert",
    "bert-base-cased",
    num_labels=1,
    args=model_args
)

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-cased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

In [42]:
result, model_outputs, wrong_predictions = model_3.eval_model(test_set)
result #loss before training

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

{'eval_loss': 0.8620992028524005}

In [43]:
model_3.train_model(train_set, eval_df=test_set)

  0%|          | 0/1701 [00:00<?, ?it/s]

Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

Running Epoch 0 of 5:   0%|          | 0/213 [00:00<?, ?it/s]



  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 1 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 2 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/213 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

  0%|          | 0/1002 [00:00<?, ?it/s]

(1065,
 defaultdict(list,
             {'global_step': [100,
               200,
               213,
               300,
               400,
               426,
               500,
               600,
               639,
               700,
               800,
               852,
               900,
               1000,
               1065],
              'train_loss': [0.0819941908121109,
               0.022637585178017616,
               0.03674078360199928,
               0.03936142474412918,
               0.021683180704712868,
               0.0198660921305418,
               0.00643895473331213,
               0.01422406267374754,
               0.012253752909600735,
               0.011885491199791431,
               0.005486389622092247,
               0.006905128248035908,
               0.0027636983431875706,
               0.0025242697447538376,
               0.012565585784614086],
              'eval_loss': [0.0501824656560544,
               0.04098616472985004,
        

In [44]:
result, model_outputs, wrong_predictions = model_3.eval_model(test_set)
result # loss after training

  0%|          | 0/1002 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

{'eval_loss': 0.021428737974178696}

In [45]:
stats.pearsonr(test_set['labels'], model_outputs)

PearsonRResult(statistic=0.6384610598982966, pvalue=7.792411401963222e-116)