---
# Emotion Regression: How angry are you?
----

Resources:
* Regression with simple transformers: https://simpletransformers.ai/docs/regression/
* simple transformers: https://simpletransformers.ai/

In [1]:
!pip install simpletransformers
!pip install transformers
!pip install datasets
!pip install sklearn
!pip install pandas

Collecting simpletransformers
  Downloading simpletransformers-0.64.3-py3-none-any.whl (250 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.8/250.8 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from simpletransformers)
  Downloading datasets-2.15.0-py3-none-any.whl (521 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m521.2/521.2 kB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting wandb>=0.10.32 (from simpletransformers)
  Downloading wandb-0.16.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m72.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting streamlit (from simpletransformers)
  Downloading

In [2]:
from random import shuffle
from simpletransformers.classification import ClassificationModel, ClassificationArgs
import pandas as pd
import logging
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
import os

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# 🐕 Inspect and transform the data 🐕

In [6]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1702 entries, 0 to 1701
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       1702 non-null   object
dtypes: object(1)
memory usage: 13.4+ KB


In [24]:
import pandas as pd
from io import StringIO


with open("/content/drive/Shareddrives/ML for NLP/HW5/EI-reg-En-anger-train.txt", "r") as f:
  train_df = pd.DataFrame(f)

train_df.head(3)

with open("/content/drive/Shareddrives/ML for NLP/HW5/2018-EI-reg-En-anger-test-gold.txt", "r") as f:
  test_df = pd.DataFrame(f)

test_df.head(3)



def clean_transform(df):
    split_df = df[0].str.split('\t', expand=True)
    clean_df = split_df.applymap(lambda x: x.strip() if pd.notna(x) else x)
    clean_df.columns = clean_df.iloc[0]
    clean_df = clean_df.drop(0)  # it contains column names
    clean_df = clean_df.reset_index(drop=True)
    return clean_df

# Example usage:
train_df = clean_transform(train_df)
test_df = clean_transform(test_df)


train_df['Intensity Score'] = pd.to_numeric(train_df['Intensity Score'], errors='coerce')
test_df['Intensity Score'] = pd.to_numeric(test_df['Intensity Score'], errors='coerce')


train_df['Tweet'] = train_df['Tweet'].astype(str)
test_df['Tweet'] = test_df['Tweet'].astype(str)
train_df['Affect Dimension'] = train_df['Affect Dimension'].astype(str)
test_df['Affect Dimension'] = test_df['Affect Dimension'].astype(str)


train_df = train_df[['Tweet', 'Affect Dimension', 'Intensity Score']]
test_df = test_df[['Tweet', 'Affect Dimension', 'Intensity Score']]
import pandas as pd


train_df = pd.DataFrame(train_df, columns=['Tweet', 'Affect Dimension', 'Intensity Score'])
test_df = pd.DataFrame(test_df, columns=['Tweet', 'Affect Dimension', 'Intensity Score'])

print(train_df.head(4))
train_df.info()

                                               Tweet Affect Dimension  \
0  @xandraaa5 @amayaallyn6 shut up hashtags are c...            anger   
1  it makes me so fucking irate jesus. nobody is ...            anger   
2         Lol Adam the Bull with his fake outrage...            anger   
3  @THATSSHAWTYLO passed away early this morning ...            anger   

   Intensity Score  
0            0.562  
1            0.750  
2            0.417  
3            0.354  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1701 entries, 0 to 1700
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Tweet             1701 non-null   object 
 1   Affect Dimension  1701 non-null   object 
 2   Intensity Score   1701 non-null   float64
dtypes: float64(1), object(2)
memory usage: 40.0+ KB


# 🐈 Preprocess 🐈

Since we are working with Twitter posts, the preprocessing should tackle the following potential issues:
* emojis
* colloqial speech with many typos, missing or misplaced punctuation, orthographic mistakes, etc.
* links (urls) and references


In [8]:
!pip install emoji

Collecting emoji
  Downloading emoji-2.8.0-py2.py3-none-any.whl (358 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m358.9/358.9 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.8.0


In [25]:
import re
import string
import emoji
import nltk
from nltk.corpus import stopwords

nltk.download('stopwords')
stopwords_english = stopwords.words('english')

# Define the emoji extractor function
def extract_emojis(text):
    if not isinstance(text, str):
        text = str(text)
    return [c for c in text if emoji.is_emoji(c)]

# Function to create a dictionary with emoji as key and textual description as value
def create_emoji_description_dict(emojis):
    emoji_to_text_dict = {}
    for emoji_key in emojis:
        description = emoji.demojize(emoji_key).replace(':', '').replace('_', ' ')
        description_without_face = ' '.join([word for word in description.split() if word.lower() != 'face'])
        emoji_to_text_dict[emoji_key] = description_without_face
    return emoji_to_text_dict

# Function to replace emojis with translated text in a given text
def replace_emojis_with_translated_text(text, translation_dict):
    for emoji_key, translated_word in translation_dict.items():
        emoji_with_spaces = f" {emoji_key} "
        text = text.replace(emoji_with_spaces, f" {translated_word} ")
        text = text.replace(f"{emoji_key} ", f" {translated_word} ").replace(f" {emoji_key}", f" {translated_word} ")
    return text

# Preprocessing functions
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r'<.*?>', '', text)  # Remove HTML tags
    text = re.sub(r'https?://\S+|www\.\S+', '', text)  # Remove URLs
    text = re.sub(r'@\S+', '', text)  # Remove user mentions
    text = text.translate(str.maketrans('', '', string.punctuation))  # Remove punctuation
    return text

# Manually selected chat words
chat_words = {
    'fyi': 'for your information',
    'lol': 'laugh out loud',
    'imo': 'in my opinion',
    'imho': 'in my humble opinion',
    'tmi': 'too much information',
    'bff': 'best friends forever',
    'idk': "I don't know",
    'omg': 'oh my god',
    'btw': 'by the way',
    'wtf': 'what the fuck',
    'fu': 'fuck you'
}

# Function to replace chat words
def replace_chat_words(text):
    new_text = []
    for word in text.split():
        if word.upper() in chat_words:
            new_text.append(chat_words[word.upper()])
        else:
            new_text.append(word)
    return ' '.join(new_text)

# Apply preprocessing to the "Tweet" column
all_emojis_train = [emoji for text in train_df['Tweet'] for emoji in extract_emojis(text)]
emoji_to_text_dict_train = create_emoji_description_dict(all_emojis_train)

train_df['Tweet'] = train_df['Tweet'].apply(lambda x: replace_emojis_with_translated_text(x, emoji_to_text_dict_train))
train_df['Tweet'] = train_df['Tweet'].apply(preprocess_text)
train_df['Tweet'] = train_df['Tweet'].apply(replace_chat_words)

all_emojis_test = [emoji for text in test_df['Tweet'] for emoji in extract_emojis(text)]
emoji_to_text_dict_test = create_emoji_description_dict(all_emojis_test)

test_df['Tweet'] = test_df['Tweet'].apply(lambda x: replace_emojis_with_translated_text(x, emoji_to_text_dict_test))
test_df['Tweet'] = test_df['Tweet'].apply(preprocess_text)
test_df['Tweet'] = test_df['Tweet'].apply(replace_chat_words)


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [10]:
train_df.head(3)

Unnamed: 0,Tweet,Affect Dimension,Intensity Score
0,shut up hashtags are cool offended,anger,0.562
1,it makes me so fucking irate jesus nobody is c...,anger,0.75
2,lol adam the bull with his fake outrage,anger,0.417


In [11]:
test_df.head(3)

Unnamed: 0,Tweet,Affect Dimension,Intensity Score
0,i know you mean well but im offended prick,anger,0.734
1,let go of resentment it will hold you back do ...,anger,0.422
2,no im not depressed because of the weather im ...,anger,0.663


____

# Prepare the data and model

In [26]:


train_df = train_df.rename(columns={'Tweet' : 'text', 'Intensity Score' : 'labels'})
test_df = test_df.rename(columns={'Tweet' : 'text', 'Intensity Score' : 'labels'})

In [27]:
print(train_df.head(1))
train_df.info()

                                 text Affect Dimension  labels
0  shut up hashtags are cool offended            anger   0.562
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1701 entries, 0 to 1700
Data columns (total 3 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   text              1701 non-null   object 
 1   Affect Dimension  1701 non-null   object 
 2   labels            1701 non-null   float64
dtypes: float64(1), object(2)
memory usage: 40.0+ KB


## RoBERTa

In [42]:
from scipy.stats import pearsonr
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


# define args + enable regression
model_args = ClassificationArgs(
    num_train_epochs=3,
    manual_seed=42,
    train_batch_size=4,
    max_seq_length=128,
    use_multiprocessing=False,
    use_multiprocessing_for_evaluation=False,
    regression=True,
    overwrite_output_dir=True
)

os.environ["TOKENIZERS_PARALLELISM"] = "false"



# Create a ClassificationModel
roberta_model = ClassificationModel(
    "roberta",
    "roberta-base",
    num_labels=1,
    args=model_args
)

# train
roberta_results = roberta_model.train_model(train_df)

# evaluate
result, model_outputs, wrong_predictions = roberta_model.eval_model(test_df)


for epoch, result in enumerate(roberta_results):
    print(f"Epoch {epoch + 1} - Evaluation Result: {result}")


final_result = roberta_results[-1]

actual = test_df['labels'].values
predicted = model_outputs.reshape(-1)

mse = mean_squared_error(actual, predicted)
mae = mean_absolute_error(actual, predicted)
r2 = r2_score(actual, predicted)

# final results
print(f"Final Evaluation Result: {final_result}")
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"R-squared: {r2}")


pearson_corr, _ = pearsonr(actual, predicted)
print(f"Pearson correlation coefficient is equal to {pearson_corr}")



Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classifier.dense.bias', 'classifier.out_proj.bias', 'classifier.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/426 [00:00<?, ?it/s]



Running Epoch 1 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 1 - Evaluation Result: 1278
Epoch 2 - Evaluation Result: 0.02565928924196044
Final Evaluation Result: 0.02565928924196044
MSE: 0.014600522629855874
MAE: 0.09594730266030439
R-squared: 0.593159852766051
Pearson correlation coefficient is equal to 0.7750003199385703


In [32]:
for i in wrong_predictions[:20]:
  print(f'{i}')


{'guid': 0, 'text_a': 'i know you mean well but im offended prick', 'text_b': None, 'label': 0.734}
{'guid': 1, 'text_a': 'let go of resentment it will hold you back do not worry about what could of been what is to come is what matters', 'text_b': None, 'label': 0.422}
{'guid': 2, 'text_a': 'no im not depressed because of the weather im depressed because i have depression sicknotweak', 'text_b': None, 'label': 0.663}
{'guid': 3, 'text_a': 'amarnathterrorattack muslims are killing everywhere syria iraq palestine everyday beyond they say that islam is terrorism shame on you', 'text_b': None, 'label': 0.703}
{'guid': 4, 'text_a': 'prepare to suffer the sting of ghost riders power prepare to know the true meaning of hell', 'text_b': None, 'label': 0.719}
{'guid': 5, 'text_a': 'weve been broken up a while both moved on shes got a kid i dont hold any animosity towards her anymore', 'text_b': None, 'label': 0.359}
{'guid': 6, 'text_a': 'just know usa all canadians dont agree with what khadrs 

In [None]:
for i in model_outputs[:20]:
  print(f'{i}')

## DistilBERT

In [44]:
from scipy.stats import pearsonr



# define args + enable regression
model_args = ClassificationArgs(
    num_train_epochs=3,
    manual_seed=42,
    train_batch_size=4,
    max_seq_length=128,
    regression=True,
    use_multiprocessing=False,
    use_multiprocessing_for_evaluation=False,
    overwrite_output_dir=True
)


# Create a ClassificationModel
distilbert_model = ClassificationModel(
    "distilbert",
    "distilbert-base-uncased",
    num_labels=1,
    args=model_args
)

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# train
distilbert_results = distilbert_model.train_model(train_df)

# eval
result, model_outputs, wrong_predictions = distilbert_model.eval_model(test_df)


for epoch, result in enumerate(distilbert_results):
    print(f"Epoch {epoch + 1} - Evaluation Result: {result}")


final_result = distilbert_results[-1]

actual = test_df['labels'].values
predicted = model_outputs.reshape(-1)

mse = mean_squared_error(actual, predicted)
mae = mean_absolute_error(actual, predicted)
r2 = r2_score(actual, predicted)

# final results
print(f"Final Evaluation Result: {final_result}")
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"R-squared: {r2}")


pearson_corr, _ = pearsonr(actual, predicted)
print(f"Pearson correlation coefficient is equal to {pearson_corr}")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 1 - Evaluation Result: 1278
Epoch 2 - Evaluation Result: 0.01590356008241777
Final Evaluation Result: 0.01590356008241777
MSE: 0.015233453937736098
MAE: 0.09912029456711575
R-squared: 0.5755233699486199
Pearson correlation coefficient is equal to 0.764091546948262


# DistilBERT without preprocessing

In [53]:
import pandas as pd
from io import StringIO


with open("/content/drive/Shareddrives/ML for NLP/HW5/EI-reg-En-anger-train.txt", "r") as f:
  train_df = pd.DataFrame(f)

train_df.head(3)

with open("/content/drive/Shareddrives/ML for NLP/HW5/2018-EI-reg-En-anger-test-gold.txt", "r") as f:
  test_df = pd.DataFrame(f)

test_df.head(3)



def clean_transform(df):
    split_df = df[0].str.split('\t', expand=True)
    clean_df = split_df.applymap(lambda x: x.strip() if pd.notna(x) else x)
    clean_df.columns = clean_df.iloc[0]
    clean_df = clean_df.drop(0)  # it contains column names
    clean_df = clean_df.reset_index(drop=True)
    return clean_df

# Example usage:
train_df = clean_transform(train_df)
test_df = clean_transform(test_df)


train_df = pd.DataFrame(train_df, columns=['Tweet', 'Intensity Score'])
test_df = pd.DataFrame(test_df, columns=['Tweet', 'Intensity Score'])

train_df['Intensity Score'] = pd.to_numeric(train_df['Intensity Score'], errors='coerce')
test_df['Intensity Score'] = pd.to_numeric(test_df['Intensity Score'], errors='coerce')
train_df['Tweet'] = train_df['Tweet'].astype(str)
test_df['Tweet'] = test_df['Tweet'].astype(str)


train_df = train_df.rename(columns={'Tweet' : 'text', 'Intensity Score' : 'labels'})
test_df = test_df.rename(columns={'Tweet' : 'text', 'Intensity Score' : 'labels'})




# define args + enable regression
model_args = ClassificationArgs(
    num_train_epochs=3,
    manual_seed=42,
    train_batch_size=4,
    max_seq_length=128,
    regression=True,
    use_multiprocessing=False,
    use_multiprocessing_for_evaluation=False,
    overwrite_output_dir=True
)


# Create a ClassificationModel
distilbert_model = ClassificationModel(
    "distilbert",
    "distilbert-base-uncased",
    num_labels=1,
    args=model_args
)

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# train
distilbert_results = distilbert_model.train_model(train_df)

# eval
result, model_outputs, wrong_predictions = distilbert_model.eval_model(test_df)


for epoch, result in enumerate(distilbert_results):
    print(f"Epoch {epoch + 1} - Evaluation Result: {result}")


final_result = distilbert_results[-1]

actual = test_df['labels'].values
predicted = model_outputs.reshape(-1)

mse = mean_squared_error(actual, predicted)
mae = mean_absolute_error(actual, predicted)
r2 = r2_score(actual, predicted)

# final results
print(f"Final Evaluation Result: {final_result}")
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"R-squared: {r2}")


pearson_corr, _ = pearsonr(actual, predicted)
print(f"Pearson correlation coefficient is equal to {pearson_corr}")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 1 - Evaluation Result: 1278
Epoch 2 - Evaluation Result: 0.017202431368018488
Final Evaluation Result: 0.017202431368018488
MSE: 0.018057822387478308
MAE: 0.10626111448977046
R-squared: 0.4968230038681364
Pearson correlation coefficient is equal to 0.7071913906707861


# DistilBERT preprocessed - without emoijs

In [56]:
import pandas as pd
from io import StringIO


with open("/content/drive/Shareddrives/ML for NLP/HW5/EI-reg-En-anger-train.txt", "r") as f:
  train_df = pd.DataFrame(f)

train_df.head(3)

with open("/content/drive/Shareddrives/ML for NLP/HW5/2018-EI-reg-En-anger-test-gold.txt", "r") as f:
  test_df = pd.DataFrame(f)

test_df.head(3)



def clean_transform(df):
    split_df = df[0].str.split('\t', expand=True)
    clean_df = split_df.applymap(lambda x: x.strip() if pd.notna(x) else x)
    clean_df.columns = clean_df.iloc[0]
    clean_df = clean_df.drop(0)  # it contains column names
    clean_df = clean_df.reset_index(drop=True)
    return clean_df

# Example usage:
train_df = clean_transform(train_df)
test_df = clean_transform(test_df)

# keep only necessary columns
train_df = pd.DataFrame(train_df, columns=['Tweet', 'Intensity Score'])
test_df = pd.DataFrame(test_df, columns=['Tweet', 'Intensity Score'])

# enforce types
train_df['Intensity Score'] = pd.to_numeric(train_df['Intensity Score'], errors='coerce')
test_df['Intensity Score'] = pd.to_numeric(test_df['Intensity Score'], errors='coerce')
train_df['Tweet'] = train_df['Tweet'].astype(str)
test_df['Tweet'] = test_df['Tweet'].astype(str)

# preprocess
train_df['Tweet'] = train_df['Tweet'].apply(preprocess_text)
train_df['Tweet'] = train_df['Tweet'].apply(replace_chat_words)
test_df['Tweet'] = test_df['Tweet'].apply(preprocess_text)
test_df['Tweet'] = test_df['Tweet'].apply(replace_chat_words)

# rename
train_df = train_df.rename(columns={'Tweet' : 'text', 'Intensity Score' : 'labels'})
test_df = test_df.rename(columns={'Tweet' : 'text', 'Intensity Score' : 'labels'})




# define args + enable regression
model_args = ClassificationArgs(
    num_train_epochs=3,
    manual_seed=42,
    train_batch_size=4,
    max_seq_length=128,
    regression=True,
    use_multiprocessing=False,
    use_multiprocessing_for_evaluation=False,
    overwrite_output_dir=True
)


# Create a ClassificationModel
distilbert_model = ClassificationModel(
    "distilbert",
    "distilbert-base-uncased",
    num_labels=1,
    args=model_args
)

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# train
distilbert_results = distilbert_model.train_model(train_df)

# eval
result, model_outputs, wrong_predictions = distilbert_model.eval_model(test_df)


for epoch, result in enumerate(distilbert_results):
    print(f"Epoch {epoch + 1} - Evaluation Result: {result}")


final_result = distilbert_results[-1]

actual = test_df['labels'].values
predicted = model_outputs.reshape(-1)

mse = mean_squared_error(actual, predicted)
mae = mean_absolute_error(actual, predicted)
r2 = r2_score(actual, predicted)

# final results
print(f"Final Evaluation Result: {final_result}")
print(f"MSE: {mse}")
print(f"MAE: {mae}")
print(f"R-squared: {r2}")


pearson_corr, _ = pearsonr(actual, predicted)
print(f"Pearson correlation coefficient is equal to {pearson_corr}")


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight', 'classifier.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/3 [00:00<?, ?it/s]

Running Epoch 0 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 1 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Epoch 2 of 3:   0%|          | 0/426 [00:00<?, ?it/s]

Running Evaluation:   0%|          | 0/126 [00:00<?, ?it/s]

Epoch 1 - Evaluation Result: 1278
Epoch 2 - Evaluation Result: 0.01579091957803673
Final Evaluation Result: 0.01579091957803673
MSE: 0.016551651201189163
MAE: 0.10328048980164671
R-squared: 0.5387921115996901
Pearson correlation coefficient is equal to 0.7400285024092246


# Compare

In [60]:
results_data = [
    {'Model': 'RoBERTa',
     'Evaluation Result': 0.02565928924196044,
     'MSE': 0.014600522629855874,
     'MAE': 0.09594730266030439,
     'R-squared': 0.593159852766051,
     'Pearson' : 0.7750003199385703},
    {'Model': 'DistilBERT',
     'Evaluation Result': 0.01590356008241777,
     'MSE': 0.015233453937736098,
     'MAE': 0.09912029456711575,
     'R-squared': 0.5755233699486199,
     'Pearson' : 0.764091546948262},
    {'Model': 'DistilBERT_raw_wo-emoji',
     'Evaluation Result': 0.01579091957803673,
     'MSE': 0.016551651201189163,
     'MAE': 0.10328048980164671,
     'R-squared': 0.5387921115996901,
     'Pearson' : 0.7400285024092246},
    {'Model': 'DistilBERT_raw',
     'Evaluation Result': 0.017202431368018488,
     'MSE': 0.018057822387478308,
     'MAE': 0.10626111448977046,
     'R-squared': 0.4968230038681364,
     'Pearson' : 0.7071913906707861}

]

# Create a DataFrame
df = pd.DataFrame(results_data)
df

Unnamed: 0,Model,Evaluation Result,MSE,MAE,R-squared,Pearson
0,RoBERTa,0.025659,0.014601,0.095947,0.59316,0.775
1,DistilBERT,0.015904,0.015233,0.09912,0.575523,0.764092
2,DistilBERT_raw_wo-emoji,0.015791,0.016552,0.10328,0.538792,0.740029
3,DistilBERT_raw,0.017202,0.018058,0.106261,0.496823,0.707191
