# Sentiment analysis

For the sentiment analysis, we tried out several different models and pre-processing pipelines. In particular, for the comments or descriptions embedded in the lines, such as [laughing] or [to camera], we compared different handling methods to see which resulted in the best sentiment-analysis score.

We mainly used pre-trained sentiment analysis models, and tested their accuracy by comparing the predicted sentiment with the sentiment labels we assigned ourselves in the annotated sample (of 300 lines).

## 1. Pre-processing pipeline

In [2]:
import pandas as pd
# Load the raw script lines; the path is relative to the current working directory.
df = pd.read_csv("The_Office_lines.csv")

In [3]:
# Columns needed for the sentiment analysis; the same list is later passed to
# preprocess_sentiment().
relevant_columns = ["id","speaker", "line_text"]
# Drop every other column of the raw data.
df = df[relevant_columns]

In [4]:
import re

# deals with descriptions in lines, e.g. [laughs] or [to camera]
def deal_with_description(line, mode):
    """Handle bracketed descriptions such as ``[laughs]`` or ``[to camera]``.

    Parameters
    ----------
    line : str
        One spoken line, possibly containing ``[...]`` descriptions.
    mode : str
        How the descriptions are treated:

        - ``"remove"``: delete every ``[...]`` span; the whitespace around
          the deleted span is left as it was.
        - ``"end"``: delete the spans and append their contents (joined by
          ``", "``) after the spoken text.
        - ``"start"``: delete the spans and put their contents (joined by
          ``", "``) before the spoken text.
        - ``"keep"``: leave all text in place and only strip the bracket
          characters ``( ) [ ] { }``.

        Any other value returns ``line`` unchanged.

    Returns
    -------
    str
        The transformed line.
    """
    bracketed = r"\[(.*?)\]"

    if mode == "remove":
        return re.sub(bracketed, "", line)

    if mode in ("start", "end"):
        descriptions = ", ".join(re.findall(bracketed, line))
        spoken = re.sub(bracketed, "", line).strip()
        parts = [descriptions, spoken] if mode == "start" else [spoken, descriptions]
        # Join only the non-empty parts: a line without any description (the
        # common case) or one made up solely of descriptions must not gain a
        # stray leading/trailing separator space.
        return " ".join(part for part in parts if part)

    if mode == "keep":
        return re.sub(r"[\([{})\]]", "", line)

    return line

def preprocess_sentiment(df, relevant_columns, description_mode):
    """Select the relevant columns and normalise bracketed descriptions.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw lines; must contain every column named in ``relevant_columns``.
    relevant_columns : list of str
        Columns to keep. Must include ``"line_text"``.
    description_mode : str
        Passed through as ``mode`` to ``deal_with_description``.

    Returns
    -------
    pandas.DataFrame
        A new frame with only ``relevant_columns``; ``df`` is not modified.
    """
    # Take an explicit copy so the column assignment below operates on an
    # independent frame instead of on a slice of the caller's data (which
    # triggers pandas' SettingWithCopyWarning).
    df = df[relevant_columns].copy()
    # deal with descriptions in lines
    df["line_text"] = df["line_text"].apply(
        lambda text: deal_with_description(text, mode=description_mode)
    )
    return df

In [5]:
# Build the pre-processed frame in "keep" mode: bracket characters are
# stripped but the description text stays in place.
# NOTE(review): df_processed is not referenced again in the visible cells; the
# evaluation below reads line_text from the annotated CSVs instead. Confirm
# whether the pre-processing was meant to be applied to that sample as well.
df_processed = preprocess_sentiment(df, relevant_columns, description_mode="keep")

## 2. Sentiment analysis

I first applied the sentiment analysis only to the sample labeled by us, and then applied the best-performing combination of pipeline and model to the whole dataset.

#### Function to extract ids that have been annotated by us:

In [6]:
def annotated_ids():
    """Return the rows of the annotators' sample files that carry a label.

    Reads the three per-annotator CSVs from ``annotated_data/``, stacks them
    in the order Luuk, Shantanu, Eline (each file's own row index is kept),
    and keeps only the rows whose ``Sentiment`` value is not missing.
    """
    sample_files = (
        "annotated_data/sample_Luuk.csv",
        "annotated_data/sample_Shantanu.csv",
        "annotated_data/sample_Eline.csv",
    )

    # stack all annotations on top of each other
    stacked = pd.concat([pd.read_csv(path) for path in sample_files], axis=0)

    # keep only the rows that actually have a "Sentiment" annotation
    has_label = stacked["Sentiment"].notna()
    return stacked[has_label]

### Function to test the accuracy of the sentiment analysis

In [36]:
# import metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error

# Lookup tables translating a pipeline's string label into the integer
# sentiment scale used in the annotations (-1 negative, 0 neutral, 1 positive).

# Two-class label set (no neutral class); applied below to the RoBERTa and
# DistilBERT outputs.
trans_dict_roberta = {
    "NEGATIVE": -1,
    "POSITIVE": 1
}

# Three-class label set; applied below to both BERT pipelines.
# NOTE(review): LABEL_0/1/2 are assumed to mean negative/neutral/positive for
# both models — confirm against the model cards.
trans_dict_bert = {
    "LABEL_0": -1,
    "LABEL_1": 0,
    "LABEL_2": 1
}

# extract predicted values from dataframe
def extract_ypred(df, source_column, transdict, write=True, target_column="temp"):
    """Decode raw pipeline outputs into integer sentiment labels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the raw pipeline outputs.
    source_column : str
        Column whose cells are sequences whose first element is a dict with a
        ``"label"`` key (e.g. ``[{"label": "POSITIVE", "score": 0.99}]``).
    transdict : dict
        Maps each label string to its integer sentiment value.
    write : bool, default True
        If True, the decoded labels are also stored in ``df[target_column]``
        (in place). If False, ``df`` is left untouched.
    target_column : str, default "temp"
        Name of the column written when ``write`` is True.

    Returns
    -------
    numpy.ndarray
        The decoded labels, in row order.

    Raises
    ------
    KeyError
        If a label is not a key of ``transdict``.
    """
    decoded = df[source_column].apply(lambda cell: transdict[cell[0]["label"]])
    # Only touch the caller's frame when asked to. The previous version always
    # wrote the column and then dropped it from a local copy, so the "temp"
    # column leaked into the caller's frame even with write=False.
    if write:
        df[target_column] = decoded
    return decoded.values

def result_score(Y_val, Y_pred, name, binary=False):
    """Print classification metrics for one set of predictions.

    Parameters
    ----------
    Y_val : sequence
        Annotated (true) labels.
    Y_pred : sequence
        Predicted labels, in the same order as ``Y_val``.
    name : str
        Model name shown in the report header.
    binary : bool, default False
        If True, every true label equal to 0 is scored as 1, so that
        three-class annotations can be compared with two-class predictions.

    Prints accuracy, macro-averaged precision/recall/F1 and the mean squared
    error; nothing is returned.
    """
    if binary:
        # fold the 0 class into the 1 class (builds a new list, input untouched)
        Y_val = [1 if label == 0 else label for label in Y_val]

    accuracy = accuracy_score(Y_val, Y_pred)
    # the three macro-averaged scores share the same call signature
    precision, recall, f1 = (
        metric(Y_val, Y_pred, average="macro")
        for metric in (precision_score, recall_score, f1_score)
    )
    MSE = mean_squared_error(Y_val, Y_pred)

    # the report text is kept exactly as it was, including the padding that
    # the line continuations introduce
    report = f"Analysis with {name}:\
          \n- - - - - - - - - - \
          \nAccuracy: {accuracy}\
          \nPrecision: {precision}\
          \nRecall: {recall}\
          \nF1: {f1}\
          \nMSE: {MSE}\n"
    print(report)

### Function to fit sentiment analysis model

In [9]:
# find current time
import time
#supress SettingWithCopyWarning
pd.options.mode.chained_assignment = None

def fit_sentiment(df_filtered, method, name, progress=True):
    """Run a sentiment model on every line and store its raw output.

    Parameters
    ----------
    df_filtered : pandas.DataFrame
        Must contain a ``"line_text"`` column. Modified in place: the column
        ``name`` is created (or overwritten) with the model outputs.
    method : callable
        Called once per row with that row's line text; whatever it returns
        is stored unchanged.
    name : str
        Name of the result column; also shown in the progress message.
    progress : bool, default True
        If True, print a header and a self-overwriting progress line.

    Returns
    -------
    None
    """
    if progress:
        print(f"Fit sentiment analysis {name}")

    k = len(df_filtered)
    results = []
    # Count rows by position instead of using the index label, so the progress
    # counter is correct for any index (not only 0..n-1).
    for position, text in enumerate(df_filtered["line_text"], start=1):
        results.append(method(text))
        if progress:
            print(f"sample {position} out of {k}. {round(position/k*100, 2)}%  ", end='\x1b[1K\r')

    # Assign the whole column at once. Element-wise chained assignment
    # (df[col][i] = value) is not guaranteed to write through to the frame.
    # dtype=object keeps each (possibly list-valued) result as a single cell.
    df_filtered[name] = pd.Series(results, index=df_filtered.index, dtype=object)

### Sentiment analysis models

In [11]:
# Load the four pre-trained sentiment pipelines that are compared below.
from transformers import pipeline
# Outputs POSITIVE/NEGATIVE labels (decoded with trans_dict_roberta).
sentiment_analysis_roberta = pipeline("sentiment-analysis",model="siebert/sentiment-roberta-large-english")
# Outputs LABEL_0/LABEL_1/LABEL_2 labels (decoded with trans_dict_bert).
sentiment_analysis_bert  = pipeline("sentiment-analysis",model="sbcBI/sentiment_analysis_model")
# Outputs POSITIVE/NEGATIVE labels (decoded with trans_dict_roberta).
sentiment_analysis_distilbert = pipeline("sentiment-analysis",model="distilbert-base-uncased-finetuned-sst-2-english")
# Outputs LABEL_0/LABEL_1/LABEL_2 labels (decoded with trans_dict_bert).
sentiment_analysis_bert_uncased = pipeline("sentiment-analysis",model="Seethal/sentiment_analysis_generic_dataset")

In [12]:
# Smoke test: inspect the raw output format on an obviously negative sentence.
sentiment_analysis_distilbert("I hate you")

[{'label': 'NEGATIVE', 'score': 0.9991129040718079}]

In [13]:
# Same smoke test for the three-class model, which reports generic LABEL_n names.
sentiment_analysis_bert_uncased("I hate you")

[{'label': 'LABEL_0', 'score': 0.9952951073646545}]

For testing, we currently use only the lines that have been annotated by us.

In [14]:
# Evaluation frame: the annotated lines only, renumbered 0..n-1 (the index
# inherited from the individual sample files is discarded).
df_filtered = annotated_ids().reset_index(drop=True)

In [37]:
# Run every pipeline over the annotated sample; each call stores the raw model
# output in a column named after the pipeline.
fit_sentiment(df_filtered, method=sentiment_analysis_roberta, name="sentiment_analysis_roberta")
fit_sentiment(df_filtered, method=sentiment_analysis_bert, name="sentiment_analysis_bert")
fit_sentiment(df_filtered, method=sentiment_analysis_distilbert, name="sentiment_analysis_distilbert")
fit_sentiment(df_filtered, method=sentiment_analysis_bert_uncased, name="sentiment_analysis_bert_uncased")

Fit sentiment analysis sentiment_analysis_roberta
Fit sentiment analysis sentiment_analysis_bert
Fit sentiment analysis sentiment_analysis_distilbert
Fit sentiment analysis sentiment_analysis_bert_uncased
sample 217 out of 217. 100.0%[1K

In [39]:
# Ground-truth labels from the annotation, in row order.
Y_val = df_filtered["Sentiment"].values

# Decode each model's raw output into -1/0/1 and keep it as its own column.
y_pred_roberta = extract_ypred(
    df_filtered,
    source_column="sentiment_analysis_roberta",
    transdict=trans_dict_roberta,
    target_column="pred_sentiment_label_roberta",
)
y_pred_bert = extract_ypred(
    df_filtered,
    source_column="sentiment_analysis_bert",
    transdict=trans_dict_bert,
    target_column="pred_sentiment_label_bert",
)
y_pred_bert_uncased = extract_ypred(
    df_filtered,
    source_column="sentiment_analysis_bert_uncased",
    transdict=trans_dict_bert,
    target_column="pred_sentiment_label_bert_uncased",
)
y_pred_distilbert = extract_ypred(
    df_filtered,
    source_column="sentiment_analysis_distilbert",
    transdict=trans_dict_roberta,
    target_column="pred_sentiment_label_distilbert",
)

# The two-class models are scored with the 0 labels folded into 1 (binary=True);
# the three-class models are scored against the labels as annotated.
result_score(Y_val, y_pred_roberta, name="Roberta", binary=True)
result_score(Y_val, y_pred_distilbert, name="DistilBERT", binary=True)
result_score(Y_val, y_pred_bert, name="BERT")
result_score(Y_val, y_pred_bert_uncased, name="BERT Uncased")

Analysis with Roberta:          
- - - - - - - - - -           
Accuracy: 0.6359447004608295          
Precision: 0.6626650660264106          
Recall: 0.7210955710955711          
F1: 0.6188157338847753          
MSE: 1.456221198156682

Analysis with DistilBERT:          
- - - - - - - - - -           
Accuracy: 0.6129032258064516          
Precision: 0.636748844375963          
Recall: 0.6861888111888111          
F1: 0.593850267379679          
MSE: 1.5483870967741935

Analysis with BERT:          
- - - - - - - - - -           
Accuracy: 0.5668202764976958          
Precision: 0.5795834989383376          
Recall: 0.633154960981048          
F1: 0.5713713425978206          
MSE: 0.5852534562211982

Analysis with BERT Uncased:          
- - - - - - - - - -           
Accuracy: 0.6221198156682027          
Precision: 0.605795265792266          
Recall: 0.6024191750278707          
F1: 0.602829144934408          
MSE: 0.48847926267281105



In [17]:
# Preview: raw pipeline outputs next to the decoded integer labels.
df_filtered.head()

Unnamed: 0,id,speaker,line_text,Sentiment,sentiment_analysis_roberta,sentiment_analysis_bert,sentiment_analysis_distilbert,sentiment_analysis_bert_uncased,pred_sentiment_label_roberta,pred_sentiment_label_bert,pred_sentiment_label_bert_uncased,pred_sentiment_label_distilbert
0,47252,Gabe,"Ok, but once this starts, it's going to be mov...",-1.0,"[{'label': 'POSITIVE', 'score': 0.983961701393...","[{'label': 'LABEL_0', 'score': 0.7348992228507...","[{'label': 'NEGATIVE', 'score': 0.768441677093...","[{'label': 'LABEL_1', 'score': 0.9918521046638...",1,-1,0,-1
1,15710,Andy,What?,0.0,"[{'label': 'NEGATIVE', 'score': 0.997746646404...","[{'label': 'LABEL_0', 'score': 0.4067431092262...","[{'label': 'NEGATIVE', 'score': 0.993637084960...","[{'label': 'LABEL_1', 'score': 0.9106979966163...",-1,-1,0,-1
2,44150,Dwight,"Just a little announcement folks, remember, th...",1.0,"[{'label': 'POSITIVE', 'score': 0.998121678829...","[{'label': 'LABEL_0', 'score': 0.3897318840026...","[{'label': 'NEGATIVE', 'score': 0.903421878814...","[{'label': 'LABEL_0', 'score': 0.5951375961303...",1,-1,-1,-1
3,45628,Phyllis,Is it true that you're making Dwight the manager?,-1.0,"[{'label': 'NEGATIVE', 'score': 0.993801534175...","[{'label': 'LABEL_1', 'score': 0.4273286163806...","[{'label': 'POSITIVE', 'score': 0.998319804668...","[{'label': 'LABEL_1', 'score': 0.9912397265434...",-1,0,0,1
4,27785,Pam,"Oh, damn. [Pam looks down at her salad] They'v...",0.0,"[{'label': 'NEGATIVE', 'score': 0.999482750892...","[{'label': 'LABEL_0', 'score': 0.6407026648521...","[{'label': 'NEGATIVE', 'score': 0.994488358497...","[{'label': 'LABEL_0', 'score': 0.9335484504699...",-1,-1,-1,-1


We get a decent accuracy, but a major limitation is that two of the classifiers (RoBERTa and DistilBERT) predict only two classes (positive or negative), while our annotations have three classes (positive, neutral, negative). I worked around this by mapping the neutral labels in our annotations to positive when scoring those models, but this obviously reduces the measured accuracy by a lot.

### Combining measures

In [23]:
# Side-by-side view of the annotation and every model's decoded prediction.
# .copy() makes df_compare an independent frame, so the columns added to it
# later are not assignments on a slice of df_filtered.
df_compare = df_filtered[["line_text", "Sentiment", "pred_sentiment_label_roberta", "pred_sentiment_label_bert", "pred_sentiment_label_bert_uncased", "pred_sentiment_label_distilbert"]].copy()

# rename columns (by explicit mapping rather than by position, so a change in
# the selection order above cannot silently mislabel a model)
df_compare = df_compare.rename(columns={
    "Sentiment": "Annotated",
    "pred_sentiment_label_roberta": "Roberta",
    "pred_sentiment_label_bert": "Bert",
    "pred_sentiment_label_bert_uncased": "Bert_uncased",
    "pred_sentiment_label_distilbert": "Distilbert",
})

#### Functions to combine measures

1. Voting: take the majority vote of the different sentiment analyses

In [48]:
# make a new column with the majority vote
# mode(axis=1) returns one column per tied mode; [0] keeps only the first one.
# NOTE(review): ties therefore appear to resolve to the lowest label (e.g. a
# three-way -1/0/1 split yields -1), which biases the vote towards negative —
# confirm this tie-breaking is intended.
df_compare["Majority"] = df_compare[["Roberta", "Bert", "Bert_uncased", "Distilbert"]].mode(axis=1)[0]
# same vote without the "Bert" column
df_compare["Majority_minus_BERT"] = df_compare[["Roberta", "Bert_uncased", "Distilbert"]].mode(axis=1)[0]

# make a new column with the average vote
# (a continuous value between -1 and 1, not a class label)
df_compare["Average"] = df_compare[["Roberta", "Bert", "Bert_uncased", "Distilbert"]].mean(axis=1)
df_compare["Average_minus_BERT"] = df_compare[["Roberta", "Bert_uncased", "Distilbert"]].mean(axis=1)

# Y_val comes from the earlier scoring cell; df_compare must still be in the
# same row order for the comparison to be valid.
Y_pred_majority = df_compare["Majority"].values
Y_pred_majority_minus = df_compare["Majority_minus_BERT"].values
Y_pred_average = df_compare["Average"].values
Y_pred_average_minus = df_compare["Average_minus_BERT"].values

# The majority votes are class labels, so the full metric report applies.
result_score(Y_val, Y_pred_majority, "Majority", binary=False)
result_score(Y_val, Y_pred_majority_minus, "Majority_minus_BERT", binary=False)

# The averages are not class labels, so only the MSE is reported for them.
print(f"MSE for Average:\
      \n- - - - - - - - - -\
      \n{mean_squared_error(Y_val, Y_pred_average)}\n")

print(f"MSE for Average minus BERT:\
      \n- - - - - - - - - -\
      \n{mean_squared_error(Y_val, Y_pred_average_minus)}\n")

Analysis with Majority:          
- - - - - - - - - -           
Accuracy: 0.4792626728110599          
Precision: 0.5413793103448276          
Recall: 0.6029208472686733          
F1: 0.48141718106995884          
MSE: 0.6728110599078341

Analysis with Majority_minus_BERT:          
- - - - - - - - - -           
Accuracy: 0.39631336405529954          
Precision: 0.2768552311435523          
Recall: 0.5612820512820513          
F1: 0.3657848324514991          
MSE: 0.8248847926267281

MSE for Average:      
- - - - - - - - - -      
0.38623271889400923

MSE for Average minus BERT:      
- - - - - - - - - -      
0.43317972350230416



  _warn_prf(average, modifier, msg_start, len(result))


In [49]:
# Preview: annotation, per-model labels and the combined votes side by side.
df_compare.head()

Unnamed: 0,line_text,Annotated,Roberta,Bert,Bert_uncased,Distilbert,Majority,Majority_minus_BERT,Average,Average_minus_BERT
0,"Ok, but once this starts, it's going to be mov...",-1.0,1,-1,0,-1,-1.0,-1.0,-0.25,0.0
1,What?,0.0,-1,-1,0,-1,-1.0,-1.0,-0.75,-0.666667
2,"Just a little announcement folks, remember, th...",1.0,1,-1,-1,-1,-1.0,-1.0,-0.5,-0.333333
3,Is it true that you're making Dwight the manager?,-1.0,-1,0,0,1,0.0,-1.0,0.0,0.0
4,"Oh, damn. [Pam looks down at her salad] They'v...",0.0,-1,-1,-1,-1,-1.0,-1.0,-1.0,-1.0
