In [None]:
# connect to google colab
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [1]:
# base path
DATA_PATH = './drive/MyDrive/fyp-code/codes/data/emotion_intensity/'

## Import Libraries

In [None]:
# Libraries
import pandas as pd
import numpy as np
from tqdm import tqdm

## Load the dataframes

In [None]:
# get rule-based score (VADER and t2e)
rule_based_short_df = pd.read_csv(DATA_PATH+'emotion_intensity_rule_base_scoring_short_data.csv')
rule_based_long_df = pd.read_csv(DATA_PATH+'emotion_intensity_rule_base_scoring_long_data.csv')

In [None]:
rule_based_short_df.head()

Unnamed: 0,text_cleaned,vader_neg,angry_score,surprise_score,fear_score,sadness_score
0,I get to spend New Year is home again alone an...,0.306,0.0,0.0,0.0,1.0
1,"Depressed and lonely Stuck in a deep, never en...",0.527,0.0,0.0,0.4,0.6
2,Learning to pretend to have a good time had be...,0.054,0.0,0.0,0.33,0.0
3,So far he stop texting me after I said somethi...,0.103,0.0,0.5,0.5,0.0
4,sigh ?? I have not cried so much I am in so mu...,0.216,0.0,0.0,0.0,1.0


In [None]:
rule_based_long_df.head()

Unnamed: 0,text_cleaned,vader_neg,angry_score,surprise_score,fear_score,sadness_score
0,Just another night. Another night of feeling l...,0.176,0.0,0.12,0.44,0.38
1,Is it possible to fake depression? I have been...,0.201,0.0,0.11,0.27,0.5
2,Imagine being attractive Imagine what it would...,0.144,0.0,0.23,0.15,0.46
3,"Best moment to have anxiety It is am, I am tir...",0.128,0.05,0.32,0.34,0.15
4,"hi, I am a year-old male from the uk, over the...",0.146,0.0,0.0,0.36,0.57


In [None]:
# get feature-based score (regression on WASSA 2017 data)
feature_based_short_df = pd.read_csv(DATA_PATH+'emotion_intensity_feature_based_prediction_scoring_short_data.csv')
feature_based_long_df = pd.read_csv(DATA_PATH+'emotion_intensity_feature_based_prediction_scoring_long_data.csv')

In [None]:
feature_based_short_df.head()

Unnamed: 0,Text,text_cleaned,fear_score,angry_score,sadness_score
0,I get to spend New Year's home again alone and...,spend new year home lonely,0.367257,0.340932,0.604463
1,"Depressed and lonely /: Stuck in a deep, never...",depressed lonely stuck deep ending hole,0.453489,0.493586,0.590169
2,Learning to pretend to have a good time had be...,learning pretend good time natural skill hope ...,0.256979,0.306531,0.408736
3,So far he stop texting me…after I said somethi...,far stop texting meafter said somethingso hope...,0.458516,0.475224,0.408144
4,*sigh* ???? I haven't cried so much…I'm in so ...,sigh not cried muchi pain,0.527133,0.438837,0.559635


In [None]:
feature_based_long_df.head()

Unnamed: 0,Text,text_cleaned,fear_score,angry_score,sadness_score
0,Just another night. Another night of feeling l...,just night night feeling lonely just wondering...,0.501568,0.422321,0.498666
1,Is it possible to fake depression? I have been...,possible fake depression feeling bad month now...,0.634426,0.493567,0.724639
2,Imagine being attractive Imagine what it would...,imagine attractive imagine like desired look n...,0.407223,0.595135,0.452349
3,"Best moment to have anxiety It's 3:30am, I'm t...",best moment anxiety am tired want early guess ...,0.501608,0.404472,0.483677
4,"hi, I'm a 21 year-old male from the uk, over t...",hi yearold male uk past month increasingly dep...,0.383745,0.449346,0.483427


## Combine the scores of two datasets for references

In [None]:
# create dataframes to store all individual scores
short_score_df = pd.DataFrame()
long_score_df = pd.DataFrame()

# short data
short_score_df['text_raw'] = feature_based_short_df['Text']
short_score_df['text_cleaned_vader_t2e'] = rule_based_short_df['text_cleaned']
short_score_df['text_cleaned_feature_based'] = feature_based_short_df['text_cleaned']
short_score_df['vader_neg'] = rule_based_short_df['vader_neg']
short_score_df['anger_score_t2e'] = rule_based_short_df['angry_score']
short_score_df['surprise_score_t2e'] = rule_based_short_df['surprise_score']
short_score_df['fear_score_t2e'] = rule_based_short_df['fear_score']
short_score_df['sadness_score_t2e'] = rule_based_short_df['sadness_score']
short_score_df['anger_score_pred'] = feature_based_short_df['angry_score']
short_score_df['fear_score_pred'] = feature_based_short_df['fear_score']
short_score_df['sadness_score_pred'] = feature_based_short_df['sadness_score']

# long data
long_score_df['text_raw'] = feature_based_long_df['Text']
long_score_df['text_cleaned_vader_t2e'] = rule_based_long_df['text_cleaned']
long_score_df['text_cleaned_feature_based'] = feature_based_long_df['text_cleaned']
long_score_df['vader_neg'] = rule_based_long_df['vader_neg']
long_score_df['anger_score_t2e'] = rule_based_long_df['angry_score']
long_score_df['surprise_score_t2e'] = rule_based_long_df['surprise_score']
long_score_df['fear_score_t2e'] = rule_based_long_df['fear_score']
long_score_df['sadness_score_t2e'] = rule_based_long_df['sadness_score']
long_score_df['anger_score_pred'] = feature_based_long_df['angry_score']
long_score_df['fear_score_pred'] = feature_based_long_df['fear_score']
long_score_df['sadness_score_pred'] = feature_based_long_df['sadness_score']

In [None]:
short_score_df.head()

Unnamed: 0,text_raw,text_cleaned_vader_t2e,text_cleaned_feature_based,vader_neg,anger_score_t2e,surprise_score_t2e,fear_score_t2e,sadness_score_t2e,anger_score_pred,fear_score_pred,sadness_score_pred
0,I get to spend New Year's home again alone and...,I get to spend New Year is home again alone an...,spend new year home lonely,0.306,0.0,0.0,0.0,1.0,0.340932,0.367257,0.604463
1,"Depressed and lonely /: Stuck in a deep, never...","Depressed and lonely Stuck in a deep, never en...",depressed lonely stuck deep ending hole,0.527,0.0,0.0,0.4,0.6,0.493586,0.453489,0.590169
2,Learning to pretend to have a good time had be...,Learning to pretend to have a good time had be...,learning pretend good time natural skill hope ...,0.054,0.0,0.0,0.33,0.0,0.306531,0.256979,0.408736
3,So far he stop texting me…after I said somethi...,So far he stop texting me after I said somethi...,far stop texting meafter said somethingso hope...,0.103,0.0,0.5,0.5,0.0,0.475224,0.458516,0.408144
4,*sigh* ???? I haven't cried so much…I'm in so ...,sigh ?? I have not cried so much I am in so mu...,sigh not cried muchi pain,0.216,0.0,0.0,0.0,1.0,0.438837,0.527133,0.559635


In [None]:
long_score_df.head()

Unnamed: 0,text_raw,text_cleaned_vader_t2e,text_cleaned_feature_based,vader_neg,anger_score_t2e,surprise_score_t2e,fear_score_t2e,sadness_score_t2e,anger_score_pred,fear_score_pred,sadness_score_pred
0,Just another night. Another night of feeling l...,Just another night. Another night of feeling l...,just night night feeling lonely just wondering...,0.176,0.0,0.12,0.44,0.38,0.422321,0.501568,0.498666
1,Is it possible to fake depression? I have been...,Is it possible to fake depression? I have been...,possible fake depression feeling bad month now...,0.201,0.0,0.11,0.27,0.5,0.493567,0.634426,0.724639
2,Imagine being attractive Imagine what it would...,Imagine being attractive Imagine what it would...,imagine attractive imagine like desired look n...,0.144,0.0,0.23,0.15,0.46,0.595135,0.407223,0.452349
3,"Best moment to have anxiety It's 3:30am, I'm t...","Best moment to have anxiety It is am, I am tir...",best moment anxiety am tired want early guess ...,0.128,0.05,0.32,0.34,0.15,0.404472,0.501608,0.483677
4,"hi, I'm a 21 year-old male from the uk, over t...","hi, I am a year-old male from the uk, over the...",hi yearold male uk past month increasingly dep...,0.146,0.0,0.0,0.36,0.57,0.449346,0.383745,0.483427


## Compute the depression metric score from all the scores above using a combination of symbolic and subsymbolic AI method
Weightage inspired by the hourglass model and the plutchik's wheel of emotions

depression_score = 0.2\*(vader_score) + 0.05\*(angry_score_t2e + surprise_score_t2e + fear_score_t2e) + 0.25\*(sadness_score_t2e) + 0.05\*(angry_score_pred +  fear_score_pred) + 0.3\*(sadness_score_pred)

In [None]:
# get the depression metric score based on the above equation

# short dataset
depression_score_short_list = []
for i in tqdm(range(len(short_score_df))):
    dep_score = 0.2*short_score_df.vader_neg[i] + \
                0.05*(short_score_df.anger_score_t2e[i]+short_score_df.surprise_score_t2e[i]+short_score_df.fear_score_t2e[i]) + \
                0.25*(short_score_df.sadness_score_t2e[i]) + \
                0.05*(short_score_df.anger_score_pred[i]+short_score_df.fear_score_pred[i]) + \
                0.30*(short_score_df.sadness_score_pred[i])
    depression_score_short_list.append(dep_score)

# long dataset
depression_score_long_list = []
for i in tqdm(range(len(long_score_df))):
    dep_score = 0.2*long_score_df.vader_neg[i] + \
                0.05*(long_score_df.anger_score_t2e[i]+long_score_df.surprise_score_t2e[i]+long_score_df.fear_score_t2e[i]) + \
                0.25*(long_score_df.sadness_score_t2e[i]) + \
                0.05*(long_score_df.anger_score_pred[i]+long_score_df.fear_score_pred[i]) + \
                0.30*(long_score_df.sadness_score_pred[i])
    depression_score_long_list.append(dep_score)

100%|██████████| 834/834 [00:00<00:00, 10224.39it/s]
100%|██████████| 1436/1436 [00:00<00:00, 11675.91it/s]


In [None]:
# append to the dataframe with all the scores
short_score_df['depression_score'] = depression_score_short_list
long_score_df['depression_score'] = depression_score_long_list

In [None]:
# look at the short dataframe
short_score_df.head()

Unnamed: 0,text_raw,text_cleaned_vader_t2e,text_cleaned_feature_based,vader_neg,anger_score_t2e,surprise_score_t2e,fear_score_t2e,sadness_score_t2e,anger_score_pred,fear_score_pred,sadness_score_pred,depression_score
0,I get to spend New Year's home again alone and...,I get to spend New Year is home again alone an...,spend new year home lonely,0.306,0.0,0.0,0.0,1.0,0.340932,0.367257,0.604463,0.527948
1,"Depressed and lonely /: Stuck in a deep, never...","Depressed and lonely Stuck in a deep, never en...",depressed lonely stuck deep ending hole,0.527,0.0,0.0,0.4,0.6,0.493586,0.453489,0.590169,0.499804
2,Learning to pretend to have a good time had be...,Learning to pretend to have a good time had be...,learning pretend good time natural skill hope ...,0.054,0.0,0.0,0.33,0.0,0.306531,0.256979,0.408736,0.178096
3,So far he stop texting me…after I said somethi...,So far he stop texting me after I said somethi...,far stop texting meafter said somethingso hope...,0.103,0.0,0.5,0.5,0.0,0.475224,0.458516,0.408144,0.23973
4,*sigh* ???? I haven't cried so much…I'm in so ...,sigh ?? I have not cried so much I am in so mu...,sigh not cried muchi pain,0.216,0.0,0.0,0.0,1.0,0.438837,0.527133,0.559635,0.509389


In [None]:
# determine the maximum and minimum depression score
print(max(short_score_df['depression_score']))
print(min(short_score_df['depression_score']))

0.7157879105448722
0.08697953820228577


In [None]:
# look at the long dataframe
long_score_df.head()

Unnamed: 0,text_raw,text_cleaned_vader_t2e,text_cleaned_feature_based,vader_neg,anger_score_t2e,surprise_score_t2e,fear_score_t2e,sadness_score_t2e,anger_score_pred,fear_score_pred,sadness_score_pred,depression_score
0,Just another night. Another night of feeling l...,Just another night. Another night of feeling l...,just night night feeling lonely just wondering...,0.176,0.0,0.12,0.44,0.38,0.422321,0.501568,0.498666,0.353994
1,Is it possible to fake depression? I have been...,Is it possible to fake depression? I have been...,possible fake depression feeling bad month now...,0.201,0.0,0.11,0.27,0.5,0.493567,0.634426,0.724639,0.457991
2,Imagine being attractive Imagine what it would...,Imagine being attractive Imagine what it would...,imagine attractive imagine like desired look n...,0.144,0.0,0.23,0.15,0.46,0.595135,0.407223,0.452349,0.348623
3,"Best moment to have anxiety It's 3:30am, I'm t...","Best moment to have anxiety It is am, I am tir...",best moment anxiety am tired want early guess ...,0.128,0.05,0.32,0.34,0.15,0.404472,0.501608,0.483677,0.289007
4,"hi, I'm a 21 year-old male from the uk, over t...","hi, I am a year-old male from the uk, over the...",hi yearold male uk past month increasingly dep...,0.146,0.0,0.0,0.36,0.57,0.449346,0.383745,0.483427,0.376383


In [None]:
# determine the maximum and minimum depression score
print(max(long_score_df['depression_score']))
print(min(long_score_df['depression_score']))

0.5298691115140914
0.19468505854606627


In [None]:
# create a new dataframe to get only the columns of text and final depression score
# short dataset
short_score_final_df = pd.DataFrame()
short_score_final_df['text_raw'] = short_score_df['text_raw']
short_score_final_df['text_cleaned_vader_t2e'] = short_score_df['text_cleaned_vader_t2e']
short_score_final_df['text_cleaned_feature_based'] = short_score_df['text_cleaned_feature_based']
short_score_final_df['depression_score'] = short_score_df['depression_score']

# long dataset
long_score_final_df = pd.DataFrame()
long_score_final_df['text_raw'] = long_score_df['text_raw']
long_score_final_df['text_cleaned_vader_t2e'] = long_score_df['text_cleaned_vader_t2e']
long_score_final_df['text_cleaned_feature_based'] = long_score_df['text_cleaned_feature_based']
long_score_final_df['depression_score'] = long_score_df['depression_score']

## Export the dataset with the depression score

In [None]:
short_score_df.to_csv(DATA_PATH+'emotion_intensity_depression_score_short_text_full.csv', index=False)
long_score_df.to_csv(DATA_PATH+'emotion_intensity_depression_score_long_text_full.csv', index=False)
short_score_final_df.to_csv(DATA_PATH+'emotion_intensity_depression_score_short_text_summarized.csv', index=False)
long_score_final_df.to_csv(DATA_PATH+'emotion_intensity_depression_score_long_text_summarized.csv', index=False)