In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
# Base folder of the emotion-intensity datasets. Written as an absolute path (like
# base_folder, which locates the API-key archive) so it still resolves when the
# working directory is not /content; Drive is mounted at /content/drive.
DATA_PATH = '/content/drive/MyDrive/fyp-code/codes/data/emotion_intensity/'

In [None]:
# usual import
import os
import re
from urllib.parse import quote

import numpy as np
import pandas as pd
# for the api calls
import requests
from tqdm import tqdm

In [None]:
# to call the file that contains the api keys
base_folder = '/content/drive/MyDrive/fyp-code/codes'
training_path = os.path.join(base_folder, "sentic_api_key.zip") 
!unzip $training_path

#from sentic_api_key import CONCEPT_PARSING_KEY, POLARITY_CLASSIFICATION_KEY, INTENSITY_RANKING_KEY, EMOTION_CATEGORIZATION_KEY, DEPRESSION_IDENTIFICATION_KEY
from sentic_api_key import DEPRESSION_IDENTIFICATION_KEY

Archive:  /content/drive/MyDrive/fyp-code/codes/sentic_api_key.zip
  inflating: sentic_api_key.py       


## Import Short Text

In [None]:
# Load the short-text dataset, keeping only the cleaned text and its label.
short_data = pd.read_csv(
    DATA_PATH + 'emotion_intensity_depressed_clean_short_data_vader_t2e.csv'
).loc[:, ['text_cleaned_t2e_vader', 'Label']]
short_data.head(3)

Unnamed: 0,text_cleaned_t2e_vader,Label
0,I get to spend New Year is home again alone an...,1
1,"Depressed and lonely Stuck in a deep, never en...",1
2,Learning to pretend to have a good time had be...,1


In [None]:
# Show two example short texts (rows 0 and 15), one per line.
print(
    short_data['text_cleaned_t2e_vader'][0],
    short_data['text_cleaned_t2e_vader'][15],
    sep="\n",
)

I get to spend New Year is home again alone and lonely. ??
Absolutly shocking service from especially when owed Expected better from a comp Ive dealt with yr unhappy badservice


## Import Long Text

In [None]:
# Load the long-text dataset, keeping only the cleaned text and its label.
long_data = pd.read_csv(
    DATA_PATH + 'emotion_intensity_depressed_clean_long_data_vader_t2e.csv'
).loc[:, ['text_cleaned_t2e_vader', 'Label']]
long_data.head(3)

Unnamed: 0,text_cleaned_t2e_vader,Label
0,Just another night. Another night of feeling l...,1
1,Is it possible to fake depression? I have been...,1
2,Imagine being attractive Imagine what it would...,1


In [None]:
# Show two example long texts (rows 0 and 15), one per line.
print(
    long_data['text_cleaned_t2e_vader'][0],
    long_data['text_cleaned_t2e_vader'][15],
    sep="\n",
)

Just another night. Another night of feeling lonely and just wondering what I did wrong in life to deserve this unhappiness. I have never felt a pain stronger than being rejected by the love of your life. The person who give you a purpose. The person who is supposed to make everything better. You would give your life for this person and they just do not love you anywhere near the same.
My cat just got hit by a car I cannot even think about it too much. My family is a little sad but are kinda telling me to get over it. I have been in such a terrible depressive spell for the last few week and this just ruined everything. When older pet die by natural cause I do not cry a much a because I know we gave them the best life and they lived very long but my suki wa only year old she wa so fat and adorable and I cannot believe she is gone just because of a bad driver. She always came to me when I wa upset she would lay on top of me and snuggle. I wish I had more time with her. now I have almost 

## Do a test on the API call first

In [None]:
# Probe the endpoint once with a fixed sentence before looping over the datasets.
response = requests.get(
    f"https://sentic.net/api/en/{DEPRESSION_IDENTIFICATION_KEY}.py?text=a little sad and happy somehow"
)
# Printed one per line: the HTTP status code, the raw body (plain text here rather
# than JSON) and the Python type of that body.
print(response.status_code, response.text, type(response.text), sep="\n")

200
33%

<class 'str'>


In [None]:
# The body is a percentage string such as "33%": keep the digits and the decimal
# point, then scale to a 0-1 fraction. Parsing with float and keeping "." (the same
# way get_depression_score_from_api does) preserves fractional percentages; dropping
# the "." and using int would turn "46.6%" into 466, i.e. a score of 4.66.
score = float(re.sub("[^0-9.]", "", response.text))*0.01
score

0.33

## Helper function to get the depression scores from the sentic API

In [None]:
def get_depression_score_from_api(text, timeout=30):
    """Return the Sentic depression score of ``text`` as a fraction.

    The API answers with a percentage in plain text (e.g. ``"33%"``); the number
    is divided by 100 before it is returned (``0.33``).

    Parameters
    ----------
    text : str
        Text to score. Other values are converted with ``str()`` first.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30).

    Returns
    -------
    float
        The percentage scaled by 0.01, or ``-1`` when the request raises, the
        status code is not 200, or the body contains no number. ``-1`` is the
        sentinel for "this text could not be processed".
    """
    # Percent-encode the whole text. Left raw, '#' cuts the URL off as a fragment,
    # '&' starts a new query parameter and '+'/'%' are decoded into other characters,
    # so the server would receive a different (or truncated) text.
    encoded_text = quote(str(text), safe="")
    url = f"https://sentic.net/api/en/{DEPRESSION_IDENTIFICATION_KEY}.py?text={encoded_text}"

    try:
        # A timeout keeps one stalled connection from blocking the whole run.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        # One network failure should flag this row, not abort a long loop.
        print(f"request failed: {error}")
        return -1

    # check status code if success or not
    if response.status_code != 200:
        print("status code not 200!")
        return -1  # negative one to denote that an error occurred when processing this piece of text

    # Take the first number in the body; an empty or unexpected body becomes the
    # sentinel instead of a ValueError from float().
    number = re.search(r"\d+(?:\.\d+)?", response.text)
    if number is None:
        print("no score found in response!")
        return -1

    return float(number.group()) * 0.01

## Apply the Depression Identification API to the text data we have

In [None]:
# Smoke-test the helper on a single short-text row (row 10) before the full run.
score = get_depression_score_from_api(short_data['text_cleaned_t2e_vader'][10])
score

0.466

### Short data

In [None]:
# Score every short text: one API call per row, results collected in row order
# (rows that fail carry the helper's -1 sentinel).
short_data_depression_score_list = [
    get_depression_score_from_api(short_text)
    for short_text in tqdm(short_data['text_cleaned_t2e_vader'].tolist())
]

  5%|▍         | 38/834 [00:43<14:31,  1.09s/it]

status code not 200!


  5%|▌         | 42/834 [00:47<14:21,  1.09s/it]

status code not 200!


  6%|▌         | 51/834 [00:57<14:25,  1.11s/it]

status code not 200!


 11%|█▏        | 95/834 [01:46<13:34,  1.10s/it]

status code not 200!


 15%|█▍        | 125/834 [02:19<12:48,  1.08s/it]

status code not 200!


 16%|█▌        | 133/834 [02:28<13:00,  1.11s/it]

status code not 200!


 18%|█▊        | 151/834 [02:47<12:20,  1.08s/it]

status code not 200!


 19%|█▉        | 160/834 [02:57<12:10,  1.08s/it]

status code not 200!


 22%|██▏       | 185/834 [03:25<11:43,  1.08s/it]

status code not 200!


 26%|██▌       | 213/834 [03:55<11:15,  1.09s/it]

status code not 200!


 29%|██▊       | 239/834 [04:24<10:48,  1.09s/it]

status code not 200!


 30%|███       | 253/834 [04:39<10:38,  1.10s/it]

status code not 200!


 32%|███▏      | 264/834 [04:51<10:20,  1.09s/it]

status code not 200!


 32%|███▏      | 266/834 [04:53<10:19,  1.09s/it]

status code not 200!


 33%|███▎      | 278/834 [05:07<10:24,  1.12s/it]

status code not 200!


 34%|███▍      | 283/834 [05:12<10:09,  1.11s/it]

status code not 200!


 39%|███▉      | 325/834 [06:00<09:26,  1.11s/it]

status code not 200!


 43%|████▎     | 358/834 [06:36<08:32,  1.08s/it]

status code not 200!


 46%|████▌     | 385/834 [07:05<08:08,  1.09s/it]

status code not 200!


 47%|████▋     | 388/834 [07:09<08:11,  1.10s/it]

status code not 200!


 51%|█████     | 423/834 [07:47<07:32,  1.10s/it]

status code not 200!


 63%|██████▎   | 528/834 [09:43<05:39,  1.11s/it]

status code not 200!


 64%|██████▍   | 536/834 [09:51<05:24,  1.09s/it]

status code not 200!


 65%|██████▌   | 544/834 [10:00<05:19,  1.10s/it]

status code not 200!


 66%|██████▌   | 551/834 [10:08<05:08,  1.09s/it]

status code not 200!


 68%|██████▊   | 563/834 [10:21<04:59,  1.10s/it]

status code not 200!


 69%|██████▉   | 576/834 [10:36<04:42,  1.09s/it]

status code not 200!


 78%|███████▊  | 654/834 [12:02<03:17,  1.10s/it]

status code not 200!


 82%|████████▏ | 683/834 [12:34<02:44,  1.09s/it]

status code not 200!


 85%|████████▌ | 713/834 [13:07<02:12,  1.09s/it]

status code not 200!


 88%|████████▊ | 737/834 [13:33<01:47,  1.11s/it]

status code not 200!


 91%|█████████▏| 762/834 [14:01<01:19,  1.10s/it]

status code not 200!


 92%|█████████▏| 771/834 [14:11<01:08,  1.09s/it]

status code not 200!


 98%|█████████▊| 820/834 [15:05<00:15,  1.12s/it]

status code not 200!


100%|██████████| 834/834 [15:21<00:00,  1.10s/it]


In [None]:
# Pair every short text with the score the API returned for it.
depression_score_api_short_df = pd.DataFrame({
    'text_cleaned': short_data['text_cleaned_t2e_vader'],
    'depression_score': short_data_depression_score_list,
})
depression_score_api_short_df.head(50)

Unnamed: 0,text_cleaned,depression_score
0,I get to spend New Year is home again alone an...,0.44333
1,"Depressed and lonely Stuck in a deep, never en...",0.8
2,Learning to pretend to have a good time had be...,0.11
3,So far he stop texting me after I said somethi...,0.25
4,sigh ?? I have not cried so much I am in so mu...,0.44333
5,Thank god the last presentation is over! tomor...,0.0
6,"No, I am not wouldepressed because of the weat...",0.66667
7,Very sad day in Edmonton! Praying for all who ...,1.0
8,Losing my uncle after the wedding it just..I d...,0.33333
9,Nobody care in real life and nobody care here....,0.42571


In [None]:
# Retry only the rows whose first request failed (marked with the -1 sentinel).
# Results are written with .loc: the chained form `df.depression_score[index] = ...`
# raises pandas' SettingWithCopyWarning and is not guaranteed to modify the frame
# (it does not under copy-on-write).
failed_short_rows = depression_score_api_short_df.index[
    depression_score_api_short_df['depression_score'] == -1
]
for index in tqdm(failed_short_rows):
    sentence = depression_score_api_short_df.loc[index, 'text_cleaned']
    depression_score_api_short_df.loc[index, 'depression_score'] = get_depression_score_from_api(sentence)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
38it [00:01, 25.01it/s]

status code not 200!


42it [00:02, 13.49it/s]

status code not 200!


51it [00:04, 10.43it/s]

status code not 200!


95it [00:05, 21.05it/s]

status code not 200!


125it [00:06, 23.28it/s]

status code not 200!


133it [00:07, 17.92it/s]

status code not 200!


151it [00:08, 17.48it/s]

status code not 200!


160it [00:09, 14.73it/s]

status code not 200!


185it [00:10, 17.49it/s]

status code not 200!


213it [00:11, 20.01it/s]

status code not 200!


239it [00:12, 21.24it/s]

status code not 200!


253it [00:13, 18.38it/s]

status code not 200!


264it [00:14, 15.99it/s]

status code not 200!


266it [00:16, 11.68it/s]

status code not 200!


278it [00:17, 11.46it/s]

status code not 200!


283it [00:18,  9.50it/s]

status code not 200!


325it [00:19, 18.22it/s]

status code not 200!


358it [00:20, 21.84it/s]

status code not 200!


385it [00:21, 22.59it/s]

status code not 200!


388it [00:22, 16.65it/s]

status code not 200!


423it [00:23, 21.04it/s]

status code not 200!


528it [00:24, 43.50it/s]

status code not 200!


536it [00:25, 32.59it/s]

status code not 200!


544it [00:26, 25.18it/s]

status code not 200!


551it [00:28, 19.69it/s]

status code not 200!


563it [00:29, 17.30it/s]

status code not 200!


576it [00:30, 15.52it/s]

status code not 200!


654it [00:31, 32.32it/s]

status code not 200!


683it [00:32, 30.68it/s]

status code not 200!


713it [00:33, 29.71it/s]

status code not 200!


737it [00:34, 27.71it/s]

status code not 200!


762it [00:35, 26.26it/s]

status code not 200!


771it [00:36, 20.93it/s]

status code not 200!


834it [00:37, 22.09it/s]

status code not 200!





### Long data

In [None]:
# Score every long text: one API call per row, results collected in row order
# (rows that fail carry the helper's -1 sentinel).
long_data_depression_score_list = [
    get_depression_score_from_api(long_text)
    for long_text in tqdm(long_data['text_cleaned_t2e_vader'].tolist())
]

  1%|▏         | 19/1436 [00:25<27:17,  1.16s/it]

status code not 200!


  7%|▋         | 95/1436 [02:12<35:46,  1.60s/it]

status code not 200!


 10%|█         | 149/1436 [03:28<30:33,  1.42s/it]

status code not 200!


 11%|█         | 158/1436 [03:40<25:41,  1.21s/it]

status code not 200!


 12%|█▏        | 170/1436 [03:58<27:44,  1.31s/it]

status code not 200!


 13%|█▎        | 188/1436 [04:24<27:56,  1.34s/it]

status code not 200!


 15%|█▍        | 210/1436 [04:57<30:51,  1.51s/it]

status code not 200!


 21%|██        | 304/1436 [07:10<28:59,  1.54s/it]

status code not 200!


 22%|██▏       | 321/1436 [07:34<26:03,  1.40s/it]

status code not 200!


 24%|██▍       | 345/1436 [08:07<22:29,  1.24s/it]

status code not 200!


 25%|██▌       | 360/1436 [08:28<23:52,  1.33s/it]

status code not 200!


 27%|██▋       | 388/1436 [09:09<19:32,  1.12s/it]

status code not 200!


 27%|██▋       | 393/1436 [09:16<24:18,  1.40s/it]

status code not 200!


 27%|██▋       | 394/1436 [09:18<26:50,  1.55s/it]

status code not 200!


 28%|██▊       | 397/1436 [09:21<22:14,  1.28s/it]

status code not 200!


 44%|████▍     | 631/1436 [14:58<24:30,  1.83s/it]

status code not 200!


 47%|████▋     | 681/1436 [16:12<21:02,  1.67s/it]

status code not 200!


 49%|████▉     | 705/1436 [16:45<17:15,  1.42s/it]

status code not 200!


 51%|█████     | 730/1436 [17:21<15:35,  1.32s/it]

status code not 200!


 54%|█████▍    | 772/1436 [18:21<17:29,  1.58s/it]

status code not 200!


 54%|█████▍    | 773/1436 [18:22<17:14,  1.56s/it]

status code not 200!


 56%|█████▌    | 798/1436 [19:00<15:31,  1.46s/it]

status code not 200!


 60%|██████    | 866/1436 [20:32<12:53,  1.36s/it]

status code not 200!


 63%|██████▎   | 901/1436 [21:21<12:14,  1.37s/it]

status code not 200!


 65%|██████▍   | 930/1436 [22:01<09:57,  1.18s/it]

status code not 200!


 65%|██████▌   | 935/1436 [22:10<11:44,  1.41s/it]

status code not 200!


 66%|██████▌   | 943/1436 [22:21<10:12,  1.24s/it]

status code not 200!


 66%|██████▋   | 954/1436 [22:36<10:57,  1.36s/it]

status code not 200!


 68%|██████▊   | 980/1436 [23:13<12:48,  1.69s/it]

status code not 200!


 72%|███████▏  | 1035/1436 [24:30<07:37,  1.14s/it]

status code not 200!


 77%|███████▋  | 1102/1436 [26:04<08:00,  1.44s/it]

status code not 200!


 79%|███████▉  | 1134/1436 [26:51<06:49,  1.36s/it]

status code not 200!


 80%|███████▉  | 1145/1436 [27:06<06:48,  1.40s/it]

status code not 200!


 81%|████████▏ | 1168/1436 [27:38<05:37,  1.26s/it]

status code not 200!


 82%|████████▏ | 1177/1436 [27:52<05:53,  1.36s/it]

status code not 200!


 85%|████████▍ | 1218/1436 [28:48<04:33,  1.25s/it]

status code not 200!


 90%|████████▉ | 1290/1436 [30:28<03:15,  1.34s/it]

status code not 200!


 90%|█████████ | 1294/1436 [30:33<02:50,  1.20s/it]

status code not 200!


 95%|█████████▌| 1365/1436 [32:15<01:24,  1.19s/it]

status code not 200!


 98%|█████████▊| 1409/1436 [33:17<00:32,  1.22s/it]

status code not 200!


 99%|█████████▉| 1426/1436 [33:40<00:13,  1.38s/it]

status code not 200!


100%|██████████| 1436/1436 [33:54<00:00,  1.42s/it]


In [None]:
# Pair every long text with the score the API returned for it.
depression_score_api_long_df = pd.DataFrame({
    'text_cleaned': long_data['text_cleaned_t2e_vader'],
    'depression_score': long_data_depression_score_list,
})
depression_score_api_long_df.head(50)

Unnamed: 0,text_cleaned,depression_score
0,Just another night. Another night of feeling l...,0.376
1,Is it possible to fake depression? I have been...,0.4726
2,Imagine being attractive Imagine what it would...,0.66
3,"Best moment to have anxiety It is am, I am tir...",0.16298
4,"hi, I am a year-old male from the uk, over the...",0.51067
5,I do not want to die feeling like I wa never l...,0.3848
6,I wa just curious what your feeling are on lon...,0.47901
7,"My hearing, attentiveness and social awareness...",0.1525
8,Hello out there.. Ugh. I battle with depressio...,0.38806
9,Currently waiting at the hospital to be seen I...,0.31188


In [None]:
# Retry only the rows whose first request failed (marked with the -1 sentinel).
# Results are written with .loc: the chained form `df.depression_score[index] = ...`
# raises pandas' SettingWithCopyWarning and is not guaranteed to modify the frame
# (it does not under copy-on-write).
failed_long_rows = depression_score_api_long_df.index[
    depression_score_api_long_df['depression_score'] == -1
]
for index in tqdm(failed_long_rows):
    sentence = depression_score_api_long_df.loc[index, 'text_cleaned']
    depression_score_api_long_df.loc[index, 'depression_score'] = get_depression_score_from_api(sentence)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """
19it [00:00, 19.63it/s]

status code not 200!


95it [00:03, 32.87it/s]

status code not 200!


149it [00:04, 36.76it/s]

status code not 200!


158it [00:05, 28.15it/s]

status code not 200!


170it [00:06, 24.85it/s]

status code not 200!


188it [00:07, 22.13it/s]

status code not 200!


210it [00:08, 19.67it/s]

status code not 200!


304it [00:10, 34.63it/s]

status code not 200!


321it [00:11, 28.01it/s]

status code not 200!


360it [00:13, 22.48it/s]

status code not 200!


388it [00:14, 25.43it/s]

status code not 200!


393it [00:16, 16.85it/s]

status code not 200!


395it [00:18, 10.38it/s]

status code not 200!


397it [00:19,  8.20it/s]

status code not 200!


631it [00:21, 47.58it/s]

status code not 200!


681it [00:23, 39.06it/s]

status code not 200!


705it [00:25, 32.82it/s]

status code not 200!


730it [00:26, 29.59it/s]

status code not 200!


772it [00:28, 25.69it/s]

status code not 200!


775it [00:30, 19.21it/s]

status code not 200!


798it [00:31, 18.43it/s]

status code not 200!


866it [00:32, 27.03it/s]

status code not 200!


901it [00:33, 30.08it/s]

status code not 200!


930it [00:34, 31.37it/s]

status code not 200!


935it [00:35, 25.58it/s]

status code not 200!


943it [00:36, 21.94it/s]

status code not 200!


954it [00:37, 15.69it/s]

status code not 200!


980it [00:39, 15.49it/s]

status code not 200!


1035it [00:40, 26.61it/s]

status code not 200!


1102it [00:41, 33.79it/s]

status code not 200!


1134it [00:42, 30.36it/s]

status code not 200!


1145it [00:44, 23.08it/s]

status code not 200!


1168it [00:45, 22.74it/s]

status code not 200!


1177it [00:46, 18.49it/s]

status code not 200!


1218it [00:47, 23.49it/s]

status code not 200!


1290it [00:48, 34.33it/s]

status code not 200!


1294it [00:49, 27.90it/s]

status code not 200!


1365it [00:50, 42.91it/s]

status code not 200!


1409it [00:51, 45.88it/s]

status code not 200!


1436it [00:52, 27.37it/s]

status code not 200!





## Export the dataset

In [None]:
# Write both score tables (failed rows still carry -1) next to the input data.
for export_frame, export_name in (
    (depression_score_api_short_df, 'emotion_intensity_sentic_depression_identification_short_data.csv'),
    (depression_score_api_long_df, 'emotion_intensity_sentic_depression_identification_long_data.csv'),
):
    export_frame.to_csv(DATA_PATH + export_name, index=False)

## Filling the negative (-1, failed request) scores with zero

In [None]:
# Read the exported score tables back in (short first, then long).
score_short, score_long = (
    pd.read_csv(DATA_PATH + score_file)
    for score_file in (
        'emotion_intensity_sentic_depression_identification_short_data.csv',
        'emotion_intensity_sentic_depression_identification_long_data.csv',
    )
)

In [None]:
# Replace the -1 error sentinel with 0 in the short data.
# One boolean-mask .loc assignment writes into score_short itself; the per-row
# chained form `score_short.depression_score[index] = ...` raises
# SettingWithCopyWarning and is not guaranteed to modify the frame.
# NOTE(review): after this step a failed API call cannot be told apart from a
# genuine score of 0 - confirm that is acceptable downstream.
score_short.loc[score_short['depression_score'] == -1, 'depression_score'] = 0.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
834it [00:00, 46711.66it/s]


In [None]:
# Preview the first rows of the short-data scores after the sentinel replacement.
score_short.head(50)

Unnamed: 0,text_cleaned,depression_score
0,I get to spend New Year is home again alone an...,0.44333
1,"Depressed and lonely Stuck in a deep, never en...",0.8
2,Learning to pretend to have a good time had be...,0.11
3,So far he stop texting me after I said somethi...,0.25
4,sigh ?? I have not cried so much I am in so mu...,0.44333
5,Thank god the last presentation is over! tomor...,0.0
6,"No, I am not wouldepressed because of the weat...",0.66667
7,Very sad day in Edmonton! Praying for all who ...,1.0
8,Losing my uncle after the wedding it just..I d...,0.33333
9,Nobody care in real life and nobody care here....,0.42571


In [None]:
# Replace the -1 error sentinel with 0 in the long data.
# One boolean-mask .loc assignment writes into score_long itself; the per-row
# chained form `score_long.depression_score[index] = ...` raises
# SettingWithCopyWarning and is not guaranteed to modify the frame.
# NOTE(review): after this step a failed API call cannot be told apart from a
# genuine score of 0 - confirm that is acceptable downstream.
score_long.loc[score_long['depression_score'] == -1, 'depression_score'] = 0.0

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.
1436it [00:00, 68200.07it/s]


In [None]:
# Preview the first rows of the long-data scores after the sentinel replacement.
score_long.head(50)

Unnamed: 0,text_cleaned,depression_score
0,Just another night. Another night of feeling l...,0.376
1,Is it possible to fake depression? I have been...,0.4726
2,Imagine being attractive Imagine what it would...,0.66
3,"Best moment to have anxiety It is am, I am tir...",0.16298
4,"hi, I am a year-old male from the uk, over the...",0.51067
5,I do not want to die feeling like I wa never l...,0.3848
6,I wa just curious what your feeling are on lon...,0.47901
7,"My hearing, attentiveness and social awareness...",0.1525
8,Hello out there.. Ugh. I battle with depressio...,0.38806
9,Currently waiting at the hospital to be seen I...,0.31188


In [None]:
# Save the final score tables under the same file names used for the earlier export,
# so those files are replaced.
for final_frame, final_name in (
    (score_short, 'emotion_intensity_sentic_depression_identification_short_data.csv'),
    (score_long, 'emotion_intensity_sentic_depression_identification_long_data.csv'),
):
    final_frame.to_csv(DATA_PATH + final_name, index=False)