# C-More

In [1]:
import json
import pandas as pd

#### Load JSON data into a DataFrame

In [2]:
# Parse the line-delimited JSON file: one JSON object per line.
list_json = []

# Explicit UTF-8 so decoding does not depend on the platform's locale default.
with open('tweet_json_1day.txt', encoding='utf-8') as file:
    for line in file:
        # Skip blank lines (e.g. a trailing newline), which json.loads would reject.
        if not line.strip():
            continue
        data = json.loads(line)
        list_json.append(data)

# Keep only these five fields; any other keys in the parsed objects are dropped.
df = pd.DataFrame(list_json, columns = ['id', 'text', 'lang', 'created_at', 'public_metrics'])

In [3]:
# Summarise the loaded frame: row count, columns, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7153 entries, 0 to 7152
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              7153 non-null   object
 1   text            7153 non-null   object
 2   lang            7153 non-null   object
 3   created_at      7153 non-null   object
 4   public_metrics  7153 non-null   object
dtypes: object(5)
memory usage: 279.5+ KB


#### Select only tweets in English

In [4]:
# Boolean mask of the rows whose language tag is English.
is_english = df['lang'] == 'en'
# .copy() yields an independent frame, so later edits do not touch df.
df_en = df[is_english].copy()

In [5]:
# Check how many rows remain after the English-only filter.
df_en.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5377 entries, 0 to 7151
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   id              5377 non-null   object
 1   text            5377 non-null   object
 2   lang            5377 non-null   object
 3   created_at      5377 non-null   object
 4   public_metrics  5377 non-null   object
dtypes: object(5)
memory usage: 252.0+ KB


In [6]:
# Columns not needed from here on; only `id` and `text` are kept.
columns_to_remove = ['lang', 'created_at', 'public_metrics']

# Rebind rather than mutate in place: avoids inplace=True and keeps the step chainable.
df_en = df_en.drop(columns=columns_to_remove)

In [7]:
# Renumber the rows 0..n-1 after filtering; drop=True discards the old index
# instead of keeping it as a column. Rebinding avoids inplace=True.
df_en = df_en.reset_index(drop=True)

In [8]:
# Preview the first rows of the English-only frame.
df_en.head()

Unnamed: 0,id,text
0,1539397722595377152,@trashevrythng @hardevrythng @McDonalds and th...
1,1539397657571074049,@trashevrythng @hardevrythng @McDonalds respec...
2,1539397645625458688,@PeePosh2 @Scottschlittenh @sceley2011 @Joe_Ma...
3,1539397571013115904,@auauwra too bad im sending 8 mcdonalds large ...
4,1539397434715082754,@_idkjia Last time I went to McDonalds. lol ...


In [9]:
# Preview the last rows; the index labels also confirm the renumbering.
df_en.tail()

Unnamed: 0,id,text
5372,1539035600451514375,if belos comes back we should just give him a ...
5373,1539035573263863808,we were at mcdonalds and suddenly 4 school bus...
5374,1539035538300207104,IM SORRY I TOLD U TO DRINK MCDONALDS SPRITE ht...
5375,1539035499242864641,Tough times @McDonalds inflation is shrinking ...
5376,1539035475352211456,@McDonalds It's literally raw so I am not sure...


#### Analyse the emotions of the tweets with the Hugging Face `pipeline` function

In [10]:
from transformers import pipeline

In [11]:
# Independent working copy for the emotion analysis; df_en itself stays untouched.
df_emo_hartmann = df_en.copy(deep=True)

In [12]:
# First rows of the working copy (same content as df_en at this point).
df_emo_hartmann.head()

Unnamed: 0,id,text
0,1539397722595377152,@trashevrythng @hardevrythng @McDonalds and th...
1,1539397657571074049,@trashevrythng @hardevrythng @McDonalds respec...
2,1539397645625458688,@PeePosh2 @Scottschlittenh @sceley2011 @Joe_Ma...
3,1539397571013115904,@auauwra too bad im sending 8 mcdonalds large ...
4,1539397434715082754,@_idkjia Last time I went to McDonalds. lol ...


In [13]:
# Last rows of the working copy (same content as df_en at this point).
df_emo_hartmann.tail()

Unnamed: 0,id,text
5372,1539035600451514375,if belos comes back we should just give him a ...
5373,1539035573263863808,we were at mcdonalds and suddenly 4 school bus...
5374,1539035538300207104,IM SORRY I TOLD U TO DRINK MCDONALDS SPRITE ht...
5375,1539035499242864641,Tough times @McDonalds inflation is shrinking ...
5376,1539035475352211456,@McDonalds It's literally raw so I am not sure...


In [14]:
# Model card: https://huggingface.co/j-hartmann/emotion-english-distilroberta-base
# top_k=1 -> only the single highest-scoring emotion is returned.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=1,
)
classifier("I love this!")

[{'label': 'joy', 'score': 0.9771687984466553}]

In [15]:
# Same model, now returning the three highest-scoring emotions.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=3,
)
classifier("I love this!")

[{'label': 'joy', 'score': 0.9771687984466553},
 {'label': 'surprise', 'score': 0.008528675884008408},
 {'label': 'neutral', 'score': 0.005764578003436327}]

In [16]:
# Same model with top_k=7; the result lists seven emotion labels with their scores.
classifier = pipeline(
    "text-classification",
    model="j-hartmann/emotion-english-distilroberta-base",
    top_k=7,
)
classifier("I love this!")

[{'label': 'joy', 'score': 0.9771687984466553},
 {'label': 'surprise', 'score': 0.008528675884008408},
 {'label': 'neutral', 'score': 0.005764578003436327},
 {'label': 'anger', 'score': 0.004419777542352676},
 {'label': 'sadness', 'score': 0.002092391485348344},
 {'label': 'disgust', 'score': 0.0016119879437610507},
 {'label': 'fear', 'score': 0.00041385198710486293}]

In [17]:
# Classify every tweet in one call; top_k=3 keeps the three highest-scoring
# emotions (label + score) per tweet.
tweet_texts = df_emo_hartmann['text'].tolist()
emotions = classifier(tweet_texts, top_k=3)

In [18]:
# Preview only the first few predictions instead of dumping the whole list
# (one entry per tweet) into the notebook output.
emotions[:5]

[[{'label': 'joy', 'score': 0.365884006023407},
  {'label': 'neutral', 'score': 0.2650728225708008},
  {'label': 'fear', 'score': 0.1775878220796585}],
 [{'label': 'joy', 'score': 0.4457860291004181},
  {'label': 'anger', 'score': 0.2831656336784363},
  {'label': 'neutral', 'score': 0.16409169137477875}],
 [{'label': 'fear', 'score': 0.5146477818489075},
  {'label': 'anger', 'score': 0.2957209348678589},
  {'label': 'sadness', 'score': 0.09062924981117249}],
 [{'label': 'sadness', 'score': 0.957469642162323},
  {'label': 'surprise', 'score': 0.018132803961634636},
  {'label': 'neutral', 'score': 0.014415279030799866}],
 [{'label': 'surprise', 'score': 0.6650456786155701},
  {'label': 'joy', 'score': 0.13482117652893066},
  {'label': 'neutral', 'score': 0.1345197558403015}],
 [{'label': 'sadness', 'score': 0.574595034122467},
  {'label': 'neutral', 'score': 0.13777540624141693},
  {'label': 'surprise', 'score': 0.11814521998167038}],
 [{'label': 'sadness', 'score': 0.6687462329864502},


In [19]:
# Attach the per-tweet predictions as a new column. Building the Series on the
# frame's own index makes the assignment positional and raises on a length
# mismatch, instead of aligning on labels and silently filling NaN.
df_emo_hartmann['emotion'] = pd.Series(emotions, index=df_emo_hartmann.index)

In [20]:
# Check that the first rows now carry the `emotion` column.
df_emo_hartmann.head()

Unnamed: 0,id,text,emotion
0,1539397722595377152,@trashevrythng @hardevrythng @McDonalds and th...,"[{'label': 'joy', 'score': 0.365884006023407},..."
1,1539397657571074049,@trashevrythng @hardevrythng @McDonalds respec...,"[{'label': 'joy', 'score': 0.4457860291004181}..."
2,1539397645625458688,@PeePosh2 @Scottschlittenh @sceley2011 @Joe_Ma...,"[{'label': 'fear', 'score': 0.5146477818489075..."
3,1539397571013115904,@auauwra too bad im sending 8 mcdonalds large ...,"[{'label': 'sadness', 'score': 0.9574696421623..."
4,1539397434715082754,@_idkjia Last time I went to McDonalds. lol ...,"[{'label': 'surprise', 'score': 0.665045678615..."


In [21]:
# Check that the last rows are populated as well.
df_emo_hartmann.tail()

Unnamed: 0,id,text,emotion
5372,1539035600451514375,if belos comes back we should just give him a ...,"[{'label': 'neutral', 'score': 0.9272969365119..."
5373,1539035573263863808,we were at mcdonalds and suddenly 4 school bus...,"[{'label': 'surprise', 'score': 0.914400100708..."
5374,1539035538300207104,IM SORRY I TOLD U TO DRINK MCDONALDS SPRITE ht...,"[{'label': 'neutral', 'score': 0.5742588043212..."
5375,1539035499242864641,Tough times @McDonalds inflation is shrinking ...,"[{'label': 'fear', 'score': 0.7552894949913025..."
5376,1539035475352211456,@McDonalds It's literally raw so I am not sure...,"[{'label': 'sadness', 'score': 0.5684058666229..."


In [22]:
# Look at the raw `emotion` column: each cell holds a list of label/score dicts.
df_emo_hartmann['emotion']

0       [{'label': 'joy', 'score': 0.365884006023407},...
1       [{'label': 'joy', 'score': 0.4457860291004181}...
2       [{'label': 'fear', 'score': 0.5146477818489075...
3       [{'label': 'sadness', 'score': 0.9574696421623...
4       [{'label': 'surprise', 'score': 0.665045678615...
                              ...                        
5372    [{'label': 'neutral', 'score': 0.9272969365119...
5373    [{'label': 'surprise', 'score': 0.914400100708...
5374    [{'label': 'neutral', 'score': 0.5742588043212...
5375    [{'label': 'fear', 'score': 0.7552894949913025...
5376    [{'label': 'sadness', 'score': 0.5684058666229...
Name: emotion, Length: 5377, dtype: object

In [24]:
 df_emo_hartmann['emotion'].map(lambda x: len(x)).value_counts()

3    5377
Name: emotion, dtype: int64

In [30]:
# Inspect the ranked predictions of the first tweet (index label 0).
df_emo_hartmann['emotion'][0]

[{'label': 'joy', 'score': 0.365884006023407},
 {'label': 'neutral', 'score': 0.2650728225708008},
 {'label': 'fear', 'score': 0.1775878220796585}]

In [27]:
# Label of the first (highest-scoring) prediction for the first tweet.
df_emo_hartmann['emotion'][0][0]['label']

'joy'

In [28]:
# Label of the second-ranked prediction for the first tweet.
df_emo_hartmann['emotion'][0][1]['label']

'neutral'

In [29]:
# Label of the third-ranked prediction for the first tweet.
df_emo_hartmann['emotion'][0][2]['label']

'fear'

In [31]:
# Split the ranked predictions into one label column per rank:
# top1_emotion, top2_emotion, top3_emotion.
for position in range(3):
    df_emo_hartmann[f'top{position + 1}_emotion'] = df_emo_hartmann['emotion'].map(
        lambda ranked: ranked[position]['label']
    )

In [32]:
# Display the frame, including the per-rank emotion label columns.
df_emo_hartmann

Unnamed: 0,id,text,emotion,top1_emotion,top2_emotion,top3_emotion
0,1539397722595377152,@trashevrythng @hardevrythng @McDonalds and th...,"[{'label': 'joy', 'score': 0.365884006023407},...",joy,neutral,fear
1,1539397657571074049,@trashevrythng @hardevrythng @McDonalds respec...,"[{'label': 'joy', 'score': 0.4457860291004181}...",joy,anger,neutral
2,1539397645625458688,@PeePosh2 @Scottschlittenh @sceley2011 @Joe_Ma...,"[{'label': 'fear', 'score': 0.5146477818489075...",fear,anger,sadness
3,1539397571013115904,@auauwra too bad im sending 8 mcdonalds large ...,"[{'label': 'sadness', 'score': 0.9574696421623...",sadness,surprise,neutral
4,1539397434715082754,@_idkjia Last time I went to McDonalds. lol ...,"[{'label': 'surprise', 'score': 0.665045678615...",surprise,joy,neutral
...,...,...,...,...,...,...
5372,1539035600451514375,if belos comes back we should just give him a ...,"[{'label': 'neutral', 'score': 0.9272969365119...",neutral,surprise,sadness
5373,1539035573263863808,we were at mcdonalds and suddenly 4 school bus...,"[{'label': 'surprise', 'score': 0.914400100708...",surprise,fear,anger
5374,1539035538300207104,IM SORRY I TOLD U TO DRINK MCDONALDS SPRITE ht...,"[{'label': 'neutral', 'score': 0.5742588043212...",neutral,sadness,surprise
5375,1539035499242864641,Tough times @McDonalds inflation is shrinking ...,"[{'label': 'fear', 'score': 0.7552894949913025...",fear,sadness,anger


In [34]:
# How often each emotion is ranked first across the tweets.
df_emo_hartmann['top1_emotion'].value_counts()

neutral     1331
fear         954
surprise     861
joy          782
anger        734
sadness      627
disgust       88
Name: top1_emotion, dtype: int64

In [35]:
# How often each emotion is ranked second across the tweets.
df_emo_hartmann['top2_emotion'].value_counts()

surprise    1485
neutral     1016
anger        917
joy          629
sadness      611
fear         596
disgust      123
Name: top2_emotion, dtype: int64

In [36]:
# How often each emotion is ranked third across the tweets.
df_emo_hartmann['top3_emotion'].value_counts()

sadness     1132
joy         1121
surprise    1084
neutral      880
anger        630
fear         402
disgust      128
Name: top3_emotion, dtype: int64