In [1]:
## Loading the emotions dataset
# This loads the emotions dataset from a specified path and prints the first few entries.
import pandas as pd

### loading the separate datasets
df1 = pd.read_csv("data/goemotions_1.csv")
df2 = pd.read_csv("data/goemotions_2.csv")
df3 = pd.read_csv("data/goemotions_3.csv")

### concatenating the datasets
df = pd.concat([df1, df2, df3], ignore_index=True)

In [2]:
### Reducing emotion mapping into 7 proposed for effective computing

emotion_mapping = {
    "joy": ["joy", "excitement", "amusement", "pride"],
    "sadness": ["sadness", "disappointment", "grief"],
    "anger": ["anger", "annoyance", "frustration"],
    "anxiety": ["fear", "nervousness", "worry"],
    "love": ["love", "caring", "gratitude"],
    "surprise": ["surprise"],
    "neutral": ["neutral", "confusion", "curiosity", "realization"]
}


In [3]:
## drop all columns except 'text' and 'labels'
df.drop(
    columns=[
        "id",
        "author",
        "subreddit",
        "link_id",
        "parent_id",
        "created_utc",
        "rater_id",
        "example_very_unclear",
    ]
, inplace=True)

df.head()

Unnamed: 0,text,admiration,amusement,anger,annoyance,approval,caring,confusion,curiosity,desire,...,love,nervousness,optimism,pride,realization,relief,remorse,sadness,surprise,neutral
0,That game hurt.,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
1,>sexuality shouldn’t be a grouping category I...,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,"You do right, if you don't care then fuck 'em!",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1
3,Man I love reddit.,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,"[NAME] was nowhere near them, he was by the Fa...",0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [4]:
## Transforming the data according to the new emotion mapping
## Converting columns to long format
df_long = df.melt(
    id_vars = "text",
    var_name = "emotions"
)
df_long

Unnamed: 0,text,emotions,value
0,That game hurt.,admiration,0
1,>sexuality shouldn’t be a grouping category I...,admiration,0
2,"You do right, if you don't care then fuck 'em!",admiration,0
3,Man I love reddit.,admiration,0
4,"[NAME] was nowhere near them, he was by the Fa...",admiration,0
...,...,...,...
5914295,Everyone likes [NAME].,neutral,0
5914296,Well when you’ve imported about a gazillion of...,neutral,0
5914297,That looks amazing,neutral,0
5914298,The FDA has plenty to criticize. But like here...,neutral,0


In [5]:
### Filtering to keep only rows with 1s (indicating presence of emotion)
df_long = df_long[df_long["value"] == 1]
df_long.head()

Unnamed: 0,text,emotions,value
15,"I appreciate it, that's good to know. I hope I...",admiration,1
18,Pretty much every Punjabi dude I've met.,admiration,1
28,"Lots, play store or apple store vpn. Nord is good",admiration,1
35,nice!! I'll try this one,admiration,1
50,She’s like a kewpie doll with them. Precious.,admiration,1


In [6]:
## drop the 'value' column as it's no longer needed
df_long.drop(columns=["value"], inplace=True)
df_long.reset_index(inplace=True, drop=True)
df_long

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_long.drop(columns=["value"], inplace=True)


Unnamed: 0,text,emotions
0,"I appreciate it, that's good to know. I hope I...",admiration
1,Pretty much every Punjabi dude I've met.,admiration
2,"Lots, play store or apple store vpn. Nord is good",admiration
3,nice!! I'll try this one,admiration
4,She’s like a kewpie doll with them. Precious.,admiration
...,...,...
249524,OH YEAH!!,neutral
249525,Let me give you a hint: THEY PLAY IN BOSTON!!!,neutral
249526,to google cuz I wasn't alive back then but dam...,neutral
249527,"Wow, she headlines two shows now",neutral


In [7]:
### Applying the new emotion mapping
df_long["emotion"] = df_long.apply(
    lambda row: (
        "joy"
        if row["emotions"] in emotion_mapping["joy"]
        else "sadness"
        if row["emotions"] in emotion_mapping["sadness"]
        else "anger"
        if row["emotions"] in emotion_mapping["anger"]
        else "anxiety"
        if row["emotions"] in emotion_mapping["anxiety"]
        else "love"
        if row["emotions"] in emotion_mapping["love"]
        else "surprise"
        if row["emotions"] in emotion_mapping["surprise"]
        else "neutral"
        if row["emotions"] in emotion_mapping["neutral"]
        else None
    ),
    axis=1,
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_long["emotion"] = df_long.apply(


In [None]:

## Cleaning up the dataframe
df_long.drop(columns=["emotions"], inplace=True)

## Filtering out any rows where emotion is None (in case of unmapped emotions)
df_long = df_long[df_long["emotion"].notnull()]


## Grouping by text to aggregate emotions
df_final  = df_long.groupby("text")["emotion"].apply(lambda x: x.tolist()).reset_index()
df_final.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_long.drop(columns=["emotions"], inplace=True)


Unnamed: 0,text,emotion
0,"""If you don't wear BROWN AND ORANGE...YOU DON...","[anger, anger, anger, neutral, neutral]"
1,"""What do Scottish people look like?"" How I wo...","[neutral, neutral, neutral, love, love]"
2,"### A surprise, to be sure, but a welcome one","[surprise, surprise, surprise]"
3,"'*Pray*, v. To ask that the laws of the unive...","[neutral, neutral, neutral]"
4,">it'll get invaded by tankie, unfortunately. ...","[neutral, neutral]"


In [None]:
## Function to get the final emotion based on frequency
def get_final_emotion(emotion):
    max_count = 0
    final_em = None
    for em in emotion:
        if emotion.count(em) > max_count:
            max_count = emotion.count(em)
            final_em = em
    return final_em


df_final["final_emotion"] = df_final["emotion"].apply(get_final_emotion)
df_final.head()


## dropping the intermediate 'emotion' column
df_final.drop(columns=["emotion"], inplace=True)
df_final.head()

Unnamed: 0,text,final_emotion
0,"""If you don't wear BROWN AND ORANGE...YOU DON...",anger
1,"""What do Scottish people look like?"" How I wo...",neutral
2,"### A surprise, to be sure, but a welcome one",surprise
3,"'*Pray*, v. To ask that the laws of the unive...",neutral
4,">it'll get invaded by tankie, unfortunately. ...",neutral
