In [1]:
import pandas as pd

In [2]:
ekman_mapping ={
"anger": ["anger", "annoyance", "disapproval"],
"disgust": ["disgust"],
"fear": ["fear", "nervousness"],
"joy": ["joy", "amusement", "approval", "excitement", "gratitude",  "love", "optimism", "relief", "pride", "admiration", "desire", "caring","happy","happiness"],
"sadness": ["sadness", "disappointment", "embarrassment", "grief",  "remorse"],
"surprise": ["surprise", "realization", "confusion", "curiosity"],
"no emotion" : ["no emotion","neutral","empty"]
}

# happy/happiness 추가, neutral 추가, empty 추가

In [3]:
ekman_dic = {}
for k,v in ekman_mapping.items():
    for value in v:
        ekman_dic[value] = k

### daily dialog

In [4]:
with open("DailyDialog/dialogues_text.txt") as f:
    dd_text_raw = f.read()
with open("DailyDialog/dialogues_emotion.txt") as f:
    dd_emotion_raw = f.read()
    
dd_text = dd_text_raw.split("\n")
dd_emotion = dd_emotion_raw.split("\n")

print(len(dd_text))
print(len(dd_emotion))

13119
13119


In [5]:
dd_text_result = []
dd_emotion_result = []

for i in range(len(dd_text)):
    if len(dd_text[i].split("__eou__")) == len(dd_emotion[i].split(" ")):
        for j in dd_text[i].split("__eou__"):
            if j.strip() != "":
                dd_text_result.append(j.strip())
        for k in dd_emotion[i].split(" "):
            try:
                dd_emotion_result.append(int(k))
            except:
                pass
print(len(dd_text_result))
print(len(dd_emotion_result))

102968
102968


In [6]:
dd_df = pd.DataFrame({"text":dd_text_result,"emotion":dd_emotion_result})

In [7]:
dd_df["emotion"] = dd_df["emotion"].apply(lambda x: { 0: "no emotion", 1: "anger", 2: "disgust", 3: "fear", 4: "happiness", 5: "sadness", 6: "surprise"}[x])

In [8]:
dd_df.head()

Unnamed: 0,text,emotion
0,The kitchen stinks .,disgust
1,I'll throw out the garbage .,no emotion
2,"So Dick , how about getting some coffee for to...",happiness
3,Coffee ? I don ’ t honestly like that kind of ...,disgust
4,"Come on , you can at least try a little , besi...",no emotion


In [9]:
dd_df["emotion"].unique()

array(['disgust', 'no emotion', 'happiness', 'anger', 'surprise',
       'sadness', 'fear'], dtype=object)

In [10]:
dd_df["emotion"] = dd_df["emotion"].apply(lambda x: ekman_dic[x])

In [11]:
dd_df.head()

Unnamed: 0,text,emotion
0,The kitchen stinks .,disgust
1,I'll throw out the garbage .,no emotion
2,"So Dick , how about getting some coffee for to...",joy
3,Coffee ? I don ’ t honestly like that kind of ...,disgust
4,"Come on , you can at least try a little , besi...",no emotion


In [12]:
dd_df = pd.concat([dd_df["text"],pd.get_dummies(dd_df["emotion"])],axis=1)

In [13]:
dd_df.head()

Unnamed: 0,text,anger,disgust,fear,joy,no emotion,sadness,surprise
0,The kitchen stinks .,0,1,0,0,0,0,0
1,I'll throw out the garbage .,0,0,0,0,1,0,0
2,"So Dick , how about getting some coffee for to...",0,0,0,1,0,0,0
3,Coffee ? I don ’ t honestly like that kind of ...,0,1,0,0,0,0,0
4,"Come on , you can at least try a little , besi...",0,0,0,0,1,0,0


### emotions_dataset_for_NLP

In [14]:
with open("emotions_dataset_for_NLP/test.txt") as f:
    nlp_test_raw = f.read()
with open("emotions_dataset_for_NLP/train.txt") as f:
    nlp_train_raw = f.read()
with open("emotions_dataset_for_NLP/val.txt") as f:
    nlp_val_raw = f.read()

In [15]:
nlp_result = []
for i in [nlp_test_raw,nlp_train_raw,nlp_val_raw]:
    for j in i.split("\n"):
        if j != "":
            nlp_result.append(j.split(";"))

In [16]:
nlp_df = pd.DataFrame(data=nlp_result,columns=["text","emotion"])

In [17]:
nlp_df.head()

Unnamed: 0,text,emotion
0,im feeling rather rotten so im not very ambiti...,sadness
1,im updating my blog because i feel shitty,sadness
2,i never make her separate from me because i do...,sadness
3,i left with my bouquet of red and yellow tulip...,joy
4,i was feeling a little vain when i did this one,sadness


In [18]:
nlp_df[10:20]

Unnamed: 0,text,emotion
10,i don t feel particularly agitated,fear
11,i feel beautifully emotional knowing that thes...,sadness
12,i pay attention it deepens into a feeling of b...,fear
13,i just feel extremely comfortable with the gro...,joy
14,i find myself in the odd position of feeling s...,love
15,i was feeling as heartbroken as im sure katnis...,sadness
16,i feel a little mellow today,joy
17,i feel like my only role now would be to tear ...,sadness
18,i feel just bcoz a fight we get mad to each ot...,anger
19,i feel like reds and purples are just so rich ...,joy


In [19]:
nlp_df["emotion"].unique()

array(['sadness', 'joy', 'fear', 'anger', 'love', 'surprise'],
      dtype=object)

In [20]:
nlp_df["emotion"] = nlp_df["emotion"].apply(lambda x: ekman_dic[x])

In [21]:
nlp_df[10:20]

Unnamed: 0,text,emotion
10,i don t feel particularly agitated,fear
11,i feel beautifully emotional knowing that thes...,sadness
12,i pay attention it deepens into a feeling of b...,fear
13,i just feel extremely comfortable with the gro...,joy
14,i find myself in the odd position of feeling s...,joy
15,i was feeling as heartbroken as im sure katnis...,sadness
16,i feel a little mellow today,joy
17,i feel like my only role now would be to tear ...,sadness
18,i feel just bcoz a fight we get mad to each ot...,anger
19,i feel like reds and purples are just so rich ...,joy


In [22]:
nlp_df = pd.concat([nlp_df["text"],pd.get_dummies(nlp_df["emotion"])],axis=1)

In [23]:
nlp_df.head()

Unnamed: 0,text,anger,fear,joy,sadness,surprise
0,im feeling rather rotten so im not very ambiti...,0,0,0,1,0
1,im updating my blog because i feel shitty,0,0,0,1,0
2,i never make her separate from me because i do...,0,0,0,1,0
3,i left with my bouquet of red and yellow tulip...,0,0,1,0,0
4,i was feeling a little vain when i did this one,0,0,0,1,0


### emotions_in_text

In [24]:
eit_raw = pd.read_csv("emotions_in_text/Emotion_final.csv")

In [25]:
eit_raw.head()

Unnamed: 0,Text,Emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,love
4,i am feeling grouchy,anger


In [26]:
eit_raw.columns = ["text","emotion"]

In [27]:
eit_raw["emotion"].unique()

array(['sadness', 'anger', 'love', 'surprise', 'fear', 'happy'],
      dtype=object)

In [28]:
eit_raw["emotion"] = eit_raw["emotion"].apply(lambda x: ekman_dic[x])

In [29]:
eit_df = eit_raw.copy()

In [30]:
eit_df.head()

Unnamed: 0,text,emotion
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger
3,i am ever feeling nostalgic about the fireplac...,joy
4,i am feeling grouchy,anger


In [31]:
eit_df = pd.concat([eit_df["text"],pd.get_dummies(eit_df["emotion"])],axis=1)

In [32]:
eit_df.head()

Unnamed: 0,text,anger,fear,joy,sadness,surprise
0,i didnt feel humiliated,0,0,0,1,0
1,i can go from feeling so hopeless to so damned...,0,0,0,1,0
2,im grabbing a minute to post i feel greedy wrong,1,0,0,0,0
3,i am ever feeling nostalgic about the fireplac...,0,0,1,0,0
4,i am feeling grouchy,1,0,0,0,0


### go_emotions

In [33]:
ge_test_raw = pd.read_csv("go_emotions/test_clean_7.csv")
ge_train_raw = pd.read_csv("go_emotions/train_clean_7.csv")
ge_val_raw = pd.read_csv("go_emotions/val_clean_7.csv")

ge_train_raw = ge_train_raw[ge_test_raw.columns]
ge_val_raw = ge_val_raw[ge_test_raw.columns]

In [34]:
ge_df = pd.concat([ge_test_raw, ge_train_raw, ge_val_raw],ignore_index=True)

In [35]:
ge_df.rename(columns={"Clean_text":"text","neutral":"no emotion"},inplace=True)

In [36]:
ge_df.head()

Unnamed: 0,text,joy,anger,surprise,sadness,disgust,fear,no emotion
0,i am really sorry about your situation frownin...,0,0,0,1,0,0,0
1,it is wonderful because it is awful at not with,1,0,0,0,0,0,0
2,kings fan here good luck to you guys ! will be...,1,0,0,0,0,0,0
3,i did not know that thank you for teaching me ...,1,0,0,0,0,0,0
4,they got bored from haunting earth for thousan...,0,0,0,0,0,0,1


### tweet_emotions

In [37]:
twt_raw = pd.read_csv("tweet_emotion/tweet_emotions.csv")

In [38]:
twt_raw.head()

Unnamed: 0,tweet_id,sentiment,content
0,1956967341,empty,@tiffanylue i know i was listenin to bad habi...
1,1956967666,sadness,Layin n bed with a headache ughhhh...waitin o...
2,1956967696,sadness,Funeral ceremony...gloomy friday...
3,1956967789,enthusiasm,wants to hang out with friends SOON!
4,1956968416,neutral,@dannycastillo We want to trade with someone w...


In [39]:
twt_df = twt_raw[["content","sentiment"]]

In [40]:
twt_df.head()

Unnamed: 0,content,sentiment
0,@tiffanylue i know i was listenin to bad habi...,empty
1,Layin n bed with a headache ughhhh...waitin o...,sadness
2,Funeral ceremony...gloomy friday...,sadness
3,wants to hang out with friends SOON!,enthusiasm
4,@dannycastillo We want to trade with someone w...,neutral


In [41]:
def tweet_cleaning(x):
    temp_split = x.split(" ")
    if temp_split[0].startswith("@"):
        temp_split = temp_split[1:]
    return " ".join(temp_split)

In [42]:
twt_df.columns=["text","emotion"]

In [43]:
twt_df["text"] = twt_df["text"].apply(lambda x: tweet_cleaning(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twt_df["text"] = twt_df["text"].apply(lambda x: tweet_cleaning(x))


In [44]:
twt_df.head()

Unnamed: 0,text,emotion
0,i know i was listenin to bad habit earlier an...,empty
1,Layin n bed with a headache ughhhh...waitin o...,sadness
2,Funeral ceremony...gloomy friday...,sadness
3,wants to hang out with friends SOON!,enthusiasm
4,We want to trade with someone who has Houston ...,neutral


In [45]:
twt_df["emotion"].unique()

array(['empty', 'sadness', 'enthusiasm', 'neutral', 'worry', 'surprise',
       'love', 'fun', 'hate', 'happiness', 'boredom', 'relief', 'anger'],
      dtype=object)

In [46]:
delete = []
for i in range(len(twt_df)):
    if (twt_df["emotion"][i] in ekman_dic) == 0:
        delete.append(i)

In [47]:
twt_df.drop(delete,inplace=True)
twt_df.reset_index(inplace=True,drop=True)
twt_df.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,text,emotion
0,i know i was listenin to bad habit earlier an...,empty
1,Layin n bed with a headache ughhhh...waitin o...,sadness
2,Funeral ceremony...gloomy friday...,sadness
3,We want to trade with someone who has Houston ...,neutral
4,"I should be sleep, but im not! thinking about ...",sadness


In [48]:
twt_df["emotion"] = twt_df["emotion"].apply(lambda x: ekman_dic[x])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  twt_df["emotion"] = twt_df["emotion"].apply(lambda x: ekman_dic[x])


In [49]:
twt_df = pd.concat([twt_df["text"],pd.get_dummies(twt_df["emotion"])],axis=1)

In [50]:
twt_df.head()

Unnamed: 0,text,anger,joy,no emotion,sadness,surprise
0,i know i was listenin to bad habit earlier an...,0,0,1,0,0
1,Layin n bed with a headache ughhhh...waitin o...,0,0,0,1,0
2,Funeral ceremony...gloomy friday...,0,0,0,1,0
3,We want to trade with someone who has Houston ...,0,0,1,0,0
4,"I should be sleep, but im not! thinking about ...",0,0,0,1,0


In [51]:
columns = []
for i in [dd_df,nlp_df,eit_df,ge_df,twt_df]:
    columns = columns + list(i.columns)
columns_set = set(columns)-{"text"}

In [52]:
columns_set

{'anger', 'disgust', 'fear', 'joy', 'no emotion', 'sadness', 'surprise'}

In [53]:
for i in [dd_df,nlp_df,eit_df,ge_df,twt_df]:
    non_columns = columns_set - set(i.columns)
    for c in non_columns:
        i[c] = 0

In [54]:
dd_df = dd_df[["text"]+list(ekman_mapping.keys())]
nlp_df = nlp_df[["text"]+list(ekman_mapping.keys())]
eit_df = eit_df[["text"]+list(ekman_mapping.keys())]
ge_df = ge_df[["text"]+list(ekman_mapping.keys())]
twt_df = twt_df[["text"]+list(ekman_mapping.keys())]

In [55]:
ekman_mapping.keys()

dict_keys(['anger', 'disgust', 'fear', 'joy', 'sadness', 'surprise', 'no emotion'])

In [56]:
for i in [dd_df,nlp_df,eit_df,ge_df,twt_df]:
    display(i.head())

Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,no emotion
0,The kitchen stinks .,0,1,0,0,0,0,0
1,I'll throw out the garbage .,0,0,0,0,0,0,1
2,"So Dick , how about getting some coffee for to...",0,0,0,1,0,0,0
3,Coffee ? I don ’ t honestly like that kind of ...,0,1,0,0,0,0,0
4,"Come on , you can at least try a little , besi...",0,0,0,0,0,0,1


Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,no emotion
0,im feeling rather rotten so im not very ambiti...,0,0,0,0,1,0,0
1,im updating my blog because i feel shitty,0,0,0,0,1,0,0
2,i never make her separate from me because i do...,0,0,0,0,1,0,0
3,i left with my bouquet of red and yellow tulip...,0,0,0,1,0,0,0
4,i was feeling a little vain when i did this one,0,0,0,0,1,0,0


Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,no emotion
0,i didnt feel humiliated,0,0,0,0,1,0,0
1,i can go from feeling so hopeless to so damned...,0,0,0,0,1,0,0
2,im grabbing a minute to post i feel greedy wrong,1,0,0,0,0,0,0
3,i am ever feeling nostalgic about the fireplac...,0,0,0,1,0,0,0
4,i am feeling grouchy,1,0,0,0,0,0,0


Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,no emotion
0,i am really sorry about your situation frownin...,0,0,0,0,1,0,0
1,it is wonderful because it is awful at not with,0,0,0,1,0,0,0
2,kings fan here good luck to you guys ! will be...,0,0,0,1,0,0,0
3,i did not know that thank you for teaching me ...,0,0,0,1,0,0,0
4,they got bored from haunting earth for thousan...,0,0,0,0,0,0,1


Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,no emotion
0,i know i was listenin to bad habit earlier an...,0,0,0,0,0,0,1
1,Layin n bed with a headache ughhhh...waitin o...,0,0,0,0,1,0,0
2,Funeral ceremony...gloomy friday...,0,0,0,0,1,0,0
3,We want to trade with someone who has Houston ...,0,0,0,0,0,0,1
4,"I should be sleep, but im not! thinking about ...",0,0,0,0,1,0,0


In [57]:
result = pd.concat([dd_df,nlp_df,eit_df,ge_df,twt_df],ignore_index=True)

In [58]:
result

Unnamed: 0,text,anger,disgust,fear,joy,sadness,surprise,no emotion
0,The kitchen stinks .,0,1,0,0,0,0,0
1,I'll throw out the garbage .,0,0,0,0,0,0,1
2,"So Dick , how about getting some coffee for to...",0,0,0,1,0,0,0
3,Coffee ? I don ’ t honestly like that kind of ...,0,1,0,0,0,0,0
4,"Come on , you can at least try a little , besi...",0,0,0,0,0,0,1
...,...,...,...,...,...,...,...,...
226189,,0,0,0,0,0,0,1
226190,Happy Mothers Day All my love,0,0,0,1,0,0,0
226191,Happy Mother's Day to all the mommies out ther...,0,0,0,1,0,0,0
226192,WASSUP BEAUTIFUL!!! FOLLOW ME!! PEEP OUT MY N...,0,0,0,1,0,0,0


In [59]:
result.to_csv("result.csv",index=False)