In [1]:
import pandas as pd
import re

# Lift pandas' display truncation limits: None means "no cap" for both the
# number of rows and the number of columns shown when a frame is rendered.
pd.options.display.max_rows = None
pd.options.display.max_columns = None


In [3]:
def remove_quotation(text):
    """Strip whitespace and the quoting punctuation wrapped around a value.

    After trimming surrounding whitespace, the following are removed, each at
    most once and in this order: a leading double quote, a trailing comma,
    and a trailing double quote.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned string.
    """
    cleaned = text.strip()
    # Order matters: the comma is checked before the closing quote, so
    # '"abc",' becomes 'abc' while '"abc,"' keeps its inner comma.
    cleaned = cleaned[1:] if cleaned.startswith('"') else cleaned
    cleaned = cleaned[:-1] if cleaned.endswith(',') else cleaned
    cleaned = cleaned[:-1] if cleaned.endswith('"') else cleaned
    return cleaned


def elf_to_df(path):
    """Build a DataFrame from a line-by-line scan of a UTF-8 text file.

    Every line has its square brackets removed and is stripped of surrounding
    whitespace. Lines that contain a colon are split at the first colon into
    a key and a value:

    * the key has all double quotes and commas removed;
    * in the value, the literal text backslash-u0092 becomes an apostrophe
      and backslash-u0085 becomes "...", after which the value is cleaned
      with ``remove_quotation``.

    Values are collected per key in file order. Lines without a colon are
    ignored.

    Args:
        path: Location of the file to read.

    Returns:
        A ``pandas.DataFrame`` with one column per distinct key. pandas
        requires every key to have collected the same number of values.
    """
    columns = {}
    with open(path, 'r', encoding="utf8") as handle:
        for raw_line in handle:
            stripped = re.sub(r'[\[\]]', "", raw_line).strip()
            if ":" not in stripped:
                continue
            # Split on the first colon only; later colons belong to the value.
            name, _, payload = stripped.partition(':')
            name = name.replace("\"", "").replace(',', "")
            payload = payload.replace(r"\u0092", "'").replace(r"\u0085", "...")
            columns.setdefault(name, []).append(remove_quotation(payload))
    return pd.DataFrame(columns)

def get_column_name(row):
    """Return the label of the first entry in ``row`` that equals 1.

    The entry labelled 'text' is never considered. Labels are examined in
    the order given by ``row.index``.

    Args:
        row: An object exposing ``.index`` and label lookup via ``row[label]``
            (for example a ``pandas.Series``).

    Returns:
        The first matching label, or None when no entry equals 1.
    """
    return next(
        (label for label in row.index if label != 'text' and row[label] == 1),
        None,
    )


# EmotionLines

In [48]:
# Parse the three EmotionLines "Friends" files:
# df = dev split, df1 = test split, df2 = train split.
df, df1, df2 = (
    elf_to_df(split_path)
    for split_path in (
        "./raw/EmotionLines/Friends/friends_dev.json",
        "./raw/EmotionLines/Friends/friends_test.json",
        "./raw/EmotionLines/Friends/friends_train.json",
    )
)


In [52]:
# Stack the dev, test and train frames into one frame. ignore_index=True gives
# the result a fresh 0..n-1 index; without it each split's own row labels are
# repeated, leaving duplicate index labels in the combined frame.
em_df = pd.concat([df, df1, df2], ignore_index=True)

In [53]:
# Show the column labels of the combined frame.
em_df.columns

Index(['speaker', 'utterance', 'emotion', 'annotation'], dtype='object')

In [56]:
# Remove the 'speaker' and 'annotation' columns. Rebinding the name (instead
# of inplace=True) combined with errors='ignore' lets this cell be re-run
# without a KeyError once the two columns are already gone.
em_df = em_df.drop(columns=['speaker', 'annotation'], errors='ignore')
em_df.head()

Unnamed: 0,utterance,emotion
0,"Oh my God, he's lost it. He's totally lost it.",non-neutral
1,What?,surprise
2,"Or! Or, we could go to the bank, close our acc...",neutral
3,You're a genius!,joy
4,"Aww, man, now we won't be bank buddies!",sadness


In [57]:
# Convert the frame into a list holding one {column: value} dict per row,
# then print the first three entries as a quick check.
em_dict = em_df.to_dict(orient='records')
print(em_dict[:3])

[{'utterance': "Oh my God, he's lost it. He's totally lost it.", 'emotion': 'non-neutral'}, {'utterance': 'What?', 'emotion': 'surprise'}, {'utterance': 'Or! Or, we could go to the bank, close our accounts and cut them off at the source.', 'emotion': 'neutral'}]


# EmpatheticDialogues

In [None]:
from datasets import load_dataset

In [None]:
# Fetch the 'empathetic_dialogues' dataset with datasets.load_dataset; the
# cells below index the result by split name.
ds = load_dataset('empathetic_dialogues')

In [None]:
# Take the validation split and keep only the examples whose 'context' field
# equals 'faithful'.
# NOTE(review): the name `ant_ds` does not match the 'faithful' filter value -
# confirm which context was intended.
val_ds = ds['validation']
ant_ds = val_ds.filter(lambda example: example['context'] == 'faithful')

In [None]:
# Preview the first ten utterances of the filtered subset.
ant_ds["utterance"][:10]

In [None]:
# Maps each 'context' value used below to one of five coarse labels:
# 'happy', 'sad', 'angry', 'surprised' or 'neutral'.
# NOTE(review): 'disappointed' and 'caring' map to 'neutral' here, while
# goemotion_tag_map sends 'disappointment' to 'sad' and 'caring' to 'happy' -
# confirm whether the two maps are meant to agree.
emp_tag_map = {
    'anticipating': 'happy',
    'impressed': 'happy',
    'guilty': 'sad',
    'surprised': 'surprised',
    'furious': 'angry',
    'excited': 'happy',
    'sad': 'sad',
    'afraid': 'sad',
    'confident': 'happy',
    'grateful': 'happy',
    'disgusted': 'sad',
    'jealous': 'sad',
    'faithful': 'happy',
    'trusting': 'happy',
    'prepared': 'happy',
    'joyful': 'happy',
    'embarrassed': 'sad',
    'lonely': 'sad',
    'ashamed': 'sad',
    'devastated': 'sad',
    'caring': 'neutral',
    'sentimental': 'neutral',
    'nostalgic': 'neutral',
    'hopeful': 'happy',
    'apprehensive': 'sad',
    'angry': 'angry',
    'annoyed': 'angry',
    'anxious': 'sad',
    'content': 'happy',
    'terrified': 'sad',
    'proud': 'happy',
    'disappointed': 'neutral',
}


In [None]:
# Build one record per validation example: the coarse label looked up from the
# example's 'context', and the utterance with every '_comma_' placeholder
# replaced by ', '.
val_tup = [
    {'context': emp_tag_map[example["context"]],
     'text': example['utterance'].replace('_comma_', ', ')}
    for example in ds['validation']
]

# GoEmotions

In [40]:
# Maps each GoEmotions emotion column name to one of five coarse labels:
# 'happy', 'sad', 'angry', 'surprised' or 'neutral'.
# NOTE(review): some assignments look debatable (e.g. 'amusement' and
# 'nervousness' -> 'surprised', 'confusion' -> 'sad') - confirm they are
# intentional.
goemotion_tag_map = {
    'admiration': 'happy',
    'amusement': 'surprised',
    'anger': 'angry',
    'annoyance': 'angry',
    'approval': 'happy',
    'caring': 'happy',
    'confusion': 'sad',
    'curiosity': 'happy',
    'desire': 'neutral',
    'disappointment': 'sad',
    'disapproval': 'angry',
    'disgust': 'angry',
    'embarrassment': 'sad',
    'excitement': 'happy',
    'fear': 'sad',
    'gratitude': 'happy',
    'grief': 'sad',
    # Fixed: 'joy' was mapped to 'sad'.
    'joy': 'happy',
    'love': 'happy',
    'nervousness': 'surprised',
    'neutral': 'neutral',
    'optimism': 'neutral',
    'pride': 'neutral',
    'realization': 'neutral',
    'relief': 'neutral',
    'remorse': 'sad',
    'sadness': 'sad',
    # Fixed: was 'surprise', a sixth label spelled differently from the
    # 'surprised' label used by every other entry.
    'surprise': 'surprised',
}

In [33]:
# Read the three GoEmotions CSV shards.
go1 = pd.read_csv('./raw/GoEmotions/data/full_dataset/goemotions_1.csv')
go2 = pd.read_csv('./raw/GoEmotions/data/full_dataset/goemotions_2.csv')
go3 = pd.read_csv('./raw/GoEmotions/data/full_dataset/goemotions_3.csv')

# Stack the shards. ignore_index=True assigns a fresh 0..n-1 index; without
# it each shard's own row labels are repeated, leaving duplicate index labels.
go_df = pd.concat([go1, go2, go3], ignore_index=True)

# Discard the columns that are not needed further down.
go_df = go_df.drop(columns=['id', 'author', 'subreddit', 'link_id', 'parent_id',
       'created_utc', 'rater_id', 'example_very_unclear'])


In [34]:
# For every row, record the name of the first non-'text' column whose value
# is 1 (None when there is no such column).
go_df['EmotionTag'] = go_df.apply(get_column_name, axis=1)

In [35]:
# The per-emotion indicator columns are dropped here; the 'EmotionTag' column
# computed earlier is kept.
go_df = go_df.drop(
    labels=[
        'admiration', 'amusement', 'anger', 'annoyance',
        'approval', 'caring', 'confusion', 'curiosity',
        'desire', 'disappointment', 'disapproval', 'disgust',
        'embarrassment', 'excitement', 'fear', 'gratitude',
        'grief', 'joy', 'love', 'nervousness',
        'optimism', 'pride', 'realization', 'relief',
        'remorse', 'sadness', 'surprise', 'neutral',
    ],
    axis='columns',
)

In [36]:
# Drop every row that has a missing value in any column; this removes the
# rows for which get_column_name returned None. The result is rebound to the
# name rather than mutating the frame with inplace=True.
go_df = go_df.dropna()

In [43]:
# Replace each GoEmotions column name stored in 'EmotionTag' with its label
# from goemotion_tag_map. Series.map yields NaN for values that are not keys
# of the map, so running this cell again on already-mapped labels can turn
# them into NaN.
go_df['EmotionTag'] = go_df['EmotionTag'].map(goemotion_tag_map)

In [45]:
# Convert the frame into a list holding one {column: value} dict per row.
go_dict = go_df.to_dict(orient='records')

In [47]:
# Preview the first two records.
go_dict[:2]

[{'text': 'That game hurt.', 'EmotionTag': 'sad'},
 {'text': "You do right, if you don't care then fuck 'em!",
  'EmotionTag': 'neutral'}]