In [101]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import os

In [102]:
# Folder holding the model's raw result files. It is scanned with os.listdir
# further down, and the refactored CSVs are written to a "refactored"
# sub-folder of it. Paths are built with plain string concatenation, so the
# trailing slash is required.
output_folder = './output/aya-101/'
# Folder and file name of the tab-separated emotion lexicon read by
# load_dictionary (also concatenated, hence the trailing slash on the folder).
dictionary_folder= './dataset/'
dictionary_file = 'NRC-Emotion-Lexicon-Wordlevel-v0.92.txt'

In [103]:
def load_dictionary(dictionary_folder, dictionary_file):
    """Read the emotion lexicon and keep only the positive emotion associations.

    The file is parsed as header-less, tab-separated text with the columns
    ``term``, ``AffectCategory`` and ``AssociationFlag``.

    Parameters
    ----------
    dictionary_folder : str
        Folder containing the lexicon; concatenated directly with the file
        name, so it must end with a path separator.
    dictionary_file : str
        File name of the lexicon.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``AssociationFlag`` equals 1 and whose ``AffectCategory``
        is neither ``'positive'`` nor ``'negative'``. The original row index
        is preserved.
    """
    column_names = ['term', 'AffectCategory', 'AssociationFlag']
    lexicon = pd.read_csv(
        dictionary_folder + dictionary_file,
        sep='\t',
        header=None,
        names=column_names,
    )
    lexicon['AssociationFlag'] = lexicon['AssociationFlag'].astype('int64')

    # Keep flagged associations only, and drop the two sentiment polarities so
    # that only the emotion categories remain.
    is_associated = lexicon['AssociationFlag'] == 1
    is_polarity = lexicon['AffectCategory'].isin(['positive', 'negative'])
    return lexicon[is_associated & ~is_polarity]

In [104]:
def _parse_model_reply(value, target_group_name):
    """Convert one raw model reply into a record dict.

    Three shapes of reply are handled:

    * no ``{`` at all  -> treated as a refusal; the stripped text becomes the
      ``explanation`` of a ``"Cannot adopt emotion"`` record;
    * ``{`` but no ``}`` -> treated as a JSON object whose tail was cut off;
      the missing ``"}`` (or just ``}``) is appended before parsing;
    * otherwise -> parsed as JSON as-is.

    Raises whatever ``str.strip`` / ``json.loads`` raise on bad input, and
    ``ValueError`` if the parsed JSON is not an object.
    """
    text = value.strip()
    if "{" not in text:
        return {"emotion":"Cannot adopt emotion","explanation":text,"target_group":target_group_name}
    if "}" not in text:
        # Decide on the *stripped* text: trailing whitespace after the closing
        # quote must not cause a second quote to be appended.
        text += "}" if text.endswith("\"") else "\"}"
    record = json.loads(text)
    if not isinstance(record, dict):
        raise ValueError("expected a JSON object, got " + type(record).__name__)
    return record


def report_load(output_folder,output_file):
    """Load one results file and return its replies as a DataFrame.

    The file is a JSON object whose values are the raw model replies. Each
    reply is parsed with ``_parse_model_reply``; replies that cannot be parsed
    are printed and skipped so that one bad reply does not abort the file.

    Parameters
    ----------
    output_folder : str
        Folder containing the file; concatenated directly with the file name.
    output_file : str
        File name. The text before the first ``_`` is stored as
        ``target_group`` on refusal records.
        NOTE(review): for names like ``english_Asian_man_1_results.json`` this
        token is ``english`` - confirm that is the intended value.

    Returns
    -------
    pandas.DataFrame
        One row per successfully parsed reply, with the ``emotion`` column
        lower-cased. If nothing could be parsed, an empty frame with the
        columns ``emotion``, ``explanation`` and ``target_group`` is returned.
    """
    print(output_folder + output_file)
    target_group_name = output_file.split("_")[0]

    # Read with an explicit encoding so the result does not depend on the
    # platform's default locale.
    with open(output_folder + output_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    records = []
    for i, value in enumerate(data.values()):
        try:
            records.append(_parse_model_reply(value, target_group_name))
        except Exception as e:
            # Deliberately best-effort: report the reply and carry on.
            # repr() is used because the value may not be a string at all.
            print(e)
            if value != '':
                print(i, "\t", repr(value))

    if not records:
        return pd.DataFrame(columns=['emotion', 'explanation', 'target_group'])

    emotion = pd.DataFrame(records)
    emotion['emotion'] = emotion['emotion'].str.lower()
    return emotion

In [105]:
# Define a function to handle the replacement logic
def replace_none(row):
    """Return the affect category for a merged row, labelling unmatched rows.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``'AffectCategory'`` and ``'emotion'``.

    Returns
    -------
    The row's ``AffectCategory`` when it is present. When it is missing:
    ``'Cannot adopt emotion'`` if the emotion text is the refusal marker,
    otherwise ``'Cannot map the emotion'``.
    """
    if not pd.isna(row['AffectCategory']):
        return row['AffectCategory']
    # The emotion column is lower-cased when the report is loaded, so the
    # refusal marker has to be matched case-insensitively; an exact match on
    # 'Cannot adopt emotion' would never succeed.
    if str(row['emotion']).lower() == 'cannot adopt emotion':
        return 'Cannot adopt emotion'
    return 'Cannot map the emotion'


def merged_data(emotion,dictionary,output_folder,output_file):
    """Map each reported emotion to a lexicon category, save and summarise it.

    The emotion frame is left-joined to the dictionary on
    ``emotion == term``; rows without a match get their ``AffectCategory``
    filled in by ``replace_none``. Because it is a left join, an emotion that
    appears under several categories in the dictionary yields one row per
    category.

    Side effects: creates ``<output_folder>/refactored/`` if needed, writes the
    joined frame there as a tab-separated file named after ``output_file``
    (``.json`` replaced by ``_refactored.csv``), and prints the per-category
    row counts.

    Returns
    -------
    pandas.DataFrame
        The joined frame, including the filled ``AffectCategory`` column.
    """
    joined = pd.merge(
        emotion,
        dictionary,
        how='left',
        left_on='emotion',
        right_on='term',
    )
    joined['AffectCategory'] = joined.apply(replace_none, axis=1)

    # Persist the joined table next to the raw results.
    refactored_dir = output_folder+"/refactored/"
    os.makedirs(refactored_dir, exist_ok=True)
    csv_name = output_file.replace('.json', '_refactored.csv')
    joined.to_csv(refactored_dir + csv_name, sep='\t', header=True, index=False)

    # Count the remaining columns per category for a quick on-screen summary.
    helper_columns = ['emotion','explanation','term','AssociationFlag']
    target_group = joined.drop(columns=helper_columns).groupby('AffectCategory').count()
    print("The output is the following: \n",target_group)
    return joined

In [111]:
dictionary = load_dictionary(dictionary_folder, dictionary_file)
# os.listdir returns entries in arbitrary order; sort them so the printed
# reports come out in the same order on every run.
for output_file in sorted(os.listdir(output_folder)):
    # Only raw result files are reports: sub-folders and stray non-JSON
    # entries would otherwise be handed to json.load and abort the loop.
    if not output_file.endswith('.json'):
        continue
    if 'refactored' in output_file or 'images' in output_file:
        continue
    emotion = report_load(output_folder,output_file)
    merged = merged_data(emotion,dictionary,output_folder,output_file)

./output/aya-101/english_Asian_man_1_results.json
The output is the following: 
                         target_group  event
AffectCategory                             
Cannot map the emotion            22     22
anger                            396    396
anticipation                     114    114
disgust                          218    218
fear                             217    217
joy                              158    158
sadness                          570    570
surprise                          27     27
trust                            349    349
./output/aya-101/english_Asian_man_2_results.json
The output is the following: 
                         target_group  event
AffectCategory                             
Cannot map the emotion            23     23
anger                            388    388
anticipation                     107    107
disgust                          212    212
fear                             210    210
joy                              157    157
sa