In [237]:
import pandas as pd
import json
import matplotlib.pyplot as plt
import os

In [238]:
# Folder holding the raw model result files that the driver cell iterates over.
output_folder = './output/Meta-Llama-3-8B-Instruct/'

# Location and file name of the emotion lexicon read by load_dictionary().
dictionary_folder = './dataset/'
dictionary_file = 'NRC-Emotion-Lexicon-Wordlevel-v0.92.txt'

In [239]:
def load_dictionary(dictionary_folder, dictionary_file):
    """Read the lexicon file and keep only the positively flagged emotion rows.

    The file is read as header-less, tab-separated text with three columns
    named ``term``, ``AffectCategory`` and ``AssociationFlag``.

    Parameters
    ----------
    dictionary_folder : str
        Folder prefix; it is concatenated directly with ``dictionary_file``,
        so it must already end with a path separator.
    dictionary_file : str
        File name of the lexicon inside ``dictionary_folder``.

    Returns
    -------
    pandas.DataFrame
        Rows whose ``AssociationFlag`` equals 1 and whose ``AffectCategory``
        is neither ``'positive'`` nor ``'negative'``. The original row index
        is preserved.
    """
    column_names = ['term', 'AffectCategory', 'AssociationFlag']
    lexicon = pd.read_csv(
        dictionary_folder + dictionary_file,
        sep='\t',
        header=None,
        names=column_names,
    )
    lexicon['AssociationFlag'] = lexicon['AssociationFlag'].astype('int64')

    # One combined mask: flagged as associated, and not one of the two
    # polarity categories.
    is_associated = lexicon['AssociationFlag'] == 1
    is_polarity = lexicon['AffectCategory'].isin(['positive', 'negative'])
    return lexicon[is_associated & ~is_polarity]

In [240]:
# Substrings that mark a response as a refusal even when it contains braces.
_REFUSAL_MARKERS = (
    "adopt the identity",
    'racial or ethnic',
    'I cannot fulfill your request',
    'I cannot provide a response',
)


def _parse_model_response(value, target_group_name):
    """Turn one raw model answer (a string) into a record dict.

    Returns ``None`` when the answer should be skipped silently (a
    ``\\r\\n\\r\\n``-separated answer with fewer than four segments).
    Raises ``ValueError`` (``json.JSONDecodeError`` is a subclass) when the
    JSON payload cannot be parsed or is not a JSON object.
    """
    text = value.strip()

    # No JSON object at all, or an explicit refusal phrase: record a refusal.
    if "{" not in value or any(marker in value for marker in _REFUSAL_MARKERS):
        return {
            "emotion": "Cannot adopt emotion",
            "explanation": text,
            "target_group": target_group_name,
        }

    if "}" not in value:
        # Truncated object: close the open string if needed, then the object.
        # The check uses the stripped text so trailing whitespace after a
        # closing quote does not cause a second quote to be appended.
        payload = text + ("}" if text.endswith("\"") else "\"}")
    elif '\n\n' in value:
        # The JSON is taken from the second blank-line-separated segment.
        payload = value.split('\n\n')[1]
    elif '\r\n\r\n' in value:
        segments = value.split('\r\n\r\n')
        if len(segments) < 4:
            return None
        # The JSON is taken from the fourth segment.
        payload = segments[3]
    else:
        payload = value

    record = json.loads(payload.strip())
    if not isinstance(record, dict):
        raise ValueError("expected a JSON object, got " + type(record).__name__)
    # Parsed answers carry no group label of their own; attach it so every
    # row can be counted per target group downstream.
    record.setdefault("target_group", target_group_name)
    return record


def report_load(output_folder,output_file):
    """Load one result file and return its answers as a DataFrame.

    The target group is derived from the file name: the second
    underscore-separated token, extended by the third token unless the second
    is ``"male"`` or ``"female"``.

    Parameters
    ----------
    output_folder : str
        Folder prefix, concatenated directly with ``output_file``.
    output_file : str
        Name of a JSON file whose top-level object maps keys to raw answers.

    Returns
    -------
    pandas.DataFrame
        One row per usable answer, always containing the columns ``emotion``
        (lower-cased; non-string values become missing), ``explanation`` and
        ``target_group``. Answers that cannot be parsed are printed and
        skipped.

    Raises
    ------
    IndexError
        If the file name has too few underscore-separated tokens.
    """
    print(output_folder + output_file)

    name_parts = output_file.split("_")
    target_group_name = name_parts[1]
    if target_group_name not in ("male", "female"):
        target_group_name += " " + name_parts[2]

    with open(output_folder + output_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    records = []
    for i, value in enumerate(data.values()):
        if not isinstance(value, str):
            # Non-string answers (e.g. null) cannot be parsed; report and skip.
            print(i, "\t", "skipping non-string response:", repr(value))
            continue
        try:
            record = _parse_model_response(value, target_group_name)
        except Exception as e:
            # Best effort: report the unparsable answer and carry on.
            print(e)
            if value != '':
                print(i, "\t", value.strip())
            continue
        if record is not None:
            records.append(record)

    emotion = pd.DataFrame(records)
    # Guarantee the columns downstream code relies on, even for empty input.
    for column in ("emotion", "explanation", "target_group"):
        if column not in emotion.columns:
            emotion[column] = None
    emotion['emotion'] = emotion['emotion'].map(
        lambda label: label.lower() if isinstance(label, str) else None
    )
    return emotion

In [241]:
def replace_none(row):
    """Return the category label for one merged row.

    Parameters
    ----------
    row : mapping
        Must provide the keys ``'AffectCategory'`` and ``'emotion'``
        (a DataFrame row passed by ``apply(..., axis=1)`` qualifies).

    Returns
    -------
    The row's ``AffectCategory`` when it is present. Otherwise
    ``'Cannot adopt emotion'`` if the emotion text contains
    ``'cannot adopt emotion'`` (case-insensitive), else
    ``'Cannot map the emotion'``.
    """
    category = row['AffectCategory']
    if not pd.isna(category):
        return category

    emotion = row['emotion']
    # The emotion can itself be missing (NaN/None) or a non-string value;
    # such rows cannot be mapped, and must not crash on ``.lower()``.
    if isinstance(emotion, str) and 'cannot adopt emotion' in emotion.lower():
        return 'Cannot adopt emotion'
    return 'Cannot map the emotion'


def merged_data(emotion,dictionary,output_folder,output_file):
    """Left-join the answers onto the lexicon, save the result, print counts.

    Parameters
    ----------
    emotion : pandas.DataFrame
        Answers; joined on its ``emotion`` column.
    dictionary : pandas.DataFrame
        Lexicon; joined on its ``term`` column.
    output_folder : str
        Base folder; the CSV is written below ``output_folder + "/refactored/"``,
        which is created when missing.
    output_file : str
        Source file name; ``'.json'`` is replaced by ``'_refactored.csv'`` to
        build the CSV name.

    Returns
    -------
    pandas.DataFrame
        The merged frame, with missing ``AffectCategory`` values replaced via
        ``replace_none``.
    """
    refactored_dir = output_folder + "/refactored/"
    csv_name = output_file.replace('.json', '_refactored.csv')

    # Left join keeps every answer, including those with no lexicon match.
    merged = emotion.merge(dictionary, how='left', left_on='emotion', right_on='term')
    merged['AffectCategory'] = merged.apply(replace_none, axis=1)

    os.makedirs(refactored_dir, exist_ok=True)
    merged.to_csv(refactored_dir + csv_name, sep='\t', index=False, header=True)

    # Per-category counts over the columns left after dropping the helper ones.
    helper_columns = ['emotion', 'explanation', 'term', 'AssociationFlag']
    target_group = merged.drop(columns=helper_columns).groupby('AffectCategory').count()
    print("The output is the following: \n", target_group)
    return merged

In [242]:
# Load the lexicon once, then convert every raw result file in output_folder.
dictionary = load_dictionary(dictionary_folder, dictionary_file)

# sorted() makes the processing order reproducible; os.listdir() order is arbitrary.
for output_file in sorted(os.listdir(output_folder)):
    # Skip entries whose name contains 'refactored' (merged_data writes its
    # CSVs into a folder of that name) or 'images', and anything that is not
    # a JSON file, since report_load() parses its input with json.load().
    if 'refactored' in output_file or 'images' in output_file or not output_file.endswith('.json'):
        continue
    emotion = report_load(output_folder,output_file)
    # merged_data() accepts exactly these four arguments; the previous extra
    # `word_column=` keyword raised TypeError on a fresh kernel.
    merged = merged_data(emotion,dictionary,output_folder,output_file)

./output/Llama-2-7b-chat-hf/english_Asian_man_1_results.json
The unique target groups are the following: 
 ['Asian man']
The output is the following: 
                         target_group
AffectCategory                      
Cannot adopt emotion             688
Cannot map the emotion            28
anger                             58
anticipation                       7
disgust                           98
fear                              83
joy                               29
sadness                          206
surprise                           6
trust                            100
./output/Llama-2-7b-chat-hf/english_Asian_man_2_results.json
The unique target groups are the following: 
 ['Asian man']
The output is the following: 
                         target_group
AffectCategory                      
Cannot adopt emotion             981
Cannot map the emotion             3
anger                              2
anticipation                       1
disgust                       