In [1]:
import pandas
from konlpy.tag import Mecab

# Shared KoNLPy Mecab tagger instance; the session-building cell below calls
# `tokenizer.morphs(...)` on every utterance.
tokenizer = Mecab()

In [2]:
# Speaker attribute labels mapped to integer codes. Codes are assigned by
# position, so the order of each tuple is significant.
meta_sex_dict = {label: code for code, label in enumerate(("FEMALE", "MALE"))}
meta_age_dict = {label: code for code, label in enumerate(("STUDENT", "COLLEGIAN", "CIVILIAN"))}
meta_relation_dict = {label: code for code, label in enumerate(("FRIEND", "LOVER"))}

# File locations: source scenario TSV, intermediate JSON, final JSON.
input_tsv = "gen_scene_new.tsv"
temp_json = "gen_temp.json"
output_json = "gen_scenario.json"


def labeling_sentiment(sentiment_val):
    """Map a numeric sentiment score to one of five coarse labels.

    Each entry in the table is an exclusive upper bound; the first bound the
    score falls strictly below decides the label. A score that is not below
    any bound (i.e. >= 0.2) is labelled "StrongPos".

    Args:
        sentiment_val: numeric sentiment score.

    Returns:
        One of "StrongNeg", "WeakNeg", "Neutral", "WeakPos", "StrongPos".
    """
    upper_bounds = (
        (-0.2, "StrongNeg"),
        (-0.05, "WeakNeg"),
        (0.09, "Neutral"),
        (0.2, "WeakPos"),
    )
    for bound, label in upper_bounds:
        if sentiment_val < bound:
            return label
    return "StrongPos"


In [3]:
# Parse the scenario TSV into `dialogs`: one list of (turn, utterance) pairs
# per session.
# The encoding is given explicitly instead of relying on the platform default;
# UTF-8 is assumed here to match the JSON files this notebook writes.
with open(input_tsv, encoding="utf-8") as gen_scene:
    # Sessions are separated by a line that contains only a tab.
    sessions = gen_scene.read().strip().split("\n\t\n")

dialogs = []
for session in sessions:
    # The first three lines of every session are skipped; only the remaining
    # lines are treated as dialogue turns.
    lines = session.strip().split("\n")[3:]
    dialog = []
    for line in lines:
        # Split on the first tab only, so an utterance that itself contains a
        # tab is kept whole instead of raising ValueError on unpacking.
        turn, utter = line.split("\t", 1)
        dialog.append((turn, utter))
    dialogs.append(dialog)

In [4]:
import itertools
# Label lists handed to itertools.product when sessions are expanded:
# `all_keys` yields (sex, age, relation) triples, `sex_age_keys` yields
# (sex, age) pairs. Each inner list is a fresh copy of the dict's keys.
all_keys = [list(mapping) for mapping in (meta_sex_dict, meta_age_dict, meta_relation_dict)]
sex_age_keys = [list(mapping) for mapping in (meta_sex_dict, meta_age_dict)]
# Debug aid: print(list(itertools.product(*all_keys)))
    
    
    

In [5]:
# Expand every dialog into one session record per combination of speaker
# profiles.
session_dict_list = []
for dialog_idx, dialog in enumerate(dialogs):
    # Each turn becomes (turn marker, morpheme tokens, original sentence).
    # The same list object is attached to every record built for this dialog.
    overall_utts = [
        (marker, tokenizer.morphs(sentence), sentence)
        for marker, sentence in dialog
    ]

    # Speaker A ranges over sex x age x relation; speaker B ranges over
    # sex x age and reuses A's relation. The outer product varies slowest,
    # which matches two nested loops.
    profile_pairs = itertools.product(
        itertools.product(*all_keys),
        itertools.product(*sex_age_keys),
    )
    for (my_sex, my_age, relation), (your_sex, your_age) in profile_pairs:
        speaker_a = {
            'age': meta_age_dict[my_age], 'age_group': my_age,
            'sex': meta_sex_dict[my_sex], 'sex_group': my_sex,
            'relation': meta_relation_dict[relation],
            'relation_group': relation,
        }
        speaker_b = {
            'age': meta_age_dict[your_age], 'age_group': your_age,
            'sex': meta_sex_dict[your_sex], 'sex_group': your_sex,
            'relation': meta_relation_dict[relation],
            'relation_group': relation,
        }
        session_dict_list.append({
            'session_idx_str': dialog_idx,
            'A': speaker_a,
            'B': speaker_b,
            'topic': "핑퐁대화",
            'prompt': "자유대화를 나눠보세요.",
            'utts': overall_utts,
        })

In [6]:
import json
# Write the expanded sessions to the intermediate JSON file as UTF-8, keeping
# non-ASCII characters unescaped.
with open(temp_json, 'w', encoding='utf-8') as f:
    f.write(json.dumps(session_dict_list, ensure_ascii=False, indent=4))

In [7]:
import dialog_sentiment
# Build the sentiment extractor, configured by the YAML file at this path.
sentiment_config_path = "pingpongapi/config_dev.yaml"
sent_extractor = dialog_sentiment.SentimentExtractor(sentiment_config_path)

  return yaml.load(f)


INFO:tensorflow:Restoring parameters from /scatter/workspace/pingpong/models/emotion_model_data/reaction_multitask/best_NDCG.ckpt-321728
INFO:tensorflow:Restoring parameters from /scatter/workspace/pingpong/models/emotion_model_data/emojiness/best_NDCG.ckpt-107630




In [8]:
# Run sentiment extraction on the intermediate JSON file.
# The same path is passed as the first and second argument, so presumably the
# file is read and then overwritten in place — TODO confirm against
# SentimentExtractor.extract_sentiment_from_json.
# NOTE(review): the meaning of the third argument (60) is not visible in this
# notebook; confirm and consider naming it.
# The labelling cell further down reads index 3 of every utterance, which is
# presumably the score this call adds — verify.
result = sent_extractor.extract_sentiment_from_json(temp_json, temp_json, 60)

100%|██████████| 720/720 [00:03<00:00, 203.88it/s]


In [11]:
import copy, tqdm
# Attach a coarse sentiment label to every utterance and write the final
# scenario JSON.
# The file holds non-ASCII text, so it is decoded explicitly instead of via the
# platform default encoding; UTF-8 is assumed, matching how this notebook
# writes its JSON files.
with open(temp_json, "r", encoding="utf-8") as json_reader:
    json_content = json.load(json_reader)

# Labels are appended to a deep copy so `json_content` stays exactly as loaded.
output_json_content = copy.deepcopy(json_content)
for session, labelled_session in zip(tqdm.tqdm(json_content), output_json_content):
    # Walk the source utterances and their copies in lockstep.
    for turn_info, target_line in zip(session['utts'], labelled_session['utts']):
        # Index 3 is read as the numeric sentiment value (presumably added by
        # the extraction step — verify) and converted to its label.
        target_line.append(labeling_sentiment(turn_info[3]))

with open(output_json, 'w', encoding='utf-8') as f:
    json.dump(output_json_content, f, ensure_ascii=False, indent=4)
        

100%|██████████| 720/720 [00:00<00:00, 221448.92it/s]
