In [3]:
from sklearn.model_selection import train_test_split

import utils.dataset_processors as dataset_processors

In [4]:
# Load the dataset
datafile = "data/essays/essays.csv"
dataset = dataset_processors.load_essays_df(datafile)

# Split the dataset (6:2:2)
train_data, temp_data = train_test_split(dataset, train_size=0.6, random_state=42)
validation_data, test_data = train_test_split(temp_data, train_size=0.2, random_state=42)

EXT :  EXT
1    1275
0    1192
Name: count, dtype: int64
NEU :  NEU
1    1234
0    1233
Name: count, dtype: int64
AGR :  AGR
1    1309
0    1158
Name: count, dtype: int64
CON :  CON
1    1254
0    1213
Name: count, dtype: int64
OPN :  OPN
1    1271
0    1196
Name: count, dtype: int64





In [11]:
# Convert from Dataframe to list of dictionary
x_train = train_data.to_dict('records')
x_test = test_data.to_dict('records')
x_val = validation_data.to_dict('records')

print(x_train[0].keys())

dict_keys(['user', 'text', 'token_len', 'EXT', 'NEU', 'AGR', 'CON', 'OPN'])


In [14]:
# Get the Big 5 labels
labels = [label for label in x_train[0].keys() if label not in ['user','text','token_len']]

# Forward and backward mapping
id2label = {idx:label for idx,label in enumerate(labels)}
label2id = {label:idx for idx,label in enumerate(labels)}

labels

['EXT', 'NEU', 'AGR', 'CON', 'OPN']

# Feature evaluation

In [1]:
import json
import codecs
import pickle

In [4]:
dataset = []

from pprint import pprint

with open('train_dataset_vad.pkl','rb') as file:
    dataset = pickle.load(file)

pprint(dataset[0][0])

{'act': '3',
 'personality_features': {'AGR': array([0.        , 2.0080483 , 2.4016094 , 0.        , 0.41861817,
       0.        , 0.5901311 , 0.        , 0.        , 0.        ,
       1.2648318 , 0.        , 0.5507349 , 0.        , 0.        ,
       0.        , 2.3315444 , 0.        , 0.94451207, 0.04680952,
       0.        , 0.61699086, 1.1855582 , 2.313045  , 0.        ,
       0.7509485 , 0.        , 0.745355  , 0.6079138 , 1.8577148 ,
       0.        , 1.2509583 , 0.        , 0.        , 0.46775097,
       0.        , 0.        , 0.        , 2.2193878 , 0.        ,
       0.        , 0.        , 3.4431014 , 0.25628236, 0.        ,
       0.        , 0.34821013, 0.0257862 , 0.        , 0.25395954],
      dtype=float32),
                          'CON': array([0.        , 0.49052936, 0.        , 1.0379113 , 0.9174472 ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.2529782 , 0.        , 2.436219  , 2.4117799 ,
       0.        , 1.112840

In [6]:
file_directory = 'dataset_erc\dailydialogue\dev.json'

train_file = []

with codecs.open(file_directory, "r", "utf-8") as f:
    train_file = json.load(f)

In [10]:
print(train_file[0])

[{'utterance': 'good morning , sir . is there a bank near here ?', 'sentiment': '0', 'act': '2'}, {'utterance': 'there is one . 5 blocks away from here ?', 'sentiment': '0', 'act': '1'}, {'utterance': "well , that's too far.can you change some money for me ?", 'sentiment': '0', 'act': '3'}, {'utterance': 'surely , of course . what kind of currency have you got ?', 'sentiment': '0', 'act': '2'}, {'utterance': 'rib .', 'sentiment': '0', 'act': '1'}, {'utterance': 'how much would you like to change ?', 'sentiment': '0', 'act': '2'}, {'utterance': '1000 yuan.here you are .', 'sentiment': '0', 'act': '1'}]


In [16]:
def get_personality():
    return 5

def iterate_conversation(conversation_list):
    conversation_with_personality = []

    for conversation in conversation_list:
        personality_features = get_personality()

        conversation['personality_features'] = personality_features
        conversation_with_personality.append(conversation)
        
    return conversation_with_personality
    

train_file_with_personality = []

for conversation_list in train_file:
    print(conversation_list)
    print('\n\n')
    conversation_with_personality = iterate_conversation(conversation_list)
    print(conversation_with_personality)
    break

[{'utterance': 'good morning , sir . is there a bank near here ?', 'sentiment': '0', 'act': '2', 'personality_features': 5}, {'utterance': 'there is one . 5 blocks away from here ?', 'sentiment': '0', 'act': '1', 'personality_features': 5}, {'utterance': "well , that's too far.can you change some money for me ?", 'sentiment': '0', 'act': '3', 'personality_features': 5}, {'utterance': 'surely , of course . what kind of currency have you got ?', 'sentiment': '0', 'act': '2', 'personality_features': 5}, {'utterance': 'rib .', 'sentiment': '0', 'act': '1', 'personality_features': 5}, {'utterance': 'how much would you like to change ?', 'sentiment': '0', 'act': '2', 'personality_features': 5}, {'utterance': '1000 yuan.here you are .', 'sentiment': '0', 'act': '1', 'personality_features': 5}]



[{'utterance': 'good morning , sir . is there a bank near here ?', 'sentiment': '0', 'act': '2', 'personality_features': 5}, {'utterance': 'there is one . 5 blocks away from here ?', 'sentiment': '0'