In [1]:
import pandas as pd
import re
import json

In [2]:
# Read the story; every line of the text file is treated as one sentence.
with open('schoolmistress.txt') as story_file:
  sentences = story_file.read().splitlines()


def _strip_punctuation(token):
  """Return `token` with the characters " ( ) , ; . ? ! : removed."""
  return re.sub(r'[\",\(,\),\,,\;,\.,\?,\!,\:]', '', token)


# One record per space-separated token, tagged with the index of the sentence
# it came from. Tokens that become empty after stripping are still recorded.
words = []
for sentence_index, sentence in enumerate(sentences):
  cleaned_tokens = map(_strip_punctuation, sentence.split(" "))
  words.extend(
    {"sentence": sentence_index, "word": token, "word_length": len(token)}
    for token in cleaned_tokens
  )


In [3]:
# Append story text to the eyelink file.
eyelink_data = pd.read_csv('ia_files/id17_schoolmistress.txt', sep='\t', low_memory=False)
words_df = pd.DataFrame(words)

# Cells holding '.' are treated as missing: swap them for NaN so the selected
# columns can be cast to float. DataFrame.replace is used instead of the
# deprecated DataFrame.applymap; the resulting float frame is the same.
ia_df_subset = eyelink_data[['IA_DWELL_TIME', 'IA_REGRESSION_PATH_DURATION', 'IA_AVERAGE_FIX_PUPIL_SIZE',
      'IA_REGRESSION_IN_COUNT', 'IA_REGRESSION_OUT_FULL_COUNT']] \
      .replace('.', float('nan'))
ia_df_clean = ia_df_subset.astype('float')

# NOTE(review): concat with axis=1 pairs rows by index label, i.e. by position
# here. This presumably relies on the eyelink file having exactly one row per
# token in `words`, in the same order -- confirm; a length mismatch would only
# show up as NaN-padded rows.
eyelink_with_text = pd.concat([words_df, ia_df_clean], axis=1)


In [4]:
# Collapse the word-level rows into one row per sentence: the pupil size is
# averaged, every other measure is summed.
ia_sentences_df = (
  eyelink_with_text
  .groupby('sentence')
  .agg({
    'IA_DWELL_TIME': 'sum',
    'IA_REGRESSION_PATH_DURATION': 'sum',
    'IA_AVERAGE_FIX_PUPIL_SIZE': 'mean',
    'IA_REGRESSION_IN_COUNT': 'sum',
    'IA_REGRESSION_OUT_FULL_COUNT': 'sum',
  })
)

In [5]:
def convert_to_json(file_path):
  """Parse a file holding one JSON document per line into a list.

  Args:
    file_path: Path of the text file to read.

  Returns:
    A list with one decoded object per non-blank line, in file order.
    An empty file yields an empty list.

  Raises:
    OSError: If the file cannot be opened.
    json.JSONDecodeError: If a non-blank line is not valid JSON.
  """
  # The context manager closes the handle as soon as parsing is done.
  with open(file_path, 'r') as f:
    # Whitespace-only lines (e.g. a stray blank line at the end of the file)
    # carry no record and would make json.loads raise, so they are skipped.
    return [json.loads(line) for line in f if line.strip()]

In [6]:
# Parse the emotion results file into a list holding one decoded JSON object per line.
schoolmistress_emotion_json = convert_to_json('../emotional_story_arcs/data/kelsey/results/schoolmistress.emotion')

In [7]:
# Sentiment scores: tab-separated file, with the three column names supplied here.
schoolmistress_sentiment = pd.read_csv(
  '../emotional_story_arcs/data/kelsey/results/schoolmistress.sentiment',
  sep='\t',
  names=['negative', 'neutral', 'positive'],
)

# Emotion scores: one DataFrame row per decoded JSON record.
schoolmistress_emotion = pd.DataFrame(schoolmistress_emotion_json)

In [13]:
# Load the highlight categories, discarding the 'Unnamed: 0' and 'proportion' columns.
highlights = (
  pd.read_csv('./highlights/id17_schoolmistress.csv')
  .drop(columns=['Unnamed: 0', 'proportion'])
)

In [15]:
# Assemble one DataFrame by sentence, side by side: BookNLP sentiment and
# emotion features, the aggregated eye tracking features, and the highlight
# categories. Rows are paired by index label.
sentences_with_features = pd.concat(
  [schoolmistress_sentiment, schoolmistress_emotion, ia_sentences_df, highlights],
  axis=1,
)

In [16]:
# Write the combined feature table to CSV. The DataFrame index is written as
# well, since `index=False` is not passed.
sentences_with_features.to_csv("./results/id17.csv")