# Import Libraries

In [13]:
from collections import defaultdict
from datetime import datetime
import os
import random

import numpy as np
import pandas as pd

import spacy
# Load spaCy's small English model; a coreference component is appended to this pipeline below.
nlp = spacy.load('en_core_web_sm')

# NOTE(review): Lemmatizer is only referenced by the commented-out spaCy <= 2.1 code path below.
from spacy.lemmatizer import Lemmatizer

# from spacy.lang.en import LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES # for spaCy 2.1 and earlier
from spacy.lang.en import English   # for spaCy 2.2

# lemmatizer = Lemmatizer(LEMMA_INDEX, LEMMA_EXC, LEMMA_RULES) # for spaCy 2.1 and earlier
# Build the lemmatizer from the English language defaults (spaCy 2.2 API).
# NOTE(review): `lemmatizer` is not referenced again in the cells shown here — confirm it is still needed.
lemmatizer = English.Defaults.create_lemmatizer()   # for spaCy 2.2

import neuralcoref
# Append a NeuralCoref component to the pipeline under the name "neuralcoref".
# NOTE(review): blacklist=False presumably makes neuralcoref also resolve first/second-person
# pronouns ("i", "me", "my", "you", "your") — confirm against the neuralcoref documentation.
nlp.add_pipe(neuralcoref.NeuralCoref(nlp.vocab,blacklist=False),name="neuralcoref")

from main2 import ConnoFramer

## Add File Path to Desired Lexicon

In [14]:
# Path to the lexicon CSV that is passed to ConnoFramer.load_lexicon in the demos below.
# Can be replaced with the file path to a custom lexicon.
lexicon_path = './FramesAgencyPower/agency_power.csv'

<br><br><br><br>

# Small demo

In [15]:
# Two short stories for the small demo, written one sentence per source line
# (adjacent string literals are concatenated into a single story string).
example_stories = [
    (
        "I was just thinking about walking down the street, when my shoelace snapped. "
        "I had to call my doctor to pick me up. "
        "I felt so bad I also called my friend Katie, who came in her car. "
        "She was a lifesaver. "
        "My friend Jack is nice."
    ),
    (
        "My doctor fixed my shoe. "
        "I thanked him. "
        "Then Susan arrived. "
        "Now she is calling the doctor too."
    ),
]
# One integer id per story, in order: story k gets id k.
text_ids = list(range(len(example_stories)))

In [17]:
# Build a framer, load the lexicon, and run it over the two example stories.
framer = ConnoFramer()
# NOTE(review): 'verb' and 'power' presumably select the lexicon's verb column and the
# "power" score column — confirm against main2.ConnoFramer.load_lexicon.
framer.load_lexicon(lexicon_path, 'verb', 'power')
# text_ids are passed alongside the stories; the per-document queries further down
# (e.g. get_scores_for_doc(0)) look results up by these ids.
framer.train(example_stories,
             text_ids)

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00, 27.47it/s]

2023-02-20 20:32:38 Complete!





# Get Scores For All Documents

In [18]:
framer.get_score_totals()

defaultdict(<function main2.ConnoFramer.__score_dataset.<locals>.<lambda>()>,
            {'i': defaultdict(int, {'positive': 0, 'negative': 2}),
             'my doctor': defaultdict(int, {'positive': 4, 'negative': 0}),
             'my': defaultdict(int, {'positive': 0, 'negative': 1}),
             'susan': defaultdict(int, {'positive': 0, 'negative': 1})})

In [19]:
example_stories[0]

'I was just thinking about walking down the street, when my shoelace snapped. I had to call my doctor to pick me up. I felt so bad I also called my friend Katie, who came in her car. She was a lifesaver. My friend Jack is nice.'

# Specific Documents

## Get Scores for Specific Documents

In [20]:
framer.get_scores_for_doc(0)

defaultdict(<function main2.ConnoFramer.__score_document.<locals>.<lambda>()>,
            {'i': defaultdict(int, {'negative': 2, 'positive': 0}),
             'my doctor': defaultdict(int, {'positive': 1, 'negative': 0})})

In [11]:
framer.get_scores_for_doc(1)

defaultdict(<function main2.ConnoFramer.__score_document.<locals>.<lambda>()>,
            {'my': defaultdict(int, {'negative': 1, 'positive': 0}),
             'my doctor': defaultdict(int, {'positive': 3, 'negative': 0}),
             'susan': defaultdict(int, {'negative': 1, 'positive': 0})})

## Count Noun Subjects for Specific Documents

In [9]:
framer.count_nsubj_for_doc(0)

defaultdict(int,
            {('my doctor', 'be'): 1,
             ('i', 'think'): 1,
             ('i', 'have'): 1,
             ('i', 'feel'): 1,
             ('i', 'call'): 1})

In [12]:
framer.count_nsubj_for_doc(1)

defaultdict(int,
            {('my', 'thank'): 1,
             ('my doctor', 'fix'): 1,
             ('susan', 'arrive'): 1,
             ('susan', 'call'): 1})

## Count Direct Objects for Specific Documents

In [10]:
framer.count_dobj_for_doc(0)

defaultdict(int, {('my doctor', 'call'): 1, ('i', 'pick'): 1})

In [10]:
example_stories[1]

'My doctor fixed my shoe. I thanked him. Then Susan arrived. Now she is calling the doctor too.'

In [13]:
framer.count_dobj_for_doc(1)

defaultdict(int, {('my doctor', 'thank'): 1, ('my doctor', 'call'): 1})

<br><br><br><br>

# Bigger demo

In [18]:
# Build the corpus for the bigger demo: a random sample of paragraphs (non-empty
# lines) from every book file in `stories_path`.
#   texts    -- the sampled paragraphs
#   text_ids -- a running integer id per paragraph, parallel to `texts`
texts = []
text_ids = []
stories_path = '/Users/maria/Documents/data/narrativity/litbank/original'   # Litbank corpus here: https://github.com/dbamman/litbank

# Maximum number of paragraphs drawn from each book.
_paragraphs_per_book = 100
# Dedicated seeded generator: the sample is reproducible across runs and the
# global `random` state is left untouched.
_rng = random.Random(0)

j = 0
# Sorted so that book order (and therefore the id assignment) does not depend on
# the arbitrary order in which the OS lists the directory.
for _file_name in sorted(os.listdir(stories_path)):
    _file_path = os.path.join(stories_path, _file_name)
    # Skip sub-directories and other non-file entries instead of crashing in open().
    if not os.path.isfile(_file_path):
        continue

    # The context manager closes the file even if reading fails part-way.
    with open(_file_path, 'r', encoding='utf-8') as _book:
        _lines = [_line.strip() for _line in _book if _line.strip()]

    # Randomly sample up to 100 paragraphs from each book; books with fewer
    # non-empty lines contribute all of them rather than raising ValueError.
    for _line in _rng.sample(_lines, min(_paragraphs_per_book, len(_lines))):
        texts.append(_line)
        text_ids.append(j)
        j += 1

len(texts), len(text_ids)

(10000, 10000)

In [19]:
# Fresh framer for the bigger demo, replacing the one trained on the example stories.
framer = ConnoFramer()
# NOTE(review): 'verb' and 'power' presumably select the lexicon's verb column and the
# "power" score column — confirm against main2.ConnoFramer.load_lexicon.
framer.load_lexicon(lexicon_path, 'verb', 'power')
# Process the sampled paragraphs; text_ids holds one id per paragraph.
framer.train(texts,
             text_ids)

2023-02-06 12:07:08 Processed 0 out of 10000
2023-02-06 12:07:09 Processed 100 out of 10000
2023-02-06 12:07:10 Processed 200 out of 10000
2023-02-06 12:07:11 Processed 300 out of 10000
2023-02-06 12:07:12 Processed 400 out of 10000
2023-02-06 12:07:13 Processed 500 out of 10000
2023-02-06 12:07:14 Processed 600 out of 10000
2023-02-06 12:07:15 Processed 700 out of 10000
2023-02-06 12:07:16 Processed 800 out of 10000
2023-02-06 12:07:16 Processed 900 out of 10000
2023-02-06 12:07:17 Processed 1000 out of 10000
2023-02-06 12:07:18 Processed 1100 out of 10000
2023-02-06 12:07:19 Processed 1200 out of 10000
2023-02-06 12:07:20 Processed 1300 out of 10000
2023-02-06 12:07:21 Processed 1400 out of 10000
2023-02-06 12:07:22 Processed 1500 out of 10000
2023-02-06 12:07:23 Processed 1600 out of 10000
2023-02-06 12:07:24 Processed 1700 out of 10000
2023-02-06 12:07:24 Processed 1800 out of 10000
2023-02-06 12:07:25 Processed 1900 out of 10000
2023-02-06 12:07:26 Processed 2000 out of 10000
2023

In [20]:
# Scores aggregated over all documents. As the small-demo output shows, this maps each
# persona to a dict of 'positive' / 'negative' counts.
persona_score_dict = framer.get_score_totals()
# Number of distinct personas that received a score.
len(persona_score_dict)

296

In [21]:
# Net score per persona: 'positive' count minus 'negative' count.
persona_sum_dict = {
    _name: _scores['positive'] - _scores['negative']
    for _name, _scores in persona_score_dict.items()
}
len(persona_sum_dict)

296

In [22]:
# Show the 20 personas with the highest net score; for each one print the name,
# then "net positive negative", then a blank separator line.
_ranked = sorted(persona_sum_dict.items(), key=lambda _item: _item[1], reverse=True)
for _persona, _sum in _ranked[:20]:
    _scores = persona_score_dict[_persona]
    print(_persona)
    print(_sum, _scores['positive'], _scores['negative'])
    print()

i
138 260 122

you
20 51 31

charles
4 4 0

mrs. todd
4 4 0

margaret
3 4 1

carol
3 3 0

anne
3 3 0

tarzan
3 3 0

mitchell
2 2 0

noah
2 2 0

frank
2 2 0

mr. casaubon
2 2 0

mr. hale
2 2 0

mrs. radford
2 2 0

mrs. dean
2 2 0

laurie
2 2 0

meg
2 2 0

a doctor
2 2 0

john
2 2 0

jock
2 3 1

