In [1]:
import os
import random
from riveter import Riveter

<br><br><br><br>

# **Small demo w/ Sap frames** 

In [2]:
example_stories = ["I was just thinking about walking down the street, when my shoelace snapped. I had to call my doctor to pick me up. I felt so bad I also called my friend Katie, who came in her car. She was a lifesaver. My friend Jack is nice.",
                   "My doctor fixed my shoe. I thanked him. Then Susan arrived. Now she is calling the doctor too.",
                   "She went to the store. She thanked the doctor."]
text_ids = [0, 1]

In [3]:
riveter = Riveter()
riveter.load_sap_lexicon('power')
riveter.train(example_stories,
              text_ids)

 67%|██████▋   | 2/3 [00:00<00:00, 19.42it/s]

2023-02-22 22:27:33 Complete!





In [4]:
riveter.get_score_totals()

{'i': -0.3333333333333333, 'my doctor': 1.0, 'susan': -0.5}

In [5]:
riveter.get_persona_cluster('my doctor')

{'my doctor': 2, 'him': 1, 'the doctor': 1}

In [6]:
example_stories[0]

'I was just thinking about walking down the street, when my shoelace snapped. I had to call my doctor to pick me up. I felt so bad I also called my friend Katie, who came in her car. She was a lifesaver. My friend Jack is nice.'

In [7]:
riveter.get_scores_for_doc(0)

{'i': -1.0, 'my doctor': 1.0}

In [8]:
riveter.count_nsubj_for_doc(0)

{('i', 'think'): 1, ('i', 'have'): 1, ('i', 'feel'): 1, ('i', 'call'): 1}

In [9]:
riveter.count_dobj_for_doc(0)

{('i', 'pick'): 1, ('my doctor', 'call'): 1}

In [10]:
example_stories[1]

'My doctor fixed my shoe. I thanked him. Then Susan arrived. Now she is calling the doctor too.'

In [11]:
riveter.get_scores_for_doc(1)

{'my doctor': 3.0, 'susan': -1.0, 'i': -1.0}

In [12]:
riveter.count_nsubj_for_doc(1)

{('my doctor', 'fix'): 1,
 ('susan', 'arrive'): 1,
 ('susan', 'call'): 1,
 ('i', 'thank'): 1}

In [13]:
riveter.count_dobj_for_doc(1)

{('my doctor', 'thank'): 1, ('my doctor', 'call'): 1}

In [14]:
riveter = Riveter()
riveter.load_sap_lexicon('agency')
riveter.train(example_stories,
              text_ids)

 67%|██████▋   | 2/3 [00:00<00:00, 35.78it/s]

2023-02-22 22:27:51 Complete!





In [15]:
riveter.get_score_totals()

{'i': 0.3333333333333333, 'my doctor': 0.25, 'susan': 0.5}

<br><br><br><br>

# **Small demo w/ Rashkin frames**

In [16]:
example_stories = ["I was just thinking about walking down the street, when my shoelace snapped. I had to call my doctor to pick me up. I felt so bad I also called my friend Katie, who came in her car. She was a lifesaver. My friend Jack is nice.",
                   "My doctor fixed my shoe. I thanked him. Then Susan arrived. Now she is calling the doctor too."]
text_ids = [0, 1]

In [17]:
riveter = Riveter()
riveter.load_rashkin_lexicon('effect')
riveter.train(example_stories,
              text_ids)

100%|██████████| 2/2 [00:00<00:00, 35.06it/s]

2023-02-22 22:28:00 Complete!





In [18]:
riveter.get_score_totals()

{'i': 0.34444444444450006, 'my doctor': 0.35, 'susan': 0.16666666666649999}

In [19]:
riveter.get_scores_for_doc(0)

{'i': 1.4000000000000001, 'my doctor': 0.266666666667}

In [20]:
riveter.get_scores_for_doc(1)

{'susan': 0.33333333333299997,
 'i': 0.666666666667,
 'my doctor': 1.133333333333}

In [21]:
riveter = Riveter()
riveter.load_rashkin_lexicon('value')
riveter.train(example_stories,
              text_ids)

100%|██████████| 2/2 [00:00<00:00, 36.22it/s]

2023-02-22 22:28:10 Complete!





In [22]:
riveter.get_score_totals()

{'i': 0.6444444444445,
 'my doctor': 0.8000000000002501,
 'susan': 0.5333333333335}

<br><br><br><br>

# **Bigger demo w/ Sap frames and example dataset**

In [23]:
texts = []
text_ids = []
stories_path = '/Users/maria/Documents/data/narrativity/litbank/original'   # Litbank corpus here: https://github.com/dbamman/litbank

j = 0
for _file_name in os.listdir(stories_path):
    _lines = []
    for _line in open(stories_path + '/' + _file_name, 'r'):
        if _line.strip():
            _lines.append(_line.strip())

    # Randomly sample 100 paragraphs from each book
    for _line in random.sample(_lines, 100):        
        texts.append(_line)
        text_ids.append(j)
        j += 1

len(texts), len(text_ids)

(10000, 10000)

In [24]:
riveter = Riveter()
riveter.load_sap_lexicon('power')
riveter.train(texts,
             text_ids)

100%|██████████| 10000/10000 [01:33<00:00, 106.97it/s]

2023-02-22 22:29:49 Complete!





In [25]:
persona_score_dict = riveter.get_score_totals()
len(persona_score_dict)

704

In [26]:
for _persona, _score in sorted(persona_score_dict.items(), key=lambda x: x[1], reverse=True)[:20]:
    print(_score, '\t', _persona)

1.0 	 daredevil
1.0 	 my dear man
1.0 	 dr. morton
1.0 	 dear georgiana
1.0 	 edna
1.0 	 even the kitchen
1.0 	 george, jr.
1.0 	 mrs. grose
1.0 	 esau
1.0 	 the wolfes
1.0 	 i know
1.0 	 old ben
1.0 	 sibyl vane's
1.0 	 poor dreadful mr. isaacs
1.0 	 cassandra
1.0 	 girls
1.0 	 both pedro
1.0 	 auchincloss's query
1.0 	 mr. boldwood
1.0 	 miss millward


In [32]:
# The scores above are skewed by infrequent entities.
# For example, an entity could appear just once in the dataset, and if it has power in that one instance, it will have a score of 1.0.
# We can remove the infrequent entities to clean up the ranked list.
# (Note that this wasn't a great test dataset because most entities don't occur very often; hopefully the NYT data is better!)

persona_count_dict = riveter.get_persona_counts()
persona_score_dict = {p: s for p, s in persona_score_dict.items() if persona_count_dict[p] >= 5}
len(persona_score_dict)

31

In [33]:
for _persona, _score in sorted(persona_score_dict.items(), key=lambda x: x[1], reverse=True):
    print(round(_score, 2), '\t', _persona)

0.33 	 tom
0.3 	 andy
0.27 	 your
0.26 	 mary
0.21 	 dick
0.2 	 the woman
0.2 	 jane
0.2 	 jock
0.19 	 helen
0.17 	 anne
0.15 	 the doctor
0.13 	 you
0.1 	 i
0.1 	 the old man
0.09 	 george
0.08 	 a woman
0.08 	 margaret
0.06 	 "i
0.04 	 myself
0.04 	 a man
0.02 	 the man
0.0 	 man
0.0 	 dale
0.0 	 philip
0.0 	 sir
0.0 	 joe
0.0 	 emma
0.0 	 alice
-0.1 	 woman
-0.11 	 anthony
-0.23 	 frank


In [34]:
riveter.get_persona_cluster('mary')

{'mary': 30, 'she': 4, 'you': 2, 'herself': 1, 'her': 1}

In [35]:
riveter.get_persona_cluster('you')

{'you': 930,
 'i': 37,
 'me': 8,
 'she': 3,
 'he': 2,
 'myself': 1,
 'yours': 1,
 'yourself': 1}

In [36]:
riveter.get_persona_cluster('the man')

{'the man': 42, 'him': 1, 'you': 1, 'i': 3, 'he': 1}