In [1]:
import os
from os.path import join
import json
import re
from collections import Counter

In [2]:
def get_counter_from_dependency_list(dep_list):
    counter=Counter()
    for token in dep_list:
        term=token["w"]
        tokenGlobalIndex=token["i"]
        counter[term]+=1
    return counter

def list_words(character,parameter):
    words = []
    for token in character[parameter]:
        words.append(token['w'])
    return words

The file has data about the following aspects:

* agent - actions that character does
* patient - actions done to that character
* mod - adjectives that describe them in the text
* poss - things the entity has (very broadly defined), e.g. relatives like aunt, uncle; or parts of the body, e.g. head, back, etc.
* id - their unique id (as seen above)
* g - analysis about gender pronouns used
* count - number of times the entity appears
* mentions - how the character is referenced

In [3]:

file = open( join('Data',f'emma.book') )
book_data=json.load(file)


for character in book_data["characters"]:
    
    character_name = ''
    gender = ''
    
    if len(character["mentions"]['proper'])>0:
        character_name = character["mentions"]['proper'][0]['n']

    referential_gender_distribution=referential_gender_prediction="unknown"

    if character["g"] is not None and character["g"] != "unknown":
        referential_gender_distribution=character["g"]["inference"]
        gender=character["g"]["argmax"]
        
    count = character['count']
    
    if len(character_name)>0:

        print( f"\n\n{character_name} ({gender}) {count} appearances")
        
        print('Words used to describe the character:')
        adjectives = list_words(character,'mod')
        print('; '.join(set(adjectives)))
        
        print('\"Possessions\" of the character:')
        possessions = list_words(character,'poss')
        print('; '.join(set(possessions)))
        
        print('\"Verbs\" used with this character:')
        agents = list_words(character,'agent')
        print('; '.join(set(agents)))



Emma (she/her) 3642 appearances
Words used to describe the character:
ready; aware; happy; hours; ashamed; cruel; sensible; interested; behindhand; quick; deficient; support; impertinent; sorry; right; angel; half; engaged; eager; wretchedest; loveliness; serious; determined; disappointed; well; times; thinking; able; convinced; late; certain; gratitude; cooler; first; easy; used; bad; insolent; dreamer; kind; mistress; insincere; youngest; likely; months; civil; confident; natural; confused; shocked; fortunate; humble; warm; pleasure; cross; open; astonished; ingratitude; thankful; attentive; fit; depressed; unsuspicious; pretty; safe; busy; rude; sure; judge; enemy; creature; thoughtless; scheme; gratified; concerned; surprized.—Mr; afraid; gay; ignorant; surprized; old; fearful; companion; brutal; alone; delighted; compassionate; pleased; willing; inclined; glad; ungrateful; scrupulous; friend; comfortable; satisfied; sort; silent; girl; absent; equal; mistaken; unfeeling; worst; 

In [4]:
texts = ['Emma', 
         'Northanger_Abbey',
         'Persuasion',
         'Mansfield_Park',
         'Pride_and_Prejudice',
         'Sense_and_Sensibility']

In [5]:
for text in texts:
    print(text, end='\n\n')
    
    file = open( join('Data',f'{text.lower()}.book') )
    book_data=json.load(file)
    
    characters = dict()
    male = []
    female = []


    for character in book_data["characters"]:

        character_name = ''
        gender = ''

        if len(character["mentions"]['proper'])>0:
            character_name = character["mentions"]['proper'][0]['n']

        referential_gender_distribution=referential_gender_prediction="unknown"

        if character["g"] is not None and character["g"] != "unknown":
            referential_gender_distribution=character["g"]["inference"]
            gender=character["g"]["argmax"]

        count = character['count']

        if len(character_name)>0 and count >= 10:
            if re.search( '^he/him/his' , gender):
                male.append(character_name)
            else:
                female.append(character_name)
                
    print(f'Male characters:')
    print('; '.join(male))
    print(f'\nFemale characters:')
    print('; '.join(female))
    
    
    male_words = []
    female_words = []

    for character in book_data["characters"]:

        character_name = ''
        gender = ''

        if len(character["mentions"]['proper'])>0:
            character_name = character["mentions"]['proper'][0]['n']

        adjectives = list_words(character,'mod')
        possessions = list_words(character,'poss')
        agents = list_words(character,'agent')

        all_words = adjectives + possessions + agents

        if character_name in male:
            male_words.extend( all_words )
        elif character_name in female:
            female_words.extend( all_words )

    male_words = Counter(male_words)
    female_words = Counter(female_words)


    print('\nWords that are used for female characters only:')

    for word,count in female_words.most_common():
        if word not in male_words.keys():
            print( f'{word} ({count})' , end='; ')

    print('\n\nWords that are used for male characters only:\n')

    for word,count in male_words.most_common():
        if word not in female_words.keys():
            print( f'{word} ({count})' , end='; ')


Emma

Male characters:
Mr. Knightley; Frank Churchill; Mr. Elton; Mr. Weston; Mr. Woodhouse; Mr. Martin; Mr. Perry; Colonel Campbell; Mr. Dixon; Mr. Cole; the Churchills; William Larkins; Henry; Mr. E.; Mr. Wingfield; the Eltons; Enscombe; Mr. Suckling

Female characters:
Emma; Harriet; Miss Fairfax; Mrs. Elton; Mrs. Weston; Miss Bates; Miss Woodhouse; Miss Taylor; Isabella; Mrs. Churchill; Mrs. Goddard; Mrs. Cole; the Coles; Mrs. Bates; Harriet; Randalls; Miss Smith!—Miss Smith; Mrs. John Knightley; Mrs. Dixon; Mrs. Martin; Miss Nash; Mrs. Weston.—“Very; Miss Hawkins; Mrs. Perry; the Campbells; James; Miss Woodhouse.—Dear Miss Woodhouse; Hannah; Miss Campbell; Donwell; Selina; God; Patty; the Martins; Miss Woodhouse; Mrs. Campbell; Mrs. Ford; Elizabeth

Words that are used for female characters only:


husband (20); girl (13); niece (12); woman (10); sense (9); fancy (9); allowed (9); sensations (8); played (8); send (8); grandmother (8); right (7); laughed (7); run (7); convinced (6); imagination (6); conscience (6); walk (6); honour (6); chose (6); ran (6); struck (6); advise (6); ill (6); beauty (6); play (6); safe (5); serious (5); brain (5); memory (5); patience (5); partner (5); long (5); drew (5); ordered (5); hurried (5); guessed (5); approve (5); regret (5); blushed (5); venture (5); recovered (5); longing (5); unwell (5); grandmama (5); ungrateful (4); herself (4); society (4); vanity (4); faults (4); attempts (4); assistance (4); dislike (4); purse (4); understanding (4); tongue (4); playing (4); boot (4); intimacy (4); fixed (4); waited (4); judged (4); pretend (4); endure (4); forgive (4); smiling (4); waiting (4); enjoyed (4); dared (4); submitted (4); required (4); work (4); enjoy (4); history (4); illness (4); quick (3); mistress (3); willing (3); comfortable (3); gue

Male characters:
Catherine; Henry; Eleanor; James; Captain Tilney; Mr. Thorpe; Miss Tilney; Mrs. Allen; John; Mr. Allen; Robinson; General Tilney; Old Allen

Female characters:
Isabella; Miss Morland; Mrs. Morland; Mrs. Thorpe; Mrs. Hughes; Miss Thorpe; Isabella; the Tilneys; Dear Catherine; Mrs. Radcliffe; Mrs. Tilney; Miss Andrews; the Thorpes; the Allens; Udolpho; Mother; Anne; Maria; the Morlands; Sarah

Words that are used for female characters only:
daughters (4); woman (3); desire (3); set (3); learned (3); charge (3); silent (2); gratification (2); resistance (2); inclination (2); movements (2); dancing (2); faith (2); profit (2); broke (2); gained (2); rose (2); resolved (2); distress (2); anxiety (2); room (2); folly (2); Isabella (2); awoke (2); waiting (2); learn (2); Catherine (2); created (2); allowed (2); entertained (2); visitor (2); embraced (2); schoolfellows (2); party (2); air (2); relationship (2); progress (2); kind (2); works (2); days (2); confidante (2); handso

man (26); wife (16); marriage (7); son (7); fortune (6); stood (5); proposed (5); profession (4); addresses (3); regard (3); sought (3); choose (3); wants (3); talks (3); resolution (3); praise (3); pen (3); turn (3); laughed (3); worth (2); nice (2); better (2); heir (2); absent (2); unfeeling (2); servants (2); card (2); opinions (2); visits (2); expressions (2); choice (2); respects (2); Clay (2); language (2); passing (2); became (2); maintained (2); stepped (2); begged (2); described (2); forgiven (2); led (2); regretting (2); turning (2); curacy (2); disposition (2); mouth (2); hung (2); writing (2); folding (2); shot (2); history (2); eldest (2); aunt (2); persevered (2); condescend (2); nodded (2); shy (2); attentive (2); unjust (2); reading (2); does (2); lucky (2); running (2); attempts (2); curricle (2); steps (2); live (2); protested (2); shewed (2); roof (2); gloves (2); fashioned (2); gratitude (2); horses (2); hastened (2); means (2); tells (2); repeats (2); fortnight (1

Ladyship (37); spirits (17); nephew (12); astonishment (11); woman (9); girl (8); longed (8); finished (8); got (8); dislike (7); curiosity (7); turn (7); remembered (7); place (6); reply (6); prefer (6); watched (6); blushed (6); dreaded (6); rejoiced (6); endeavoured (6); wondered (6); laughed (6); added,-- (6); meet (6); ventured (6); girls (6); home (6); prepared (5); appearance (5); age (5); part (5); loved (5); anticipated (5); shocked (4); mistress (4); flattered (4); disappointed (4); hopes (4); inquiries (4); neighbours (4); interest (4); sake (4); reflections (4); let (4); refuse (4); was (4); caught (4); persuade (4); doubted (4); coloured (4); long (4); reached (4); became (4); calling (4); housekeeping (4); poultry (4); unwilling (3); deal (3); ill (3); thankful (3); comfortable (3); right (3); uncivil (3); blind (3); eager (3); uncomfortable (3); equal (3); beauty (3); question (3); eye (3); figure (3); hair (3); petticoat (3); Bingley (3); song (3); anxiety (3); work (3)

man (15); coming (9); merits (6); embarrassment (5); constancy (5); soul (4); Buildings (4); sort (3); rich (3); curricle (3); designs (3); cut (3); age (3); patient (3); principles (3); abode (3); offered (3); actions (3); staying (3); choice (3); offence (3); scorn (3); affectionate (2); deceitful (2); lucky (2); gay (2); talents (2); pointer (2); estate (2); groom (2); integrity (2); inconstancy (2); inquiry (2); repentance (2); intends (2); addressing (2); kissed (2); showed (2); owned (2); feels (2); filled (2); listen (2); imputed (2); feigned (2); grave (2); old (2); listening (2); circumstances (2); setting (2); trouble (2); gift (2); cows (2); poultry (2); errand (2); stopt (2); falls (2); conversed (2); acted (2); free (2); plans (2); preference (2); please (2); drawn (2); issued (2); proposal (2); newspaper (2); cap (2); quiet (2); admires (2); duties (2); blamable (2); writes (2); droll (2); selfishness (2); conceit (2); deference (2); fulfil (2); repent (2); fellow (1); fa