# **Final Project**

Gillian Flynn's novels:
- *Sharp Objects* (2006)
- *Dark Places* (2009)
- *Gone Girl* (2012)

In [1]:
# project imports
import json
from collections import Counter
import pandas as pd

In [2]:
# functions for extracting novel information
# note: adapted from the BookNLP documentation
def proc(filename):
    """Load a JSON file (e.g. a BookNLP ``.book`` file) and return its contents.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` for the file's contents.
    """
    # Decode explicitly as UTF-8: without an encoding argument, open() uses
    # the platform locale (often cp1252 on Windows), which can mis-decode
    # non-ASCII text such as curly apostrophes in the novel's tokens.
    with open(filename, encoding="utf-8") as file:
        return json.load(file)

def get_counter_from_dependency_list(dep_list):
    """Count how many times each word appears in a list of dependency tokens.

    Args:
        dep_list: Iterable of token dicts; each must provide the word under
            the ``"w"`` key. Any other keys are ignored.

    Returns:
        A ``Counter`` mapping each ``"w"`` value to its number of occurrences.
    """
    # Only the word itself is counted; the token index ("i") was previously
    # read into an unused local, which made that key needlessly mandatory.
    return Counter(token["w"] for token in dep_list)

# function for extracting character information
# note: adapted from the BookNLP documentation
def _top_terms(dep_list, limit):
    """Return the ``limit`` most common words of ``dep_list`` as (count, word) pairs."""
    return [
        (occurrences, term)
        for term, occurrences in get_counter_from_dependency_list(dep_list).most_common(limit)
    ]


def create_character_data(data, printTop):
    """Build a per-character summary for every named character in ``data``.

    A character is included only if it has at least one entry under
    ``mentions["proper"]``; characters without proper mentions are skipped.

    Args:
        data: Parsed book data with a ``"characters"`` list. Each character
            dict is read for the keys ``"id"``, ``"count"``, ``"g"``,
            ``"mentions"``, ``"agent"``, ``"patient"``, ``"poss"`` and ``"mod"``.
        printTop: Maximum number of most-common words kept in each word list.

    Returns:
        A dict keyed by character id. Each value is a dict with the keys
        ``"name"`` (the ``"n"`` of the first proper mention), ``"id"``,
        ``"timesMentioned"``, ``"gender"`` (the ``"argmax"`` of ``"g"``, or
        ``"unknown"`` when no gender information is present), and
        ``"possList"``, ``"agentList"``, ``"patientList"``, ``"modList"``,
        each a list of ``(count, word)`` tuples ordered by descending count.
    """
    character_data = {}
    for character in data["characters"]:
        proper_mentions = character["mentions"]["proper"]
        # only report named characters
        if not proper_mentions:
            continue

        # Reset for every character. The gender used to be assigned only when
        # gender info existed, so a character without it either raised an
        # UnboundLocalError or inherited the previous character's gender.
        referential_gender = "unknown"
        gender_info = character["g"]
        if gender_info is not None and gender_info != "unknown":
            referential_gender = gender_info["argmax"]

        character_id = character["id"]
        character_data[character_id] = {
            # NOTE(review): the first proper mention is presumably the most
            # frequent name for the character -- confirm against BookNLP docs.
            "name": proper_mentions[0]["n"],
            "id": character_id,
            "timesMentioned": character["count"],
            "gender": referential_gender,
            "possList": _top_terms(character["poss"], printTop),
            "agentList": _top_terms(character["agent"], printTop),
            "patientList": _top_terms(character["patient"], printTop),
            "modList": _top_terms(character["mod"], printTop),
        }

    return character_data

## *Gone Girl*

### **Character Analysis**

- possList: list of nouns that the character possesses
- agentList: list of actions that the character does
- patientList: list of actions done to the character
- modList: list of words used to describe the character

In [3]:
# load the BookNLP output for Gone Girl, then build a table with one row per
# named character (keeping the 50 most common words in each word list)
data = proc(
    r"C:\Users\tracypaige\LTCS180\bookNLP_gillian_flynn\gone_girl\gone_girl.book"
)
character_data = create_character_data(data, printTop=50)
df = pd.DataFrame(character_data).transpose().reset_index(drop=True)
df.head()

Unnamed: 0,name,id,timesMentioned,gender,possList,agentList,patientList,modList
0,Amy,185,1977,she/her,"[(20, parents), (12, husband), (6, hair), (6, ...","[(31, said), (26, ’s), (22, want), (19, had), ...","[(13, kill), (10, love), (10, know), (8, find)...","[(11, pregnant), (6, afraid), (5, woman), (4, ..."
1,Nick,178,1937,he/him/his,"[(47, wife), (15, mom), (15, dad), (9, face), ...","[(27, said), (26, know), (21, have), (20, say)...","[(12, love), (6, framing), (5, tell), (4, ask)...","[(7, man), (4, guy), (3, angry), (3, sure), (2..."
2,Go,180,658,she/her,"[(4, beer), (4, face), (3, eyes), (2, thoughts...","[(37, said), (11, think), (9, had), (8, know),...","[(5, love), (4, tell), (3, told), (2, saw), (2...","[(1, teary), (1, slender), (1, person), (1, fi..."
3,Desi,272,617,he/him/his,"[(5, mother), (5, house), (3, eyes), (3, arm),...","[(22, said), (20, says), (12, had), (11, know)...","[(3, picture), (3, killed), (2, meet), (2, ask...","[(2, guy), (2, able), (1, figure), (1, kid), (..."
4,Boney,225,523,she/her,"[(4, head), (4, hair), (3, eyes), (3, hands), ...","[(75, said), (19, asked), (10, know), (9, look...","[(4, tell), (4, told), (4, kill), (2, met), (2...","[(1, convinced), (1, open), (1, frightened), (..."


- how to visualize this information?

### **Character Network**

- should we base the network on the number of interactions between characters (co-occurrence data), or on sentiment analysis (probably using VADER)?
- best option(?) - probably use the .quotes file to extract the ids of co-occurring characters and perform sentiment analysis on each quote, then store this information in a matrix and find a tool to convert the matrix into a character network visualization

- vector space analysis for analysis across all the books?