In [366]:
import re
import pandas as pd
import string
import pickle
from textblob import TextBlob
from tqdm._tqdm_notebook import tqdm_notebook
tqdm_notebook.pandas()
import spacy
from nltk.corpus import stopwords
import networkx as nx
from collections import Counter
import itertools
import numpy as np

import plotly.offline as py
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler

In [434]:
# NOTE(security): pickle.load can execute arbitrary code while deserialising.
# Only load pickle files that were produced by this project.
with open('char_list.pkl', 'rb') as f:
    char_list = pickle.load(f)

# Chapter table; later cells read its 'chapter_title',
# 'cumulative_chapter_number' and 'text' columns.
all_books_nosw_singularized = pd.read_pickle('data/allbooks_sing_nosw.pkl')

# Lower-case names of the characters tracked in the interaction graphs.
# Every name must be listed exactly once: a repeated name is paired with itself
# by itertools.combinations and ends up as a self-loop edge in the graph
# ('moridin' used to be listed twice).
important_chars = ['rand', 'perrin', 'mat', 'egwene', 'elayne', 'nynaeve', 'moiraine', 'min', 'faile', 'aviendha',
                  'gawyn', 'lan', 'siuan', 'morgase', 'cadsuane', 'ituralde', 'galad', 'pevara', 'tuon', 'elaida',
                  'androl', 'taim', 'logain', 'gareth', 'rhuarc', 'graendal', 'moridin', 'moghedien', 'verin',
                  'birgitte', 'loial', 'tam', 'demandred', 'sammael']

In [435]:
def get_chars_in_chapter(split_text, char_list):
    """
    Return the characters from ``char_list`` that occur as tokens in a chapter.

    Parameters
    ----------
    split_text : list of str
        The chapter text split into tokens.
    char_list : iterable of str
        Candidate character names. Names are matched against whole tokens.

    Returns
    -------
    list of str
        The names found in ``split_text``, in the order they first appear in
        ``char_list``. Each name is returned at most once, even if ``char_list``
        repeats it, so that downstream pairing never pairs a name with itself.
    """
    # A set gives constant-time membership instead of scanning the token list
    # once per candidate name.
    tokens = set(split_text)
    # dict.fromkeys drops repeated names while keeping the original order.
    return [char for char in dict.fromkeys(char_list) if char in tokens]

def get_appearances(split_text, chars_in_chapter, graph_dict, indices_dict):
    """
    Record how often, and where, each character occurs in a chapter.

    Both dictionaries are updated in place and nothing is returned. For every
    name in ``chars_in_chapter``:

    * ``graph_dict[name]`` is set to the number of tokens equal to ``name``;
    * ``indices_dict[name]`` is set to a NumPy array holding the positions of
      those tokens within ``split_text``.
    """
    tokens = np.array(split_text)
    for name in chars_in_chapter:
        # Positions of the tokens that match the name exactly.
        positions = np.where(tokens == name)[0]
        indices_dict[name] = positions
        graph_dict[name] = len(positions)

def get_all_interactions(chars_in_chapter, threshold, graph_dict, indices_dict):
    """
    Add an entry to ``graph_dict`` for every pair of characters that interacts.

    Each unordered pair drawn from ``chars_in_chapter`` is scored with
    ``get_num_interactions``. Pairs scoring more than 1 are stored in
    ``graph_dict`` under the key ``(first, second)``; all other pairs are left
    out. ``graph_dict`` is modified in place and nothing is returned.
    """
    for first, second in itertools.combinations(chars_in_chapter, 2):
        count = get_num_interactions(first, second, indices_dict, threshold)
        # A single co-occurrence is not recorded as an edge.
        if count > 1:
            graph_dict[(first, second)] = count

def get_num_interactions(char1, char2, indices_dict, threshold):
    """
    Count the mention pairs of two characters that lie close together.

    Parameters
    ----------
    char1, char2 : hashable
        Keys into ``indices_dict``.
    indices_dict : dict
        Maps a character to an array-like of the token positions at which it
        is mentioned.
    threshold : int or float
        Maximum distance, in tokens, for two mentions to count as an
        interaction (the bound is inclusive).

    Returns
    -------
    int
        Number of ``(i, j)`` pairs, ``i`` a position of ``char1`` and ``j`` a
        position of ``char2``, with ``abs(i - j) <= threshold``.
    """
    first = np.asarray(indices_dict[char1])
    # Sorting makes the window lookup below valid for any input order.
    second = np.sort(np.asarray(indices_dict[char2]))

    # No mentions on either side, or an impossible distance bound: no pairs.
    if first.size == 0 or second.size == 0 or threshold < 0:
        return 0

    # For each position i of char1, the matching positions of char2 are those
    # in [i - threshold, i + threshold]. Two binary searches on the sorted
    # array give the size of that window without building every pair.
    window_start = np.searchsorted(second, first - threshold, side='left')
    window_end = np.searchsorted(second, first + threshold, side='right')
    return int((window_end - window_start).sum())
    
def find_chapter_interactions(text, char_list, threshold):
    """
    Build the interaction dictionary for a single chapter.

    The chapter ``text`` is split on whitespace. Characters from ``char_list``
    that occur among the tokens get an entry keyed by their name holding their
    number of appearances. Pairs of characters whose mentions fall within
    ``threshold`` tokens of each other get an entry keyed by the pair (see
    ``get_all_interactions`` for the exact rule).

    Returns the resulting dictionary.
    """
    tokens = text.split()
    present = get_chars_in_chapter(tokens, char_list)

    chapter_graph, positions = {}, {}
    get_appearances(tokens, present, chapter_graph, positions)
    get_all_interactions(present, threshold, chapter_graph, positions)
    return chapter_graph

In [436]:
def add_graphs(graph1, graph2):
    """
    Merge two graph dictionaries by summing the values of shared keys.

    Parameters
    ----------
    graph1, graph2 : dict or None
        Graph dictionaries mapping a key to a count. An empty dictionary or
        ``None`` is treated as "no graph".

    Returns
    -------
    dict
        A new dictionary holding every key of both inputs. Keys present in
        both carry the sum of their values. The result is never one of the
        input objects, so callers may modify it without affecting the inputs.
    """
    if not graph1:
        # Copy instead of handing back graph2 itself, so the result never
        # aliases an input.
        return graph2.copy() if graph2 else {}

    combined_graph = graph1.copy()
    if graph2:
        for key, val in graph2.items():
            if key in combined_graph:
                combined_graph[key] += val
            else:
                combined_graph[key] = val
    return combined_graph

def add_graphs_from_list(graph_list):
    """
    Merge every graph dictionary in ``graph_list`` into one.

    The first element seeds the result and each remaining element is folded in
    with ``add_graphs``. The merged dictionary is returned.
    """
    total = graph_list[0]
    for next_graph in graph_list[1:]:
        total = add_graphs(total, next_graph)
    return total

def make_cumulative_graphs(graph_list):
    """
    Return the running totals of a sequence of graph dictionaries.

    Element ``k`` of the returned list is the ``add_graphs`` combination of the
    first ``k + 1`` entries of ``graph_list``. Every element is stored as its
    own copy of the running total.
    """
    running_total = graph_list[0]
    snapshots = [running_total.copy()]

    for chapter_graph in graph_list[1:]:
        running_total = add_graphs(running_total, chapter_graph)
        snapshots.append(running_total.copy())

    return snapshots

In [437]:
def dict_to_graph(graph_dict):
    """
    Convert a graph dictionary into a ``networkx.Graph``.

    Keys of exact type ``str`` become nodes whose ``size`` attribute is the
    stored value. Keys of exact type ``tuple`` become edges whose ``weight``
    attribute is the stored value. Keys of any other type are ignored.
    """
    graph = nx.Graph()
    for key, value in graph_dict.items():
        key_type = type(key)
        if key_type is str:
            graph.add_node(key, size=value)
        elif key_type is tuple:
            graph.add_edge(*key, weight=value)
    return graph

In [438]:
# One row per chapter: identifying columns copied from the chapter table.
graphs = pd.DataFrame({
    'chapter_title': all_books_nosw_singularized['chapter_title'],
    'cumulative_chapter_number': all_books_nosw_singularized['cumulative_chapter_number'],
})

# Interaction dictionary of each chapter; mentions at most 20 tokens apart
# count towards an interaction.
graphs['chapter_graph'] = all_books_nosw_singularized.progress_apply(
    lambda row: find_chapter_interactions(row['text'], important_chars, 20),
    axis=1,
)

# Running total of the chapter dictionaries up to and including each chapter.
graphs['cumulative_graphs'] = make_cumulative_graphs(graphs['chapter_graph'])

# Cumulative graph at index 676 (presumably the final chapter; verify against
# the number of rows in the table).
G = dict_to_graph(graphs['cumulative_graphs'][676])

  0%|          | 0/677 [00:00<?, ?it/s]

In [439]:
# graph1: cumulative graph at index 676; graph2: cumulative graph at index 10.
graph1, graph2 = (
    dict_to_graph(graphs['cumulative_graphs'][chapter_index])
    for chapter_index in (676, 10)
)

In [474]:
def scale_edge_weights(graph):
    """
    Return a copy of ``graph`` whose edge weights are rescaled to [0.1, 8].

    The ``weight`` attribute of every edge is min-max scaled across all edges
    of the graph, so the lightest edge maps to 0.1 and the heaviest to 8. The
    input graph is left untouched.

    A graph without edges is returned as an unmodified copy, because
    ``MinMaxScaler`` raises when asked to fit an empty array.
    """
    g = graph.copy()
    edges = list(g.edges())
    if not edges:
        return g

    original_weights = np.array([g.edges[edge]['weight'] for edge in edges])
    scaler = MinMaxScaler(feature_range=(.1, 8))
    # The scaler expects a 2-D (n_samples, n_features) array.
    new_weights = scaler.fit_transform(original_weights.reshape(-1, 1)).flatten()

    for edge, new_weight in zip(edges, new_weights):
        g.edges[edge]['weight'] = new_weight
    return g

def plot_network(graph, save = False, fname = 'graph'):
    """
    Draw a character network with plotly.

    Edge widths come from the rescaled edge weights (see
    ``scale_edge_weights``). Node positions come from a seeded spring layout,
    so repeated calls on the same graph give the same picture. Every node is
    drawn as a fixed-size marker labelled with its capitalised name; the
    per-node ``size`` attribute is not used.

    NOTE: this function calls ``make_edge``, which is defined in a later cell
    of the notebook. That cell must be run before calling ``plot_network``.

    Parameters
    ----------
    graph : networkx.Graph
        Graph whose edges carry a ``weight`` attribute.
    save : bool, default False
        If True, write the figure to ``network_graphs/<fname>.png`` (the
        directory is created when missing). Otherwise render the figure with
        plotly offline into ``test.html``.
    fname : str, default 'graph'
        File name, without extension, used when ``save`` is True.
    """
    import os  # local import: only needed to prepare the output directory

    scaled = scale_edge_weights(graph)
    pos = nx.spring_layout(graph, k =.5 , seed = 1)

    # One line trace per edge so every edge can have its own width.
    edge_traces = []
    for char1, char2 in scaled.edges():
        x0, y0 = pos[char1]
        x1, y1 = pos[char2]
        width = scaled.edges[char1, char2]['weight']
        edge_traces.append(make_edge([x0, x1, None], [y0, y1, None], width))

    # Collect node data up front instead of growing the trace one tuple at a time.
    nodes = list(scaled.nodes())
    node_trace = go.Scatter(x         = [pos[node][0] for node in nodes],
                            y         = [pos[node][1] for node in nodes],
                            text      = [node.capitalize() for node in nodes],
                            textposition = "top center",
                            textfont_size = 20,
                            mode      = 'markers+text',
                            hoverinfo = 'none',
                            marker    = dict(color = ['DarkSlateBlue'] * len(nodes),
                                             size  = [20] * len(nodes),
                                             line  = None))

    # Transparent backgrounds; edges are added first so nodes are drawn on top.
    layout = go.Layout(paper_bgcolor='rgba(0,0,0,0)',plot_bgcolor='rgba(0,0,0,0)')
    fig = go.Figure(data = edge_traces + [node_trace], layout = layout)

    fig.update_layout(showlegend = False, width = 2000, height = 2000)
    fig.update_xaxes(showticklabels = False)
    fig.update_yaxes(showticklabels = False)

    if save:
        out_path = 'network_graphs/'+fname+'.png'
        # Writing the image fails if the target directory does not exist.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        fig.write_image(out_path)
    else:
        py.plot(fig, filename='test.html')

In [451]:
# Render the cumulative graph at index 10 (graph2). With save left at its
# default of False, the figure is rendered via plotly offline into test.html.
plot_network(graph2)

In [448]:
# Render the cumulative graph at index 676 (graph1). This writes to the same
# test.html file as any other unsaved plot_network call.
plot_network(graph1)


In [475]:
# Export one PNG per cumulative graph from index 650 onwards. Files are
# numbered from 0 (chap0.png, chap1.png, ...) by position in the slice, not by
# chapter number; the slider in show_pics relies on this numbering.
for i,graph_dict in enumerate(graphs['cumulative_graphs'][650:]):
    graph = dict_to_graph(graph_dict)
    plot_network(graph, save=True, fname = f'chap{i}')
    
    

In [476]:
from IPython.display import Image
import ipywidgets as widgets
from ipywidgets import interact, interact_manual

@interact
def show_pics(chap_num = (0,26,1)):
    """
    Show the exported network image selected by the slider.

    ``chap_num`` is driven by an integer slider running from 0 to 26 in steps
    of 1 and selects ``network_graphs/chap<chap_num>.png``.
    """
    image_path = f'network_graphs/chap{chap_num}.png'
    picture = Image(image_path)
    display(picture)

interactive(children=(IntSlider(value=13, description='chap_num', max=26), Output()), _dom_classes=('widget-in…

In [383]:

def make_edge(x, y, width):
    '''Build the plotly line trace for a single edge.

    Parameters
    ----------
    x     : sequence of the endpoints' x-coordinates followed by None,
            i.e. [x0, x1, None]

    y     : sequence of the endpoints' y-coordinates followed by None,
            i.e. [y0, y1, None]

    width : the width of the line

    Returns
    -------
    A go.Scatter trace in 'lines' mode, drawn in cornflower blue with the
    given width, through the supplied coordinates.
    '''
    line_style = dict(width = width, color = 'cornflowerblue')
    return go.Scatter(x = x, y = y, line = line_style, mode = 'lines')