In [1]:
import re
import pandas as pd
import string
import pickle
from textblob import TextBlob
# Import from the public `tqdm.notebook` module; the private `tqdm._tqdm_notebook`
# path is deprecated and emits a warning asking for this replacement.
from tqdm.notebook import tqdm_notebook
# Registers `progress_apply` on pandas objects (used further down when building the chapter graphs).
tqdm_notebook.pandas()
from nltk.corpus import stopwords
import networkx as nx
import itertools
import numpy as np

import plotly.offline as py
import plotly.graph_objects as go

from sklearn.preprocessing import MinMaxScaler

Please use `tqdm.notebook.*` instead of `tqdm._tqdm_notebook.*`
  from tqdm._tqdm_notebook import tqdm_notebook


In [2]:
# Load the pickled character list from the working directory.
# NOTE(review): unpickling can execute arbitrary code — only load files you produced yourself.
# NOTE(review): `char_list` is not referenced by the later cells visible here (they pass
# `important_chars` instead) — confirm whether it is still needed.
with open('char_list.pkl', 'rb') as f:
    char_list = pickle.load(f)
# Per-chapter DataFrame; later cells read its 'chapter_title', 'cumulative_chapter_number'
# and 'text' columns. Presumably the text is stop-word-free and singularized (per the
# file name) — TODO confirm against the preprocessing notebook.
all_books_nosw_singularized = pd.read_pickle('data/allbooks_sing_nosw.pkl')

# Lower-case character names that are searched for as whole tokens in each chapter's text.
important_chars = ['rand', 'perrin', 'mat', 'egwene', 'elayne', 'nynaeve', 'moiraine', 'min', 'faile', 'aviendha',
                  'gawyn', 'lan', 'siuan', 'morgase', 'cadsuane', 'ituralde', 'galad', 'pevara', 'tuon', 'elaida', 
                  'androl', 'taim', 'logain', 'gareth', 'rhuarc', 'graendal', 'moridin', 'moghedien', 'verin', 
                  'birgitte', 'loial', 'tam', 'demandred', 'sammael', 'thom']

## Functions for making graphs

In [3]:
def get_chars_in_chapter(split_text, char_list):
    """
    Return the names from ``char_list`` that occur in ``split_text``.

    Membership is tested with ``name in split_text``; the result keeps the
    order (and any duplicates) of ``char_list``.
    """
    present = []
    for name in char_list:
        if name in split_text:
            present.append(name)
    return present

def get_appearances(split_text, chars_in_chapter, graph_dict, indices_dict):
    """
    Record where, and how often, each character occurs in a tokenized chapter.

    Both dictionaries are updated in place and nothing is returned:
    ``indices_dict[char]`` receives the array of token positions equal to
    ``char`` and ``graph_dict[char]`` receives the number of such positions.
    """
    tokens = np.array(split_text)
    for name in chars_in_chapter:
        matches = tokens == name
        positions = np.where(matches)[0]
        indices_dict[name] = positions
        graph_dict[name] = len(positions)

def get_all_interactions(chars_in_chapter, threshold, graph_dict, indices_dict):
    """
    Add an entry to ``graph_dict`` for every character pair that interacts more than once.

    Each unordered pair from ``chars_in_chapter`` is scored with
    ``get_num_interactions``; pairs scoring more than 1 are stored under the
    key ``(char1, char2)``. ``graph_dict`` is updated in place.
    """
    for first, second in itertools.combinations(chars_in_chapter, 2):
        count = get_num_interactions(first, second, indices_dict, threshold)
        if count <= 1:
            # Zero or one interaction is not recorded.
            continue
        graph_dict[(first, second)] = count

def get_num_interactions(char1, char2, indices_dict, threshold):
    """
    Count the position pairs of two characters that lie within ``threshold`` tokens.

    Parameters
    ----------
    char1, char2 : hashable
        Keys into ``indices_dict``.
    indices_dict : dict
        Maps a character to a 1-D sequence/array of token positions.
    threshold : number
        Maximum absolute distance (inclusive) for a pair of positions to count.

    Returns
    -------
    int
        Number of ``(i, j)`` pairs, ``i`` from ``char1`` and ``j`` from ``char2``,
        with ``abs(i - j) <= threshold``. Zero if either character has no positions.
    """
    char1_indices = np.asarray(indices_dict[char1])
    char2_indices = np.asarray(indices_dict[char2])
    # Broadcasting a column against a row produces the full pairwise distance
    # matrix in one vectorized step instead of a Python-level Cartesian product.
    distances = np.abs(char1_indices[:, None] - char2_indices[None, :])
    return int(np.count_nonzero(distances <= threshold))
    
def find_chapter_interactions(text, char_list, threshold):
    """
    Build the graph dictionary for one chapter.

    ``text`` is split on whitespace, the characters from ``char_list`` that
    occur among the tokens are collected, and the helpers fill a fresh dict
    with per-character appearance counts and per-pair interaction counts
    (using ``threshold`` as the maximum token distance). That dict is returned.
    """
    tokens = text.split()
    present = get_chars_in_chapter(tokens, char_list)

    chapter_graph, positions = {}, {}
    get_appearances(tokens, present, chapter_graph, positions)
    get_all_interactions(present, threshold, chapter_graph, positions)

    return chapter_graph

In [4]:
def add_graphs(graph1, graph2):
    """
    Merge two graph dictionaries by summing the values of shared keys.

    Parameters
    ----------
    graph1, graph2 : dict or None
        Graph dictionaries (node or edge key -> count). An empty or ``None``
        operand contributes nothing.

    Returns
    -------
    dict
        A new dictionary; neither input is modified and the result is never
        the same object as an input, so callers may mutate it freely.
    """
    # Always start from a copy so the empty-operand cases do not hand back an
    # alias of the caller's own dictionary.
    combined_graph = dict(graph1) if graph1 else {}
    if graph2:
        for key, val in graph2.items():
            if key in combined_graph:
                combined_graph[key] += val
            else:
                combined_graph[key] = val
    return combined_graph

def add_graphs_from_list(graph_list):
    """
    Sum a sequence of graph dictionaries into one using ``add_graphs``.

    ``graph_list`` may be any iterable of graph dictionaries (a list or a
    pandas Series, for example); it is consumed by plain iteration, so no
    positional or label indexing is required. An empty iterable yields ``{}``
    instead of raising ``IndexError``.
    """
    combined_graph = {}
    for new_graph in graph_list:
        combined_graph = add_graphs(combined_graph, new_graph)
    return combined_graph

def make_cumulative_graphs(graph_list):
    """
    Return the running totals of a sequence of graph dictionaries.

    Element ``i`` of the result is a copy of the sum (via ``add_graphs``) of
    the first ``i + 1`` graphs. ``graph_list`` may be any iterable (a list or
    a pandas Series, for example); it is consumed by plain iteration, so the
    result does not depend on index labels. An empty iterable yields ``[]``
    instead of raising ``IndexError``.
    """
    cumulative_graph_list = []
    cumulative_graph = {}
    for new_graph in graph_list:
        cumulative_graph = add_graphs(cumulative_graph, new_graph)
        # Store a snapshot so entries do not share state with each other or the inputs.
        cumulative_graph_list.append(cumulative_graph.copy())
    return cumulative_graph_list

In [5]:
def dict_to_networkx(graph_dict):
    """
    Convert a graph dictionary into a ``networkx.Graph``.

    Keys that are exactly ``str`` become nodes carrying a ``size`` attribute;
    keys that are exactly ``tuple`` are unpacked into ``add_edge`` and carry a
    ``weight`` attribute. Keys of any other type are ignored. Entries are
    processed in the dictionary's iteration order.
    """
    network = nx.Graph()
    for key, value in graph_dict.items():
        key_type = type(key)
        if key_type is str:
            network.add_node(key, size=value)
        elif key_type is tuple:
            network.add_edge(*key, weight=value)
    return network

In [7]:
# Start from the two identifying columns of the chapter table.
graphs = all_books_nosw_singularized[['chapter_title', 'cumulative_chapter_number']].copy()

# One graph dictionary per chapter; 10 is the maximum token distance that counts as an interaction.
graphs['chapter_graph'] = all_books_nosw_singularized.progress_apply(
    lambda row: find_chapter_interactions(row['text'], important_chars, 10),
    axis=1,
)

# Running totals: each row holds the sum of all chapter graphs up to and including that row.
graphs['cumulative_graphs'] = make_cumulative_graphs(graphs['chapter_graph'])

  0%|          | 0/677 [00:00<?, ?it/s]

In [16]:
# Convert three snapshots to networkx graphs. The integers are lookups on the `graphs`
# columns (presumably a default 0..676 index, matching the 677 rows shown by the
# progress bar — TODO confirm).
graph1 = dict_to_networkx(graphs['cumulative_graphs'][676])  # cumulative totals at row 676
graph2 = dict_to_networkx(graphs['cumulative_graphs'][10])   # cumulative totals at row 10
graph3 = dict_to_networkx(graphs['chapter_graph'][663])      # single-chapter graph for row 663

## Plotting with Plotly

In [8]:
def scale_edge_weights(graph):
    """
    Return a copy of ``graph`` whose edge weights are rescaled for drawing.

    The ``weight`` attribute of every edge is min-max scaled into the range
    [0.5, 15]. The input graph is left untouched.

    A graph without edges is returned as an unmodified copy: the scaler cannot
    be fitted on zero samples and would otherwise raise.

    NOTE(review): when all weights are equal (including the single-edge case)
    the scaler maps every weight to the lower bound, 0.5 — confirm this is the
    desired rendering for such graphs.
    """
    g = graph.copy()
    edge_list = list(g.edges())
    if not edge_list:
        return g

    original_weights = [g.edges()[edge]['weight'] for edge in edge_list]
    scaler = MinMaxScaler(feature_range=(.5, 15))
    # The scaler expects a 2-D (n_samples, n_features) array; flatten back afterwards.
    new_weights = scaler.fit_transform(np.array(original_weights).reshape(-1, 1)).flatten()
    for edge, new_weight in zip(edge_list, new_weights):
        g.edges()[edge]['weight'] = new_weight
    return g


def make_edges(graph, pos, unscaled):
    """
    Build the plotly traces for the edges of a network.

    Parameters
    ----------
    graph : graph object
        Graph whose edge ``weight`` attributes are used as line widths
        (``plot_network`` passes the output of ``scale_edge_weights``).
    pos : mapping
        Node -> ``(x, y)`` coordinates.
    unscaled : graph object
        Graph with the same edges whose ``weight`` values are shown in the
        hover text as the number of connections.

    Returns
    -------
    (list, go.Scatter)
        One line trace per drawn edge, plus a single trace of invisible
        markers at the edge midpoints that carries the hover labels.

    Edges whose width is below 0.6 are skipped entirely (no line, no label).
    """
    edge_traces = []
    edge_text_xs = []
    edge_text_ys = []
    edge_text_labels = []
    for edge in graph.edges():
        width = graph.edges()[edge]['weight']
        if width < .6:
            continue
        # Opacity grows with width but must stay inside the valid rgba alpha
        # range: at least 0.5, and clamped to 1.0 (any width above 5 would
        # otherwise yield an alpha greater than 1).
        alpha = min(1.0, max(.5, round(width/5, 2)))
        color_string = f'rgba(100, 149, 237, {alpha})'

        char1, char2 = edge[0], edge[1]
        x0, y0 = pos[char1]
        x1, y1 = pos[char2]

        edge_trace = go.Scatter(x     = [x0, x1, None],
                                y     = [y0, y1, None],
                                line  = dict(width = width,
                                             color = color_string),
                                mode  = 'lines')
        edge_traces.append(edge_trace)

        # The hover label sits at the midpoint of the edge.
        edge_text_xs.append((x0+x1)/2)
        edge_text_ys.append((y0+y1)/2)
        connections = unscaled.edges()[edge]['weight']
        edge_text_labels.append(char1.capitalize() + ' -- ' + char2.capitalize() + f': {connections} connections')

    # Fully transparent 1px markers: invisible, but they provide hover targets.
    edge_text_trace = go.Scatter(x         = edge_text_xs,
                                 y         = edge_text_ys,
                                 text      = edge_text_labels,
                                 textposition = "bottom center",
                                 textfont_size = 10,
                                 mode      = 'markers',
                                 hoverinfo = 'text',
                                 marker    = dict(color = 'rgba(0,0,0,0)',
                                                 size  = 1,
                                                 line  = None))

    return edge_traces, edge_text_trace

In [9]:
def plot_network(graph, save = False, fname = 'graph'):
    """
    Draw a character network with plotly.

    Parameters
    ----------
    graph : networkx graph
        Graph with ``weight`` attributes on its edges.
    save : bool, default False
        If True, write a PNG to ``network_graphs/<fname>.png`` (the directory
        is created if missing). If False, hand the figure to ``py.plot`` with
        the fixed file name ``test.html``.
    fname : str, default 'graph'
        Base name of the PNG; only used when ``save`` is True.

    Node positions come from a spring layout with a fixed seed, so repeated
    calls on the same graph give the same picture.
    """
    import os  # local import: only needed to make sure the output directory exists

    scaled = scale_edge_weights(graph)
    pos = nx.spring_layout(graph, k =.4 , seed = 1)

    # Edges: scaled weights drive the line widths, the original graph supplies the hover counts.
    edge_traces, edge_text_trace = make_edges(scaled, pos, graph)

    # Nodes
    nodes = list(scaled.nodes())
    node_trace = go.Scatter(x     = [pos[node][0] for node in nodes],
                        y         = [pos[node][1] for node in nodes],
                        text      = [node.capitalize() for node in nodes],
                        textposition = "bottom center",
                        textfont_size = 14,
                        mode      = 'markers+text',
                        hoverinfo = 'none',
                        marker    = dict(color = '#6959CD',
                                         size  = 12,
                                         line  = None))
    layout = go.Layout(paper_bgcolor='rgba(0,0,0,0)',plot_bgcolor='rgba(0,0,0,0)')
    fig = go.Figure(layout = layout)

    # Add every trace in a single call instead of one call per edge; the draw
    # order is unchanged (edges first, then nodes, then the hover markers).
    fig.add_traces([*edge_traces, node_trace, edge_text_trace])

    fig.update_layout(showlegend = False, width = 1500, height = 1500)
    fig.update_xaxes(showticklabels = False)
    fig.update_yaxes(showticklabels = False)

    if save:
        out_path = 'network_graphs/'+fname+'.png'
        # write_image does not create missing directories.
        os.makedirs(os.path.dirname(out_path), exist_ok=True)
        fig.write_image(out_path)
    else:
        py.plot(fig, filename='test.html')

In [12]:
# Plot the cumulative network (graph1). With the default save=False the figure is handed
# to py.plot with the fixed file name 'test.html'.
plot_network(graph1)


In [17]:
# Plot the single-chapter network (graph3). This goes to the same fixed 'test.html'
# file name as any other plot_network call made with save=False.
plot_network(graph3)