# Text Analytics project by Paige McKenzie

Includes code to generate all JavaScript visualizations used in my [blog post](https://p-mckenzie.github.io/2018/01/11/Jane-Austen/).

Data available from [Project Gutenberg](https://www.gutenberg.org/).

In [None]:
import pandas as pd
import re
import nltk
import numpy as np
import itertools

from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyser = SentimentIntensityAnalyzer()

import matplotlib.pyplot as plt
%pylab inline

# Read data

In [None]:
# Load Emma, keeping only the text between the title block and the closing 'FINIS' marker.
with open('./Austen/Emma.txt', 'r') as myfile:
    emma_data = myfile.read().split('EMMA\n\nBy Jane Austen')[1].split('FINIS')[0].strip()

# simple substitutions to reasonable unicode
# double hyphens and underscores become a space
emma_data = re.sub('(--)|(_)', ' ', emma_data)
# collapse every whitespace run (newlines included) to one space, then decode the bytes as UTF-8
emma_data = unicode(re.sub(r"\s+", " ", emma_data), 'utf-8')
# curly single quotes -> straight apostrophe
emma_data = re.sub(u"(\u2018|\u2019)", "'", emma_data)
# curly double quotes -> straight double quote
emma_data = re.sub(u"(\u201c|\u201d)", '"', emma_data)
# drop the 'VOLUME <roman numeral>' headings
emma_data = re.sub('VOLUME [IXV]+\s+', '', emma_data)

# sanity check: a slice near the start, the last 198 characters, and the total length
print "{}\n\n------\n\n{}\n\n------\n\n{}".format(emma_data[10:200], emma_data[-198:], len(emma_data))

In [None]:
# Load Pride and Prejudice, keeping only the text between the title block and the
# Project Gutenberg end-of-book marker.
with open('./Austen/Pride and Prejudice.txt', 'r') as myfile:
    pap_data = myfile.read().split('PRIDE AND PREJUDICE\n\nBy Jane Austen')[1].split('End of the Project Gutenberg EBook of Pride and Prejudice, by Jane Austen')[0].strip()

# simple substitutions to reasonable unicode
# collapse every whitespace run (newlines included) to one space, then decode the bytes as UTF-8
pap_data = unicode(re.sub(r"\s+", " ", pap_data), 'utf-8')
# curly single quotes -> straight apostrophe
pap_data = re.sub(u"(\u2018|\u2019)", "'", pap_data)
# curly double quotes -> straight double quote
pap_data = re.sub(u"(\u201c|\u201d)", '"', pap_data)

# sanity check: a slice near the start, the last 198 characters, and the total length
print "{}\n\n------\n\n{}\n\n------\n\n{}".format(pap_data[10:200], pap_data[-198:], len(pap_data))

In [None]:
with open('./Austen/Sense and Sensibility.txt', 'r') as myfile:
    sas_data = myfile.read().split('SENSE AND SENSIBILITY\n\nby Jane Austen\n\n(1811)')[1].split('THE END')[0].strip()

# simple substitutions to reasonable unicode
sas_data = re.sub('(--)|(_)', ' ', sas_data)
sas_data = unicode(re.sub(r"\s+", " ", sas_data), 'utf-8')
sas_data = re.sub(u"(\u201c|\u201d)", '"', sas_data)
sas_data = re.sub('VOLUME [IXV]+\s+', '', sas_data)

print "{}\n\n------\n\n{}\n\n------\n\n{}".format(sas_data[10:200], sas_data[-200:], len(sas_data))

# Compare Vocabulary Sizes

In [None]:
def get_word_set(text):
    """Return the set of distinct lower-cased words in *text*.

    A word is a maximal run of ASCII letters and apostrophes. As a side
    effect the total number of words found (duplicates included) is printed.
    """
    words = re.findall(r"[A-Za-z']+", text)
    # single-argument call form prints the same under Python 2 and Python 3
    print(len(words))
    # the pattern needs at least one character, so no empty strings can occur
    return {word.lower() for word in words}

sas_set = get_word_set(sas_data)
emma_set = get_word_set(emma_data)
pap_set = get_word_set(pap_data)

print len(sas_set), len(pap_set), len(emma_set)

In [None]:
from bokeh.core.properties import value
from bokeh.io import show, output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure
from bokeh.transform import dodge
output_notebook()

# Grouped bar chart: total words and unique words for each novel.
# All figures are hard-coded; regenerate them from get_word_set if the texts change.
books = ['Emma', 'Pride and Prejudice', 'Sense and Sensibility']
data = {'books' : books,
        'Words'   : [161085, 122153, 119933],
        # bar heights now match the 'text2' labels below and the vocabulary sizes used by the
        # circle plot (they were 7433, 6570, 6451, which disagreed with the printed labels)
        'Unique Words'   : [7275, 6377, 6399],
        'text1'   : ['161,085', '122,153', '119,933'],
        'text2'   : ['7,275', '6,377' , '6,399'],
        # x positions (in category-index units) of the value labels for each bar group
        'x1' : [.4,1.4,2.4],
        'x2' : [.6,1.6,2.6]
       }

source = ColumnDataSource(data=data)

p = figure(x_range=books, plot_height=350, plot_width=650, title="Word Counts by Novel", y_range=[0,180000],
           toolbar_location=None, tools="")

# the two bars of a group are dodged left/right of the category centre
p.vbar(x=dodge('books', -0.1, range=p.x_range), top='Words', width=0.2, source=source,
       color='#5e4fa2', alpha=.6, legend=value("Words"))

p.vbar(x=dodge('books',  0.1,  range=p.x_range), top='Unique Words', width=0.2, source=source,
       color='#9e0142', alpha=.6, legend=value("Unique Words"))

# slanted value labels sitting on top of each bar
labels = LabelSet(x='x1', y='Words', text='text1', level='glyph', source=source, 
                  text_align='left', angle=.8, text_font_size='7pt')
p.add_layout(labels)

labels = LabelSet(x='x2', y='Unique Words', text='text2', level='glyph', source=source, 
                  text_align='left', angle=.8, text_font_size='7pt')
p.add_layout(labels)

p.x_range.range_padding = 0.1
p.xgrid.grid_line_color = None
p.legend.location = "top_right"
p.legend.orientation = "horizontal"
p.legend.label_text_font_size = '8pt'
p.title.align = "center"
p.title.text_font_size = "20px"
p.toolbar_location = None

show(p)

In [None]:
print len(sas_set), len(pap_set), len(emma_set)

from matplotlib_venn import venn3
venn3([sas_set, pap_set, emma_set], ('Sense and Sensibility', 'Pride and Prejudice', 'Emma'))
plt.show()

In [None]:
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.plotting import figure, show, output_file, output_notebook
output_notebook()

# Bokeh re-creation of the Venn diagram: one circle per novel, with the marker size scaled
# from hard-coded vocabulary counts. Positions, label offsets and angles are hand-placed.
source = ColumnDataSource(dict(book=['Sense and Sensibility', 'Pride and Prejudice', 'Emma'],
                              size=[i/20.*5/8. for i in [6399, 6377, 7275]],
                              x=[1,2,1.5], y=[1.5,1.5,1],
                              angle=[.5,-.5,0],
                              labelx=[.1,2.9,1.5], labely=[2.1,2.15,0],
                              color=['#f47044', '#5e4fa2', '#9e0142'],
                              fill_alpha=[.6,.6,.4]))

p = figure(plot_width=500, plot_height=500, title='Vocabulary Scale by Novel',
           x_range=[-1.5, 4.5], y_range=[-.5, 2.8], tools=['pan,reset,wheel_zoom'])

p.circle('x', 'y', color='color', size='size', source=source, fill_alpha='fill_alpha')

# novel titles placed around the circles
labels = LabelSet(x='labelx', y='labely', text='book', angle='angle', level='glyph', source=source, text_align='center')
p.add_layout(labels)

# Hard-coded counts for the seven regions of the diagram.
# NOTE(review): presumably copied from the venn3 output; update if the vocabularies change.
source_lab = ColumnDataSource(dict(text=['1351', '658', '1363', '772', '3618', '738', '2147'],
                              x=[.2,1.5,2.8,.5,1.5,2.5,1.5], y=[1.8,1.9,1.8,1.1,1.3,1.1,.5]))

labels = LabelSet(x='x', y='y', text='text', level='glyph', source=source_lab, text_align='center')

p.add_layout(labels)

# strip axes and grid so only the circles and labels remain
p.axis.visible = False
p.xgrid.grid_line_color = None
p.ygrid.grid_line_color = None
p.title.align = "center"
p.title.text_font_size = "20px"
p.toolbar_location = None

show(p)

# Top words by part of speech

In [None]:
#same as in other places
# One inner list per character: the first entry is the display name, the remaining entries
# are aliases. Every entry is later matched as a case-sensitive substring.
emma_chars = [['Emma Woodhouse', 'Emma'], 
             ['Mr. Knightley', 'Knightley'],
             ['Frank Churchill', 'Frank', 'Mr. Churchill'],
             ['Jane Fairfax', 'Jane'],
             ['Harriet Smith', 'Harriet'], 
             ['Miss Bates', 'Bates'],
             ['Mrs. Weston', 'Taylor'],
             ['Mr. Elton', 'Elton']]
# NOTE(review): 'Lizzie', 'Ms. Bennet' and 'Ms. Lucas' look unlikely to occur in the novel's
# text ('Lizzy' / 'Miss ...' are the probable spellings) - verify against pap_data.
# NOTE(review): the bare alias 'Collins' is also a substring of 'Mrs. Collins'.
pap_chars = [['Elizabeth Bennet', 'Elizabeth', 'Lizzie'],
             ['Jane Bennet', 'Jane', 'Ms. Bennet'],
             ['George Wickham', 'Wickham'],
             ['Mr. Darcy', 'Darcy'],
             ['Mr. Bingley', 'Bingley'],
             ['Charlotte Lucas', 'Charlotte', 'Ms. Lucas', 'Mrs. Collins'],
             ['Lady Catherine', 'De Bourgh'],
             ['Mr. Collins', 'Collins']]
sas_chars = [['Elinor Dashwood', 'Elinor', 'Miss Dashwood'],
            ['Marianne Dashwood', 'Marianne'],
            ['Colonel Brandon', 'Brandon'],
            ['John Willoughby', 'Willoughby'],
            ['Edward Ferrars', 'Edward', 'Mr. Ferrars'],
            ['Miss Grey', 'Sophia'],
            ['Lucy Steele', 'Miss Steele'],
            ['Mrs. Jennings']]

In [None]:
def find_conv_ratio(text):
    """Return the fraction of whitespace-separated words in *text* that are inside quotes.

    Apostrophes that touch a non-word character (i.e. that open or close a
    quotation rather than sit inside a word) are first rewritten as double
    quotes; every double-quoted span then counts as conversation.
    Returns 0.0 for empty or whitespace-only text instead of dividing by zero.
    """
    text = re.sub(r"((?<=[^\w])\'|\'(?=[^\w]))", '"', text)
    all_list = text.split()
    if not all_list:
        return 0.0
    conv_list = ' '.join(re.findall(r'\"[^\"]+\"', text)).split()
    return len(conv_list)/float(len(all_list))

In [None]:
print find_conv_ratio(emma_data)
print find_conv_ratio(pap_data)
print find_conv_ratio(sas_data)

In [None]:
from nltk.corpus import stopwords
cachedStopWords = stopwords.words("english")

# Every token of every character name or alias (titles included) is treated as a stop word
# as well, after stripping non-word characters such as the '.' in 'Mr.'.
name_tokens = set()
for alias_list in emma_chars+pap_chars+sas_chars:
    for alias in alias_list:
        for token in alias.split():
            name_tokens.add(re.sub(r"\W", '', token))
cachedStopWords.extend(token.lower() for token in name_tokens)

def tag_text(message):
    """Return nltk POS tags for the non-stop-word tokens of *message*.

    The message is lower-cased and split into runs of letters and apostrophes;
    tokens found in the module-level cachedStopWords list are dropped before
    tagging. The result is whatever nltk.pos_tag returns for that token list.
    """
    # Build the lookup set once per call: testing each token of a whole novel against
    # the stop-word list would scan the list every time.
    stop_words = set(cachedStopWords)
    tokens = [word for word in re.findall(r"[a-z']+", message.lower()) if word not in stop_words]
    return nltk.pos_tag(tokens)

# One (word, pos) row per retained token, for each novel in turn.
sas_words, emma_words, pap_words = [
    pd.DataFrame(tag_text(novel_text), columns=['word', 'pos'])
    for novel_text in (sas_data, emma_data, pap_data)
]

In [None]:
def condense_pos(df):
    """Fold fine-grained POS tags into JJ / NN / RB / VB and keep only those rows.

    *df* needs a 'pos' column. It is relabelled in place (as before), and the
    rows whose condensed tag is one of JJ, NN, RB or VB are returned.
    """
    df.loc[df['pos'].isin(['JJS', 'JJR']), 'pos'] = 'JJ'
    df.loc[df['pos'].isin(['NNP', 'NNPS', 'NNS']), 'pos'] = 'NN'
    df.loc[df['pos'].isin(['RBR', 'RBS']), 'pos'] = 'RB'
    # A missing comma previously fused 'VBD' 'VBG' into the single string 'VBDVBG', so
    # past-tense and gerund verbs were never folded into VB (and were then filtered out).
    df.loc[df['pos'].isin(['VBD', 'VBG', 'VBN', 'VBP', 'VBZ']), 'pos'] = 'VB'
    return df.loc[df['pos'].isin(['JJ', 'NN', 'RB', 'VB'])]

In [None]:
# Top five words per condensed part of speech in Emma, listed by POS then descending count.
emma_words = (
    condense_pos(emma_words)
    .groupby(['pos', 'word']).size()   # occurrences of each (pos, word) pair
    .reset_index()                     # the counts end up in a column named 0
    .sort_values(0, ascending=False)
    .groupby('pos').head(5)            # five most frequent words of each POS
    .sort_values(['pos', 0], ascending=[True, False])
)
emma_words

In [None]:
# Top five words per condensed part of speech in Pride and Prejudice, listed by POS then
# descending count.
pap_words = (
    condense_pos(pap_words)
    .groupby(['pos', 'word']).size()   # occurrences of each (pos, word) pair
    .reset_index()                     # the counts end up in a column named 0
    .sort_values(0, ascending=False)
    .groupby('pos').head(5)            # five most frequent words of each POS
    .sort_values(['pos', 0], ascending=[True, False])
)
pap_words

In [None]:
# Top five words per condensed part of speech in Sense and Sensibility, listed by POS then
# descending count.
sas_words = (
    condense_pos(sas_words)
    .groupby(['pos', 'word']).size()   # occurrences of each (pos, word) pair
    .reset_index()                     # the counts end up in a column named 0
    .sort_values(0, ascending=False)
    .groupby('pos').head(5)            # five most frequent words of each POS
    .sort_values(['pos', 0], ascending=[True, False])
)
sas_words

In [None]:
print pd.Series(re.findall(r"[a-z']+", emma_data.lower())).value_counts().head(5).sum()
print pd.Series(re.findall(r"[a-z']+", pap_data.lower())).value_counts().head(5).sum()
print pd.Series(re.findall(r"[a-z']+", sas_data.lower())).value_counts().head(5).sum()

# Set up data for sentiment and character networks

## Emma

In [None]:
# Split the novel on its 'CHAPTER <roman numeral>' headings; whatever precedes the first
# heading is dropped, so there is exactly one chunk per heading.
chunks = re.split(r'CHAPTER [IVX]+', emma_data)[1:]

# One row per chapter, built in a single constructor call instead of appending row by row
# (every DataFrame.append copied the whole frame, and the method no longer exists in pandas 2).
emma_df = pd.DataFrame({'chapter': ['Chapter {}'.format(number) for number in range(1, len(chunks)+1)],
                        'text': [chunk.strip() for chunk in chunks]},
                       columns=['chapter', 'text'])

def num_sentences(text):
    """Return the number of sentences nltk.sent_tokenize finds in *text*."""
    return len(nltk.sent_tokenize(text))

#find number of sentences per chapter
emma_df['sentences'] = emma_df['text'].apply(num_sentences)

#shift index down one, so first label is at location 0
emma_df['sentences'] = np.insert(emma_df['sentences'].values, 0, 0)[:-1]
#get cumulative sum, for location (starting at 0)
emma_df['sentences'] = emma_df['sentences'].cumsum()

# from here on 'text' holds each chapter as a list of sentences
emma_df['text'] = emma_df['text'].apply(nltk.sent_tokenize)

emma_df.reset_index(inplace=True)
# Blank every chapter label except each fifth one (positions 0, 5, 10, ...) and the last.
# NOTE(review): because this is an XOR, the last label is blanked instead of kept whenever
# the last position is itself a multiple of 5 - confirm that is intended.
emma_df.loc[(emma_df.index % 5 != 0) ^ (emma_df.index ==max(emma_df.index)), 'chapter'] = ''

# index each chapter by the number of sentences that precede it
emma_df.set_index('sentences', inplace=True)
del emma_df['index']
emma_df.head()

In [None]:
def count_occurrences(sent_list, char_list):
    """Return the fraction of sentences in *sent_list* that mention a character.

    A sentence counts once if any string in *char_list* occurs in it as a
    case-sensitive substring. An empty *sent_list* yields 0.0 rather than a
    ZeroDivisionError.
    """
    if not sent_list:
        return 0.0
    count = sum(1 for sentence in sent_list
                if any(name in sentence for name in char_list))
    return count/float(len(sent_list))

# same as before
emma_chars = [['Emma Woodhouse', 'Emma'], 
             ['Mr. Knightley', 'Knightley'],
             ['Frank Churchill', 'Frank', 'Mr. Churchill'],
             ['Jane Fairfax', 'Jane'],
             ['Harriet Smith', 'Harriet'], 
             ['Miss Bates', 'Bates'],
             ['Mrs. Weston', 'Taylor'],
             ['Mr. Elton', 'Elton']]

# One column per character, named after the first alias: the per-chapter mention score,
# rescaled so the column sums to 1, then smoothed with a 3-chapter moving average.
for char_list in emma_chars:
    label = char_list[0]
    share = emma_df['text'].apply(count_occurrences, char_list=char_list)
    share = share/sum(share)
    emma_df[label] = np.convolve(share.values, np.ones((3,))/3, mode='same')
emma_df.head()

## Pride and Prejudice

In [None]:
# One row per chapter: findall yields the 'Chapter N' headings, split yields the text pieces.
# NOTE(review): the split pattern needs whitespace before the heading, so a heading at the
# very start of pap_data is not a split point and stays inside the first text piece; the
# two lists presumably have equal length only because of that - verify.
pap_df = pd.DataFrame([re.findall('Chapter [\d]+', pap_data), re.split('\s+Chapter [\d]+\s+', pap_data)], 
                  index=['chapter', 'text']).T
pap_df.set_index('chapter', inplace=True)

def num_sentences(group):
    """Return the number of sentences nltk.sent_tokenize finds in *group*."""
    return len(nltk.sent_tokenize(group))

#find number of sentences per chapter
pap_df['sentences'] = pap_df['text'].apply(num_sentences)

#shift index down one, so first label is at location 0
pap_df['sentences'] = np.insert(pap_df['sentences'].values, 0, 0)[:-1]
#get cumulative sum, for location (starting at 0)
pap_df['sentences'] = pap_df['sentences'].cumsum()

# from here on 'text' holds each chapter as a list of sentences
pap_df['text'] = pap_df['text'].apply(nltk.sent_tokenize)

pap_df = pap_df.reset_index()
# Blank every chapter label except each fifth one (positions 0, 5, 10, ...) and the last.
# NOTE(review): because this is an XOR, the last label is blanked instead of kept whenever
# the last position is itself a multiple of 5 (e.g. 61 chapters -> position 60) - confirm.
pap_df.loc[(pap_df.index % 5 != 0) ^ (pap_df.index ==max(pap_df.index)), 'chapter'] = ''

# index each chapter by the number of sentences that precede it
pap_df.set_index('sentences', inplace=True)

pap_df.head()

In [None]:
def count_occurrences(sent_list, char_list):
    """Return how many sentences in *sent_list* mention a character.

    A sentence counts once if any string in *char_list* occurs in it as a
    case-sensitive substring.
    NOTE(review): the definitions of this helper in the Emma and Sense and
    Sensibility cells divide by len(sent_list); this one returns the raw
    count - confirm the difference is deliberate.
    """
    return sum(
        1 for sentence in sent_list
        if any(name in sentence for name in char_list)
    )

# same as before
pap_chars = [['Elizabeth Bennet', 'Elizabeth', 'Lizzie'],
             ['Jane Bennet', 'Jane', 'Ms. Bennet'],
             ['George Wickham', 'Wickham'],
             ['Mr. Darcy', 'Darcy'],
             ['Mr. Bingley', 'Bingley'],
             ['Charlotte Lucas', 'Charlotte', 'Ms. Lucas', 'Mrs. Collins'],
             ['Lady Catherine', 'De Bourgh'],
             ['Mr. Collins', 'Collins']]

# One column per character, named after the first alias: the per-chapter mention score,
# rescaled so the column sums to 1, then smoothed with a 3-chapter moving average.
for char_list in pap_chars:
    label = char_list[0]
    share = pap_df['text'].apply(count_occurrences, char_list=char_list)
    share = share/sum(share)
    pap_df[label] = np.convolve(share.values, np.ones((3,))/3, mode='same')
pap_df.head()

## Sense and Sensibility

In [None]:
# Split the novel on its 'CHAPTER <number>' headings; whatever precedes the first heading
# is dropped, so there is exactly one chunk per heading.
chunks = re.split(r'CHAPTER [\d]+', sas_data)[1:]

# One row per chapter, built in a single constructor call instead of appending row by row
# (every DataFrame.append copied the whole frame, and the method no longer exists in pandas 2).
sas_df = pd.DataFrame({'chapter': ['Chapter {}'.format(number) for number in range(1, len(chunks)+1)],
                       'text': [chunk.strip() for chunk in chunks]},
                      columns=['chapter', 'text'])

def num_sentences(group):
    """Return the number of sentences nltk.sent_tokenize finds in *group*."""
    return len(nltk.sent_tokenize(group))

#find number of sentences per chapter
sas_df['sentences'] = sas_df['text'].apply(num_sentences)

#shift index down one, so first label is at location 0
sas_df['sentences'] = np.insert(sas_df['sentences'].values, 0, 0)[:-1]
#get cumulative sum, for location (starting at 0)
sas_df['sentences'] = sas_df['sentences'].cumsum()

# from here on 'text' holds each chapter as a list of sentences
sas_df['text'] = sas_df['text'].apply(nltk.sent_tokenize)

sas_df.reset_index(inplace=True)
# Blank every chapter label except each fifth one (positions 0, 5, 10, ...) and the last.
# NOTE(review): because this is an XOR, the last label is blanked instead of kept whenever
# the last position is itself a multiple of 5 - confirm that is intended.
sas_df.loc[(sas_df.index % 5 != 0) ^ (sas_df.index ==max(sas_df.index)), 'chapter'] = ''

# index each chapter by the number of sentences that precede it
sas_df.set_index('sentences', inplace=True)
del sas_df['index']

sas_df.head()

In [None]:
def count_occurrences(sent_list, char_list):
    """Return the fraction of sentences in *sent_list* that mention a character.

    A sentence counts once if any string in *char_list* occurs in it as a
    case-sensitive substring. An empty *sent_list* yields 0.0 rather than a
    ZeroDivisionError.
    """
    if not sent_list:
        return 0.0
    count = sum(1 for sentence in sent_list
                if any(name in sentence for name in char_list))
    return count/float(len(sent_list))

# same as before
sas_chars = [['Elinor Dashwood', 'Elinor', 'Miss Dashwood'],
            ['Marianne Dashwood', 'Marianne'],
            ['Colonel Brandon', 'Brandon'],
            ['John Willoughby', 'Willoughby'],
            ['Edward Ferrars', 'Edward', 'Mr. Ferrars'],
            ['Miss Grey', 'Sophia'],
            ['Lucy Steele', 'Miss Steele'],
            ['Mrs. Jennings']]

# One column per character, named after the first alias: the per-chapter mention score,
# rescaled so the column sums to 1, then smoothed with a 3-chapter moving average.
for char_list in sas_chars:
    label = char_list[0]
    share = sas_df['text'].apply(count_occurrences, char_list=char_list)
    share = share/sum(share)
    sas_df[label] = np.convolve(share.values, np.ones((3,))/3, mode='same')
sas_df.head()

# Sentiment plots

In [None]:
# emma sentiment

from bokeh.models import ColumnDataSource, Label, LabelSet, Arrow, VeeHead, Toggle, CustomJS
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models.tickers import FixedTicker
from bokeh.layouts import layout
output_notebook()

p_emma = figure(plot_width=650, plot_height=350, title='Emma Sentiment',
           tools=['pan,reset,wheel_zoom'], y_range=[-.08,.45])

# add a line renderer (rolling average by groups of 100)
# VADER compound score of every sentence (volume/chapter headings removed first), averaged
# over a sliding window of 100 sentences; mode='valid' keeps only full windows.
# NOTE(review): sentences are tokenized here over the whole text, whereas the tick positions
# below come from per-chapter tokenization - the two counts may not line up exactly.
emma_conv = np.convolve([analyser.polarity_scores(s)['compound'] for s in nltk.sent_tokenize(re.sub('VOLUME [IXV]+\s+|CHAPTER [IXV]+\s+', '', emma_data))], np.ones((100,))/100, mode='valid')
print emma_conv.max(), emma_conv.min(), emma_conv.mean(), emma_conv.std()
p_emma.line([i+1 for i in range(len(emma_conv))], emma_conv, line_width=1, color='#9e0142')

p_emma.xgrid.grid_line_color = None
p_emma.xaxis.major_label_orientation = 1
p_emma.x_range.range_padding = 0.05
p_emma.title.align = "center"
p_emma.title.text_font_size = "20px"

# one tick at the sentence offset where each chapter starts
p_emma.xaxis.ticker = FixedTicker(ticks=emma_df.index.values)

# map str(sentence offset) -> chapter label (blank for the chapters whose label was cleared)
p_emma.xaxis.major_label_overrides = emma_df.reset_index().astype(str).set_index('sentences').to_dict()['chapter']

p_emma.add_layout(Label(x=-50, y=-.05, text='* Rolling Average by 100 sentences', text_font_size='8pt',
                   render_mode='css', text_baseline='hanging', text_align='left'))

# Annotation arrows at hand-picked coordinates; they start invisible (head alphas 0, white
# shaft) and are revealed by the toggle callback below.
arrows_emma = [Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=6950, y_start=-.05, x_end=7030, y_end=-.05, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=2000, y_start=.02, x_end=2050, y_end=.025, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=4500, y_start=.33, x_end=4400, y_end=.326, line_color='white')
         ]
for arrow in arrows_emma:
    p_emma.add_layout(arrow)
    
# annotation texts with hand-picked positions; text_alpha=0 keeps them hidden initially
source_lab = ColumnDataSource(dict(
    text=["Frank and Jane's Engagement Revealed", 'Mr. Elton Proposes', 'Emma Decides She Loves Frank'],
    x=[6780, 1850, 7180], 
    y=[-.039, .01, .345]))

labels_emma = LabelSet(x='x', y='y', text='text', text_align='right', source=source_lab, level='glyph', 
                       text_font_size='8pt', render_mode='css', text_baseline='hanging', text_alpha=0)

p_emma.add_layout(labels_emma)

# CoffeeScript callback: sets the label and arrow-head alphas to 1 when the toggle is
# active and back to 0 otherwise, then emits a change on the first arrow.
code_emma = '''\
if toggle.active
    labels.text_alpha = 1
    a.end.fill_alpha = 1
    a.end.line_alpha = 1
    b.end.fill_alpha = 1
    b.end.line_alpha = 1
    c.end.fill_alpha = 1
    c.end.line_alpha = 1
    a.change.emit()
else
    labels.text_alpha = 0
    a.end.fill_alpha = 0
    a.end.line_alpha = 0
    b.end.fill_alpha = 0
    b.end.line_alpha = 0
    c.end.fill_alpha = 0
    c.end.line_alpha = 0
    a.change.emit()
'''
# the callback is created with empty args because the toggle it refers to does not exist
# yet; args are filled in once the toggle has been built
callback_emma = CustomJS.from_coffeescript(code=code_emma, args={})
toggle_emma = Toggle(label="Add Annotations", button_type="success", callback=callback_emma, css_classes=['toggle_button'])
callback_emma.args = {'toggle': toggle_emma, 'labels': labels_emma, 'a':arrows_emma[0],
                'b':arrows_emma[1], 'c':arrows_emma[2]}

layout_emma = layout([p_emma], [toggle_emma])

#output_file('emma.html')
show(layout_emma)

In [None]:
#pride and prejudice sentiment

from bokeh.models import ColumnDataSource, Label, LabelSet, Arrow, VeeHead, Toggle, CustomJS
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models.tickers import FixedTicker
from bokeh.layouts import layout
output_notebook()

p_pap = figure(plot_width=650, plot_height=350, title='Pride and Prejudice Sentiment',
           tools=['pan,reset,wheel_zoom'], y_range=[-.08,.45])

# add a line renderer (rolling average by groups of 100)
# VADER compound score of every sentence of pap_data (tokenized as-is, chapter headings
# included), averaged over a sliding window of 100 sentences; mode='valid' keeps only
# full windows.
pap_conv = np.convolve([analyser.polarity_scores(s)['compound'] for s in nltk.sent_tokenize(pap_data)], np.ones((100,))/100, mode='valid')
print pap_conv.max(), pap_conv.min(), pap_conv.mean(), pap_conv.std()
p_pap.line([i+1 for i in range(len(pap_conv))], pap_conv, line_width=1, color='#5e4fa2')

p_pap.xgrid.grid_line_color = None
p_pap.xaxis.major_label_orientation = 1
p_pap.x_range.range_padding = 0.05
p_pap.title.align = "center"
p_pap.title.text_font_size = "20px"

# one tick at the sentence offset where each chapter starts
p_pap.xaxis.ticker = FixedTicker(ticks=pap_df.index.values)

# map str(sentence offset) -> chapter label (blank for the chapters whose label was cleared)
p_pap.xaxis.major_label_overrides = pap_df.reset_index().astype(str).set_index('sentences').to_dict()['chapter']

p_pap.add_layout(Label(x=-50, y=.44, text='* Rolling Average by 100 sentences', text_font_size='8pt',
                   render_mode='css', text_baseline='hanging', text_align='left'))

# Annotation arrows at hand-picked coordinates; they start invisible (head alphas 0, white
# shaft) and are revealed by the toggle callback below.
arrows_pap = [Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=3123, y_start=-0.029, x_end=3203, y_end=-0.015, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=4033, y_start=-0.019, x_end=3933, y_end=-0.015, line_color='white'),
               Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=1524, y_start=0, x_end=1620, y_end=0, line_color='white'),
               Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=5050, y_start=.295, x_end=5120, y_end=.28, line_color='white'),
               Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=5750, y_start=.325, x_end=5820, y_end=.315, line_color='white'),
               Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=1900, y_start=.35, x_end=1840, y_end=.35, line_color='white')
         ]
for arrow in arrows_pap:
    p_pap.add_layout(arrow)
    
# annotation texts with hand-picked positions; text_alpha=0 keeps them hidden initially
source_pap = ColumnDataSource(dict(
    text=['Lydia Elopes', "Wickham's Character Revealed", 'Mr. Collins Proposes to Lizzie', 'Mr. Bingley Visits Longbourne',
         'Wedding', 'Mr. Collins Marries Charlotte'],
    x=[4830, 3060, 1450, 4980, 5680, 3520], 
    y=[-.022, -.032, 0.005, .319, .35, .36]))

labels_pap = LabelSet(x='x', y='y', text='text', text_align='right', source=source_pap, level='glyph', 
                       text_font_size='8pt', render_mode='css', text_baseline='hanging', text_alpha=0)

p_pap.add_layout(labels_pap)

# CoffeeScript callback: sets the label and arrow-head alphas to 1 when the toggle is
# active and back to 0 otherwise, then emits a change on the first arrow.
code_pap = '''\
if toggle.active
    labels.text_alpha = 1
    a.end.fill_alpha = 1
    a.end.line_alpha = 1
    b.end.fill_alpha = 1
    b.end.line_alpha = 1
    c.end.fill_alpha = 1
    c.end.line_alpha = 1
    d.end.fill_alpha = 1
    d.end.line_alpha = 1
    e.end.fill_alpha = 1
    e.end.line_alpha = 1
    f.end.fill_alpha = 1
    f.end.line_alpha = 1
    a.change.emit()
else
    labels.text_alpha = 0
    a.end.fill_alpha = 0
    a.end.line_alpha = 0
    b.end.fill_alpha = 0
    b.end.line_alpha = 0
    c.end.fill_alpha = 0
    c.end.line_alpha = 0
    d.end.fill_alpha = 0
    d.end.line_alpha = 0
    e.end.fill_alpha = 0
    e.end.line_alpha = 0
    f.end.fill_alpha = 0
    f.end.line_alpha = 0
    a.change.emit()
'''
# the callback is created with empty args because the toggle it refers to does not exist
# yet; args are filled in once the toggle has been built
callback_pap = CustomJS.from_coffeescript(code=code_pap, args={})
toggle_pap = Toggle(label="Add Annotations", button_type="success", callback=callback_pap, css_classes=['toggle_button'])
callback_pap.args = {'toggle': toggle_pap, 'labels': labels_pap, 'a':arrows_pap[0],
                'b':arrows_pap[1], 'c':arrows_pap[2], 'd':arrows_pap[3], 'e':arrows_pap[4], 'f':arrows_pap[5]}

layout_pap = layout([p_pap], [toggle_pap])

# NOTE(review): unlike the other two sentiment cells, file output is enabled here (their
# output_file calls are commented out) - confirm this is not a leftover.
output_file('pride_and_prejudice.html')
show(layout_pap)

In [None]:
#sense and sensibility sentiment

from bokeh.models import ColumnDataSource, Label, LabelSet, Arrow, VeeHead, Toggle, CustomJS
from bokeh.plotting import figure, show, output_file, output_notebook
from bokeh.models.tickers import FixedTicker
from bokeh.layouts import layout
output_notebook()

p = figure(plot_width=650, plot_height=350, title='Sense and Sensibility: Sentiment',
           tools=['pan,reset,wheel_zoom'], y_range=[-.08,.45])

# add a line renderer (rolling average by groups of 100)
# VADER compound score of every sentence (chapter headings removed first), averaged over a
# sliding window of 100 sentences; mode='valid' keeps only full windows.
# NOTE(review): sentences are tokenized here over the whole text, whereas the tick positions
# below come from per-chapter tokenization - the two counts may not line up exactly.
sas_conv = np.convolve([analyser.polarity_scores(s)['compound'] for s in nltk.sent_tokenize(re.sub('CHAPTER [\d]+\s+', '', sas_data))], np.ones((100,))/100, mode='valid')
print sas_conv.max(), sas_conv.min(), sas_conv.mean(), sas_conv.std()
p.line([i+1 for i in range(len(sas_conv))], sas_conv, line_width=1, color='#f47044')

p.xgrid.grid_line_color = None
p.xaxis.major_label_orientation = 1
p.x_range.range_padding = 0.05
p.title.align = "center"
p.title.text_font_size = "20px"

# one tick at the sentence offset where each chapter starts
p.xaxis.ticker = FixedTicker(ticks=sas_df.index.values)

# map str(sentence offset) -> chapter label (blank for the chapters whose label was cleared)
p.xaxis.major_label_overrides = sas_df.reset_index().astype(str).set_index('sentences').to_dict()['chapter']

p.add_layout(Label(x=-50, y=-.05, text='* Rolling Average by 100 sentences', text_font_size='8pt',
                   render_mode='css', text_baseline='hanging', text_align='left'))

# Annotation arrows at hand-picked coordinates; they start invisible (head alphas 0, white
# shaft) and are revealed by the toggle callback below.
arrows = [Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=4530, y_start=.35, x_end=4630, y_end=.35, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=2600, y_start=-.05, x_end=2500, y_end=-.04, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=535, y_start=.42, x_end=435, y_end=.42, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=3050, y_start=.36, x_end=3150, y_end=.33, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=960, y_start=0, x_end=1030, y_end=.005, line_color='white'),
          Arrow(end=VeeHead(size=20, fill_color='red', line_color="red", fill_alpha=0, line_alpha=0), 
                   x_start=4820, y_start=0, x_end=4900, y_end=.015, line_color='white')]
for arrow in arrows:
    p.add_layout(arrow)
    
# annotation texts with hand-picked positions; text_alpha=0 keeps them hidden initially
source_lab = ColumnDataSource(dict(
    text=['Marianne Recovers', "Willoughby's Engagement", "Dashwoods Move to Barton Cottage", "Fanny Dashwood's Dinner",
         "Willoughby Departs", "Mr. Ferrars' Marriage"],
    x=[4480, 3830, 2230, 3050, 860, 4770], 
    y=[.36, -.05, .43, .38, 0, -.005]))

labels = LabelSet(x='x', y='y', text='text', text_align='right', source=source_lab, level='glyph', text_font_size='8pt', 
                 render_mode='css', text_baseline='hanging', text_alpha=0)
p.add_layout(labels)

# CoffeeScript callback: sets the label and arrow-head alphas to 1 when the toggle is
# active and back to 0 otherwise, then emits a change on the first arrow.
code = '''\
if toggle.active
    labels.text_alpha = 1
    a.end.fill_alpha = 1
    a.end.line_alpha = 1
    b.end.fill_alpha = 1
    b.end.line_alpha = 1
    c.end.fill_alpha = 1
    c.end.line_alpha = 1
    d.end.fill_alpha = 1
    d.end.line_alpha = 1
    e.end.fill_alpha = 1
    e.end.line_alpha = 1
    f.end.fill_alpha = 1
    f.end.line_alpha = 1
    a.change.emit()
else
    labels.text_alpha = 0
    a.end.fill_alpha = 0
    a.end.line_alpha = 0
    b.end.fill_alpha = 0
    b.end.line_alpha = 0
    c.end.fill_alpha = 0
    c.end.line_alpha = 0
    d.end.fill_alpha = 0
    d.end.line_alpha = 0
    e.end.fill_alpha = 0
    e.end.line_alpha = 0
    f.end.fill_alpha = 0
    f.end.line_alpha = 0
    a.change.emit()
'''
# the callback is created with empty args because the toggle it refers to does not exist
# yet; args are filled in once the toggle has been built
callback = CustomJS.from_coffeescript(code=code, args={})
toggle = Toggle(label="Add Annotations", button_type="success", callback=callback, css_classes=['toggle_button'])
callback.args = {'toggle': toggle, 'labels': labels, 'a':arrows[0],
                'b':arrows[1], 'c':arrows[2], 'd':arrows[3], 'e':arrows[4],
                'f':arrows[5]}

# NOTE(review): this rebinds the name `layout` from the imported bokeh function to the
# layout object, so calling layout(...) again fails until the import is re-run.
layout = layout([p], [toggle])

#output_file('sense_and_sensibility.html')
show(layout)

# Character density plots

In [None]:
# emma character density

from bokeh.plotting import figure, output_notebook, show, output_file
from bokeh.models import Legend
from bokeh.models.tickers import FixedTicker
output_notebook()

# NOTE(review): `matplotlib` and `cm` are not imported explicitly in the visible cells;
# presumably they come from the `%pylab inline` magic - confirm.
p_emma_char = figure(plot_width=650, plot_height=325, x_axis_type="datetime", tools=['ywheel_zoom,pan,reset'],
           y_range=[0,.1], x_range=[-2,75], title='Emma: Character Occurrences by Chapter')

# every column after 'chapter' and 'text' holds one character's smoothed per-chapter share
subset = emma_df[emma_df.columns[2:]]

# one line per character, coloured from evenly spaced points of the 'Spectral' colormap
for k, (data, name, color) in enumerate(zip(subset.T.values, subset.columns, 
                            [matplotlib.colors.rgb2hex(i[:3]) for i in cm.get_cmap('Spectral')(np.linspace(0, 1, len(subset.columns)))]
)):
    c = p_emma_char.line([i for i in range (1,len(data)+1)], data, line_width=2, color=color, legend=name)
    # only the characters at positions 2-5 start visible; the rest can be shown via the legend
    if k not in (2,3,4,5):
        c.visible = False

p_emma_char.legend.location = "center_right"
p_emma_char.legend.click_policy="hide"

# one tick per chapter (1-based), labelled with the sparse chapter labels from emma_df
p_emma_char.xaxis.ticker = FixedTicker(ticks=[i for i in range (1, len(data)+1)])
# start at 1: starting at 0 mapped tick '0' to iloc[-1], i.e. the last chapter's label
p_emma_char.xaxis.major_label_overrides = {str(i):emma_df['chapter'].iloc[i-1] for i in range(1, len(emma_df)+1)}

p_emma_char.xgrid.grid_line_color = None
p_emma_char.xaxis.major_label_orientation = 1
p_emma_char.title.align = "left"
p_emma_char.title.text_font_size = "15px"
# '10pt' is a size, so it belongs on label_text_font_size (label_text_font is the font family)
p_emma_char.legend.label_text_font_size = '10pt'

#output_file('emma_char_density.html')
show(p_emma_char)

In [None]:
# pride and prejudice character density

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Legend
from bokeh.models.tickers import FixedTicker

# NOTE(review): `matplotlib` and `cm` are not imported explicitly in the visible cells;
# presumably they come from the `%pylab inline` magic - confirm.
p_pap_char = figure(plot_width=650, plot_height=325, x_axis_type="datetime", tools=['ywheel_zoom,pan,reset'],
           y_range=[0,.1], x_range=[-2,82], title='Pride and Prejudice: Character Occurrences by Chapter')

# every column after 'chapter' and 'text' holds one character's smoothed per-chapter share
subset = pap_df[pap_df.columns[2:]]

# one line per character, coloured from evenly spaced points of the 'Spectral' colormap
for k, (data, name, color) in enumerate(zip(subset.T.values, subset.columns, 
                            [matplotlib.colors.rgb2hex(i[:3]) for i in cm.get_cmap('Spectral')(np.linspace(0, 1, len(subset.columns)))]
)):
    c = p_pap_char.line([i for i in range (1,len(data)+1)], data, line_width=2, color=color, legend=name)
    # only the characters at positions 2, 5, 6 and 7 start visible; the rest can be shown
    # via the legend
    if k not in (2,5,6,7):
        c.visible = False

p_pap_char.legend.location = "center_right"
p_pap_char.legend.click_policy="hide"

output_notebook()

# one tick per chapter (1-based), labelled with the sparse chapter labels from pap_df
p_pap_char.xaxis.ticker = FixedTicker(ticks=[i for i in range (1, len(data)+1)])
# start at 1: starting at 0 mapped tick '0' to iloc[-1], i.e. the last chapter's label
p_pap_char.xaxis.major_label_overrides = {str(i):pap_df['chapter'].iloc[i-1] for i in range(1, len(pap_df)+1)}

p_pap_char.xgrid.grid_line_color = None
p_pap_char.xaxis.major_label_orientation = 1
p_pap_char.title.align = "left"
p_pap_char.title.text_font_size = "15px"
# '10pt' is a size, so it belongs on label_text_font_size (label_text_font is the font family)
p_pap_char.legend.label_text_font_size = '10pt'

#output_file('pap_char_density.html')
show(p_pap_char)

In [None]:
#sense and sensibility character density

from bokeh.plotting import figure, output_notebook, show
from bokeh.models import Legend
from bokeh.models.tickers import FixedTicker
output_notebook()

# NOTE(review): `matplotlib` and `cm` are not imported explicitly in the visible cells;
# presumably they come from the `%pylab inline` magic - confirm.
p_sas_char = figure(plot_width=650, plot_height=325, x_axis_type="datetime", tools=['ywheel_zoom,pan,reset'],
           y_range=[0,.1], x_range=[-2,70], title='Sense and Sensibility: Character Occurrences by Chapter')

# every column after 'chapter' and 'text' holds one character's smoothed per-chapter share
subset = sas_df[sas_df.columns[2:]]

# one line per character, coloured from evenly spaced points of the 'Spectral' colormap
for k, (data, name, color) in enumerate(zip(subset.T.values, subset.columns, 
                            [matplotlib.colors.rgb2hex(i[:3]) for i in cm.get_cmap('Spectral')(np.linspace(0, 1, len(subset.columns)))]
)):
    c = p_sas_char.line([i for i in range (1,len(data)+1)], data, line_width=2, color=color, legend=name)
    # only the characters at positions 0, 1, 3 and 4 start visible; the rest can be shown
    # via the legend
    if k not in (0,1,3,4):
        c.visible = False
        
p_sas_char.legend.location = "center_right"
p_sas_char.legend.click_policy="hide"

# one tick per chapter (1-based), labelled with the sparse chapter labels from sas_df
p_sas_char.xaxis.ticker = FixedTicker(ticks=[i for i in range (1, len(data)+1)])
# start at 1: starting at 0 mapped tick '0' to iloc[-1], i.e. the last chapter's label
p_sas_char.xaxis.major_label_overrides = {str(i):sas_df['chapter'].iloc[i-1] for i in range(1, len(sas_df)+1)}

p_sas_char.xgrid.grid_line_color = None
p_sas_char.xaxis.major_label_orientation = 1
p_sas_char.title.align = "left"
p_sas_char.title.text_font_size = "15px"
# '10pt' is a size, so it belongs on label_text_font_size (label_text_font is the font family)
p_sas_char.legend.label_text_font_size = '10pt'

#output_file('sas_char_density.html')
show(p_sas_char)

# Find network of primary characters - Emma 

In [None]:
def get_occurrences_by_3(data, name_list):
    """Return the passages of *data* surrounding each sentence that mentions a name.

    *data* is split into sentences with nltk.sent_tokenize. For every sentence
    containing any string of *name_list* (as written or upper-cased), the
    neighbouring sentences are joined with spaces into one passage. Returns
    a list of passages, which is empty when no sentence matches (previously
    that case raised UnboundLocalError).
    """
    sentences = nltk.sent_tokenize(data)
    output = []
    for index, sentence in enumerate(sentences):
        if any(name in sentence or name.upper() in sentence for name in name_list):
            # NOTE(review): the window is sentences[index-3 : index+3] with the stop capped at
            # len(sentences)-1, so it is asymmetric and never includes the final sentence of
            # the text - kept as in the original; confirm this is the intended window.
            start = max(0, index-3)
            stop = min(index+3, len(sentences)-1)
            output.append(" ".join(sentences[start:stop]))
    return output

def get_relationship_float(data, viewpoint_list, target_list):
    """Return the share of viewpoint passages that also mention the target.

    The passages come from ``get_occurrences_by_3(data, viewpoint_list)``. A
    passage counts as a hit when any entry of ``target_list`` (or its
    upper-cased form) occurs in it as a substring.

    Parameters:
        data: the full text to search.
        viewpoint_list: name strings identifying the viewpoint character.
        target_list: name strings identifying the target character.

    Returns:
        hits / number of passages as a float, or 0. when there are no passages.
    """
    subset = get_occurrences_by_3(data, viewpoint_list)
    if not subset:
        return 0.
    hits = sum(
        1 for sentences in subset
        if any(name in sentences or name.upper() in sentences for name in target_list)
    )
    return hits/float(len(subset))

In [None]:
# Character tables for the three novels. In every entry the first string is the
# display name and all strings in the entry are searched for in the text.
emma_chars = [['Emma Woodhouse', 'Emma'], 
             ['Mr. Knightley', 'Knightley'],
             ['Frank Churchill', 'Frank', 'Mr. Churchill'],
             ['Jane Fairfax', 'Jane'],
             ['Harriet Smith', 'Harriet'], 
             ['Miss Bates', 'Bates'],
             ['Mrs. Weston', 'Taylor'],
             ['Mr. Elton', 'Elton']]
# 'Lizzy' is the spelling used in the novel; the former 'Lizzie' alias could never match.
# NOTE(review): 'Ms. Bennet' / 'Ms. Lucas' look like dead aliases too (the novels use
# "Miss"), and 'De Bourgh' may need a lower-case "de". They are left unchanged because
# "Miss Bennet" is ambiguous between the sisters - confirm before replacing.
pap_chars = [['Elizabeth Bennet', 'Elizabeth', 'Lizzy'],
             ['Jane Bennet', 'Jane', 'Ms. Bennet'],
             ['George Wickham', 'Wickham'],
             ['Mr. Darcy', 'Darcy'],
             ['Mr. Bingley', 'Bingley'],
             ['Charlotte Lucas', 'Charlotte', 'Ms. Lucas', 'Mrs. Collins'],
             ['Lady Catherine', 'De Bourgh'],
             ['Mr. Collins', 'Collins']]
sas_chars = [['Elinor Dashwood', 'Elinor', 'Miss Dashwood'],
            ['Marianne Dashwood', 'Marianne'],
            ['Colonel Brandon', 'Brandon'],
            ['John Willoughby', 'Willoughby'],
            ['Edward Ferrars', 'Edward', 'Mr. Ferrars'],
            ['Miss Grey', 'Sophia'],
            ['Lucy Steele', 'Miss Steele'],
            ['Mrs. Jennings']]

# Display names of the novel analysed in the following cells (Emma).
chars = [item[0] for item in emma_chars]

In [None]:
# Score every ordered (viewpoint, target) pair of Emma characters. The rows are
# collected first and the frame is built once, instead of growing it with
# DataFrame.append inside the loop.
relation_rows = [
    (viewpoint[0], target[0], get_relationship_float(emma_data, viewpoint, target))
    for viewpoint, target in itertools.permutations(emma_chars, 2)
]
relation_df = pd.DataFrame(relation_rows, columns=['viewpoint', 'target', 'score'])
# Single-argument print(...) prints the same value on Python 2 and Python 3.
print(relation_df['score'].mean())

In [None]:
# Node table: one row per character with its angle on the unit circle, the
# matching x/y position and a colour sampled from the 'plasma' colormap.
node_count = len(chars)
angles = [k*2*math.pi/node_count for k in range(node_count)]
plasma = cm.get_cmap('plasma')(np.linspace(0, 1, node_count))

dim = pd.DataFrame({'node': chars,
                    'circ': angles,
                    'x': [math.sin(angle) for angle in angles],
                    'y': [math.cos(angle) for angle in angles],
                    'color': [matplotlib.colors.rgb2hex(rgba[:3]) for rgba in plasma]},
                   columns=['node', 'circ', 'x', 'y', 'color'])

# Index by node name so positions can be looked up with dim.loc[name].
dim = dim.set_index('node')
dim.head()

In [None]:
# Edge table: one row per (viewpoint, target) pair. Every edge starts at the
# origin and ends at the target's position taken from dim.
connection_columns = ['viewpoint', 'target', 'x_start', 'y_start', 'x_end', 'y_end', 'strength']
connection_rows = []
for row_id, relation in relation_df.iterrows():
    target_spot = dim.loc[relation['target']]  # dim is looked up by node name here
    connection_rows.append((relation['viewpoint'], relation['target'], 0, 0,
                            target_spot['x'], target_spot['y'], relation['score']))
# Build the frame once instead of appending row by row.
connections = pd.DataFrame(connection_rows, columns=connection_columns)
# Turn 'node' back into an ordinary column for the cells that filter on dim['node'].
dim.reset_index(inplace=True)

# Line width: min-max scale the strengths onto [.05, 20.05]. min and max are
# computed once rather than once per element.
strength = connections['strength'].astype(float)
weakest = strength.min()
spread = strength.max() - weakest
if spread:
    connections['scale'] = (strength - weakest)/spread*20+.05
else:
    # All strengths are equal: avoid dividing by zero and use the thinnest line.
    connections['scale'] = .05
connections['xs'] = connections[['x_start', 'x_end']].values.tolist()
connections['ys'] = connections[['y_start', 'y_end']].values.tolist()
connections.head()

In [None]:
from bokeh.models.glyphs import Circle, MultiLine
from bokeh.models import ColumnDataSource, Label, LabelSet, CustomJS, HoverTool, Arrow, VeeHead
from bokeh.plotting import Figure, show, output_notebook, output_file
from bokeh.models.widgets.groups import RadioGroup
from bokeh.layouts import row
from bokeh.events import MouseEnter
from bokeh.models.formatters import NumeralTickFormatter
output_notebook()

# Interactive character network for Emma: the selected viewpoint is drawn at the
# centre, every other character on a ring, and line width encodes the edge 'scale'.
plot = Figure(plot_width=500, plot_height=500, 
              x_range=[-1.6,1.4], y_range=[-1.5,1.5], title='Emma Character Network',
             tools=[''])

# Display names in the order of emma_chars. Position k of this list is the object
# that the JavaScript below addresses as l<k>, r_l<k>, c<k> and a<k>.
nodes = [item[0] for item in emma_chars]

# One MultiLine glyph per viewpoint; only the first viewpoint's lines start visible.
line_glyphs = []
line_renderers = []
for k, node in enumerate(nodes):
    glyph = MultiLine(xs="xs", ys="ys", line_color="#83529b", line_width="scale",
                      line_alpha=.98 if k == 0 else 0)
    line_glyphs.append(glyph)
    line_renderers.append(
        plot.add_glyph(ColumnDataSource(connections[connections['viewpoint']==node]), glyph))

# One circle per node on the ring; the first node's circle starts fully transparent.
circle_glyphs = []
for k, node in enumerate(nodes):
    hidden = dict(line_alpha=0, fill_alpha=0) if k == 0 else {}
    glyph = Circle(x='x', y='y', size=20, fill_color='color', line_color='color', **hidden)
    circle_glyphs.append(glyph)
    plot.add_glyph(ColumnDataSource(dim[dim['node']==node]), glyph)

# Circle at the origin, initially in the first node's colour.
first_color = dim[dim['node']==nodes[0]]['color'].values[0]
ccenter = Circle(x=0, y=0, size=20, fill_color=first_color, line_color=first_color)
plot.add_glyph(ccenter)

# One text label per ring node; the first node's label starts white.
node_labels = []
for k, node in enumerate(nodes):
    spot = dim[dim['node']==node]
    whiteout = dict(text_color="white") if k == 0 else {}
    label = Label(x=spot['x'].values[0], y=spot['y'].values[0],
                  text=node, text_align='right', x_offset=-10, y_offset=10,
                  level='glyph', text_font_size='8pt', **whiteout)
    node_labels.append(label)
    plot.add_layout(label)

center = Label(x=0, y=0, text=nodes[0], text_align='right', x_offset=-10, y_offset=10,
              level='glyph', text_font_size='8pt')
plot.add_layout(center)
plot.legend.location = None

# JavaScript callback: recolour the centre circle, then for every node either show
# its lines and hide its ring circle/label (selected) or do the opposite.
stanza = """if (radio.active == {k}) {{
    l{k}.line_alpha.value = 1
    c{k}.fill_alpha = 0
    c{k}.line_alpha = 0
    a{k}.text_color = "white"
    hover.renderers = [r_l{k}]
}} else {{
    l{k}.line_alpha.value = 0
    c{k}.fill_alpha = 1
    c{k}.line_alpha = 1
    a{k}.text_color = "black"
}}
"""
code = ("\n"
        "ccenter.fill_color = colors.data['color'][radio.active]\n"
        "ccenter.line_color = colors.data['color'][radio.active]\n"
        + "".join(stanza.format(k=k) for k in range(len(nodes)))
        + "center.text = radio.labels[radio.active]\n")

# The callback and the radio group reference each other, so the callback is
# created with empty args and receives them once everything exists.
callback = CustomJS(code=code, args={})
radio = RadioGroup(labels=list(connections['viewpoint'].unique()), active=0, callback=callback, height=500, width=200)

# Initially attached to every line renderer (first viewpoint last, as before);
# the callback narrows this to the selected viewpoint's renderer.
hover = HoverTool(tooltips=[
    ("Viewpoint", "@viewpoint"),
    ("Target", "@target"),
    ("Co-occurrence", "@strength{(0%)}")], line_policy='next',
    renderers=line_renderers[1:] + line_renderers[:1])

plot.add_tools(hover)
js_args = dict(radio=radio, center=center, ccenter=ccenter,
               colors=ColumnDataSource(dim), hover=hover)
for k in range(len(nodes)):
    js_args['l%d' % k] = line_glyphs[k]
    js_args['r_l%d' % k] = line_renderers[k]
    js_args['c%d' % k] = circle_glyphs[k]
    js_args['a%d' % k] = node_labels[k]
callback.args = js_args

plot.axis.visible = False
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = None
plot.title.align = "center"
plot.title.text_font_size = "20px"

# Also run the callback whenever the mouse enters the plot.
plot.js_on_event(MouseEnter, callback)

layout = row(plot, radio)

#output_file('emma_char_network.html')
show(layout)

# Find network of primary characters - Sense and Sensibility

In [None]:
# Character tables for the three novels. In every entry the first string is the
# display name and all strings in the entry are searched for in the text.
emma_chars = [['Emma Woodhouse', 'Emma'], 
             ['Mr. Knightley', 'Knightley'],
             ['Frank Churchill', 'Frank', 'Mr. Churchill'],
             ['Jane Fairfax', 'Jane'],
             ['Harriet Smith', 'Harriet'], 
             ['Miss Bates', 'Bates'],
             ['Mrs. Weston', 'Taylor'],
             ['Mr. Elton', 'Elton']]
# 'Lizzy' is the spelling used in the novel; the former 'Lizzie' alias could never match.
# NOTE(review): 'Ms. Bennet' / 'Ms. Lucas' look like dead aliases too (the novels use
# "Miss"), and 'De Bourgh' may need a lower-case "de". They are left unchanged because
# "Miss Bennet" is ambiguous between the sisters - confirm before replacing.
pap_chars = [['Elizabeth Bennet', 'Elizabeth', 'Lizzy'],
             ['Jane Bennet', 'Jane', 'Ms. Bennet'],
             ['George Wickham', 'Wickham'],
             ['Mr. Darcy', 'Darcy'],
             ['Mr. Bingley', 'Bingley'],
             ['Charlotte Lucas', 'Charlotte', 'Ms. Lucas', 'Mrs. Collins'],
             ['Lady Catherine', 'De Bourgh'],
             ['Mr. Collins', 'Collins']]
sas_chars = [['Elinor Dashwood', 'Elinor', 'Miss Dashwood'],
            ['Marianne Dashwood', 'Marianne'],
            ['Colonel Brandon', 'Brandon'],
            ['John Willoughby', 'Willoughby'],
            ['Edward Ferrars', 'Edward', 'Mr. Ferrars'],
            ['Miss Grey', 'Sophia'],
            ['Lucy Steele', 'Miss Steele'],
            ['Mrs. Jennings']]

# Display names of the novel analysed in the following cells (Sense and Sensibility).
chars = [item[0] for item in sas_chars]

In [None]:
# Score every ordered (viewpoint, target) pair of Sense and Sensibility characters.
# The rows are collected first and the frame is built once, instead of growing it
# with DataFrame.append inside the loop.
relation_rows = [
    (viewpoint[0], target[0], get_relationship_float(sas_data, viewpoint, target))
    for viewpoint, target in itertools.permutations(sas_chars, 2)
]
relation_df = pd.DataFrame(relation_rows, columns=['viewpoint', 'target', 'score'])

# Single-argument print(...) prints the same value on Python 2 and Python 3.
print(relation_df['score'].mean())

In [None]:
# Node table: one row per character with its angle on the unit circle, the
# matching x/y position and a colour sampled from the 'plasma' colormap.
node_count = len(chars)
angles = [k*2*math.pi/node_count for k in range(node_count)]
plasma = cm.get_cmap('plasma')(np.linspace(0, 1, node_count))

dim = pd.DataFrame({'node': chars,
                    'circ': angles,
                    'x': [math.sin(angle) for angle in angles],
                    'y': [math.cos(angle) for angle in angles],
                    'color': [matplotlib.colors.rgb2hex(rgba[:3]) for rgba in plasma]},
                   columns=['node', 'circ', 'x', 'y', 'color'])

# Index by node name so positions can be looked up with dim.loc[name].
dim = dim.set_index('node')
dim.head()

In [None]:
# Edge table: one row per (viewpoint, target) pair. Every edge starts at the
# origin and ends at the target's position taken from dim.
connection_columns = ['viewpoint', 'target', 'x_start', 'y_start', 'x_end', 'y_end', 'strength']
connection_rows = []
for row_id, relation in relation_df.iterrows():
    target_spot = dim.loc[relation['target']]  # dim is looked up by node name here
    connection_rows.append((relation['viewpoint'], relation['target'], 0, 0,
                            target_spot['x'], target_spot['y'], relation['score']))
# Build the frame once instead of appending row by row.
connections = pd.DataFrame(connection_rows, columns=connection_columns)
# Turn 'node' back into an ordinary column for the cells that filter on dim['node'].
dim.reset_index(inplace=True)

# Line width: min-max scale the strengths onto [.05, 20.05]. min and max are
# computed once rather than once per element.
strength = connections['strength'].astype(float)
weakest = strength.min()
spread = strength.max() - weakest
if spread:
    connections['scale'] = (strength - weakest)/spread*20+.05
else:
    # All strengths are equal: avoid dividing by zero and use the thinnest line.
    connections['scale'] = .05
connections['xs'] = connections[['x_start', 'x_end']].values.tolist()
connections['ys'] = connections[['y_start', 'y_end']].values.tolist()
connections.head()

In [None]:
from bokeh.models.glyphs import Circle, MultiLine
from bokeh.models import ColumnDataSource, Label, LabelSet, CustomJS, HoverTool, Arrow, VeeHead
from bokeh.plotting import Figure, show, output_notebook, output_file
from bokeh.models.widgets.groups import RadioGroup
from bokeh.layouts import row
from bokeh.events import MouseEnter
from bokeh.models.formatters import NumeralTickFormatter
output_notebook()

# Interactive character network for Sense and Sensibility: the selected viewpoint
# is drawn at the centre, every other character on a ring, and line width encodes
# the edge 'scale'.
plot = Figure(plot_width=500, plot_height=500, 
              x_range=[-1.6,1.4], y_range=[-1.5,1.5], title='Sense and Sensibility Character Network',
             tools=[''])

# Display names in the order of sas_chars. Position k of this list is the object
# that the JavaScript below addresses as l<k>, r_l<k>, c<k> and a<k>.
nodes = [item[0] for item in sas_chars]

# One MultiLine glyph per viewpoint; only the first viewpoint's lines start visible.
line_glyphs = []
line_renderers = []
for k, node in enumerate(nodes):
    glyph = MultiLine(xs="xs", ys="ys", line_color="#83529b", line_width="scale",
                      line_alpha=.98 if k == 0 else 0)
    line_glyphs.append(glyph)
    line_renderers.append(
        plot.add_glyph(ColumnDataSource(connections[connections['viewpoint']==node]), glyph))

# One circle per node on the ring; the first node's circle starts fully transparent.
circle_glyphs = []
for k, node in enumerate(nodes):
    hidden = dict(line_alpha=0, fill_alpha=0) if k == 0 else {}
    glyph = Circle(x='x', y='y', size=20, fill_color='color', line_color='color', **hidden)
    circle_glyphs.append(glyph)
    plot.add_glyph(ColumnDataSource(dim[dim['node']==node]), glyph)

# Circle at the origin, initially in the first node's colour.
first_color = dim[dim['node']==nodes[0]]['color'].values[0]
ccenter = Circle(x=0, y=0, size=20, fill_color=first_color, line_color=first_color)
plot.add_glyph(ccenter)

# One text label per ring node; the first node's label starts white.
node_labels = []
for k, node in enumerate(nodes):
    spot = dim[dim['node']==node]
    whiteout = dict(text_color="white") if k == 0 else {}
    label = Label(x=spot['x'].values[0], y=spot['y'].values[0],
                  text=node, text_align='right', x_offset=-10, y_offset=10,
                  level='glyph', text_font_size='8pt', **whiteout)
    node_labels.append(label)
    plot.add_layout(label)

center = Label(x=0, y=0, text=nodes[0], text_align='right', x_offset=-10, y_offset=10,
              level='glyph', text_font_size='8pt')
plot.add_layout(center)
plot.legend.location = None

# Per-node JavaScript statements that hide / restore the ring label.
label_hide = ['a%d.text_color = "white"' % k for k in range(len(nodes))]
label_show = ['a%d.text_color = "black"' % k for k in range(len(nodes))]
# NOTE(review): label 1 toggles text_alpha instead of text_color, unlike every
# other label. Preserved exactly as it was - confirm whether this is intentional.
label_hide[1] = 'a1.text_alpha = 0'
label_show[1] = 'a1.text_alpha = 1'

# JavaScript callback: recolour the centre circle, then for every node either show
# its lines and hide its ring circle/label (selected) or do the opposite.
stanza = """if (radio.active == {k}) {{
    l{k}.line_alpha.value = 1
    c{k}.fill_alpha = 0
    c{k}.line_alpha = 0
    {hide}
    hover.renderers = [r_l{k}]
}} else {{
    l{k}.line_alpha.value = 0
    c{k}.fill_alpha = 1
    c{k}.line_alpha = 1
    {show}
}}
"""
code = ("\n"
        "ccenter.fill_color = colors.data['color'][radio.active]\n"
        "ccenter.line_color = colors.data['color'][radio.active]\n"
        + "".join(stanza.format(k=k, hide=label_hide[k], show=label_show[k])
                  for k in range(len(nodes)))
        + "center.text = radio.labels[radio.active]\n")

# The callback and the radio group reference each other, so the callback is
# created with empty args and receives them once everything exists.
callback = CustomJS(code=code, args={})
radio = RadioGroup(labels=list(connections['viewpoint'].unique()), active=0, callback=callback, height=500, width=200)

# Initially attached to every line renderer (first viewpoint last, as before);
# the callback narrows this to the selected viewpoint's renderer.
hover = HoverTool(tooltips=[
    ("Viewpoint", "@viewpoint"),
    ("Target", "@target"),
    ("Co-occurrence", "@strength{(0%)}")], line_policy='next',
    renderers=line_renderers[1:] + line_renderers[:1])

plot.add_tools(hover)
js_args = dict(radio=radio, center=center, ccenter=ccenter,
               colors=ColumnDataSource(dim), hover=hover)
for k in range(len(nodes)):
    js_args['l%d' % k] = line_glyphs[k]
    js_args['r_l%d' % k] = line_renderers[k]
    js_args['c%d' % k] = circle_glyphs[k]
    js_args['a%d' % k] = node_labels[k]
callback.args = js_args

plot.axis.visible = False
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = None
plot.title.align = "center"
plot.title.text_font_size = "20px"

# Also run the callback whenever the mouse enters the plot.
plot.js_on_event(MouseEnter, callback)

layout = row(plot, radio)

# NOTE(review): unlike the Emma cell, the HTML export is active here.
output_file('sas_char_network.html')
show(layout)

# Find network of primary characters - Pride and Prejudice

In [None]:
# Character tables for the three novels. In every entry the first string is the
# display name and all strings in the entry are searched for in the text.
emma_chars = [['Emma Woodhouse', 'Emma'], 
             ['Mr. Knightley', 'Knightley'],
             ['Frank Churchill', 'Frank', 'Mr. Churchill'],
             ['Jane Fairfax', 'Jane'],
             ['Harriet Smith', 'Harriet'], 
             ['Miss Bates', 'Bates'],
             ['Mrs. Weston', 'Taylor'],
             ['Mr. Elton', 'Elton']]
# 'Lizzy' is the spelling used in the novel; the former 'Lizzie' alias could never match.
# NOTE(review): 'Ms. Bennet' / 'Ms. Lucas' look like dead aliases too (the novels use
# "Miss"), and 'De Bourgh' may need a lower-case "de". They are left unchanged because
# "Miss Bennet" is ambiguous between the sisters - confirm before replacing.
pap_chars = [['Elizabeth Bennet', 'Elizabeth', 'Lizzy'],
             ['Jane Bennet', 'Jane', 'Ms. Bennet'],
             ['George Wickham', 'Wickham'],
             ['Mr. Darcy', 'Darcy'],
             ['Mr. Bingley', 'Bingley'],
             ['Charlotte Lucas', 'Charlotte', 'Ms. Lucas', 'Mrs. Collins'],
             ['Lady Catherine', 'De Bourgh'],
             ['Mr. Collins', 'Collins']]
sas_chars = [['Elinor Dashwood', 'Elinor', 'Miss Dashwood'],
            ['Marianne Dashwood', 'Marianne'],
            ['Colonel Brandon', 'Brandon'],
            ['John Willoughby', 'Willoughby'],
            ['Edward Ferrars', 'Edward', 'Mr. Ferrars'],
            ['Miss Grey', 'Sophia'],
            ['Lucy Steele', 'Miss Steele'],
            ['Mrs. Jennings']]

# Display names of the novel analysed in the following cells (Pride and Prejudice).
chars = [item[0] for item in pap_chars]

In [None]:
# Score every ordered (viewpoint, target) pair of Pride and Prejudice characters.
# The rows are collected first and the frame is built once, instead of growing it
# with DataFrame.append inside the loop.
relation_rows = [
    (viewpoint[0], target[0], get_relationship_float(pap_data, viewpoint, target))
    for viewpoint, target in itertools.permutations(pap_chars, 2)
]
relation_df = pd.DataFrame(relation_rows, columns=['viewpoint', 'target', 'score'])

# Single-argument print(...) prints the same value on Python 2 and Python 3.
print(relation_df['score'].mean())

In [None]:
# Node table: one row per character with its angle on the unit circle, the
# matching x/y position and a colour sampled from the 'plasma' colormap.
node_count = len(chars)
angles = [k*2*math.pi/node_count for k in range(node_count)]
plasma = cm.get_cmap('plasma')(np.linspace(0, 1, node_count))

dim = pd.DataFrame({'node': chars,
                    'circ': angles,
                    'x': [math.sin(angle) for angle in angles],
                    'y': [math.cos(angle) for angle in angles],
                    'color': [matplotlib.colors.rgb2hex(rgba[:3]) for rgba in plasma]},
                   columns=['node', 'circ', 'x', 'y', 'color'])

# Index by node name so positions can be looked up with dim.loc[name].
dim = dim.set_index('node')
dim.head()

In [None]:
# Edge table: one row per (viewpoint, target) pair. Every edge starts at the
# origin and ends at the target's position taken from dim.
connection_columns = ['viewpoint', 'target', 'x_start', 'y_start', 'x_end', 'y_end', 'strength']
connection_rows = []
for row_id, relation in relation_df.iterrows():
    target_spot = dim.loc[relation['target']]  # dim is looked up by node name here
    connection_rows.append((relation['viewpoint'], relation['target'], 0, 0,
                            target_spot['x'], target_spot['y'], relation['score']))
# Build the frame once instead of appending row by row.
connections = pd.DataFrame(connection_rows, columns=connection_columns)
# Turn 'node' back into an ordinary column for the cells that filter on dim['node'].
dim.reset_index(inplace=True)

# Line width: min-max scale the strengths onto [.05, 20.05]. min and max are
# computed once rather than once per element.
strength = connections['strength'].astype(float)
weakest = strength.min()
spread = strength.max() - weakest
if spread:
    connections['scale'] = (strength - weakest)/spread*20+.05
else:
    # All strengths are equal: avoid dividing by zero and use the thinnest line.
    connections['scale'] = .05
connections['xs'] = connections[['x_start', 'x_end']].values.tolist()
connections['ys'] = connections[['y_start', 'y_end']].values.tolist()
connections.head()

In [None]:
from bokeh.models.glyphs import Circle, MultiLine
from bokeh.models import ColumnDataSource, Label, LabelSet, CustomJS, HoverTool, Arrow, VeeHead
from bokeh.plotting import Figure, show, output_notebook, output_file
from bokeh.models.widgets.groups import RadioGroup
from bokeh.layouts import row
from bokeh.events import MouseEnter
from bokeh.models.formatters import NumeralTickFormatter
output_notebook()

# Interactive character network for Pride and Prejudice.
#
# Every character gets a set of lines (one per target), an outer circle and a
# name label.  A radio group picks the active viewpoint; the JS callback then
# shows only that character's lines, hides its outer circle, whitens its
# outer label, and recolours/relabels the marker at the plot centre.
#
# The JS callback addresses everything by position, so entry k of this list
# has to correspond to radio label k (taken from connections['viewpoint'])
# and to row k of `dim` (used for the centre colour).  Keeping the names in
# one list replaces eight hand-copied stanzas per glyph type.
viewpoints = ['Elizabeth Bennet', 'Jane Bennet', 'George Wickham', 'Mr. Darcy',
              'Mr. Bingley', 'Charlotte Lucas', 'Lady Catherine', 'Mr. Collins']

# The hover tool is attached explicitly further down.
plot = Figure(plot_width=500, plot_height=500,
              x_range=[-1.6,1.4], y_range=[-1.5,1.5], title='Pride and Prejudice Character Network',
              tools=[''])

# Lines from the centre to every target, one MultiLine glyph per viewpoint.
# Only the first viewpoint starts out visible.
line_glyphs = []
line_renderers = []
for k, who in enumerate(viewpoints):
    line_glyph = MultiLine(xs="xs", ys="ys", line_color="#83529b", line_width="scale",
                           line_alpha=.98 if k == 0 else 0)
    line_renderer = plot.add_glyph(
        ColumnDataSource(connections[connections['viewpoint']==who]), line_glyph)
    line_glyphs.append(line_glyph)
    line_renderers.append(line_renderer)

# Outer circles, added after all lines so the insertion order of renderers is
# lines -> circles -> centre circle -> labels.  The first (initially active)
# character's outer circle starts fully transparent.
circle_glyphs = []
for k, who in enumerate(viewpoints):
    hidden = {'line_alpha': 0, 'fill_alpha': 0} if k == 0 else {}
    circle_glyph = Circle(x='x', y='y', size=20, fill_color='color', line_color='color', **hidden)
    plot.add_glyph(ColumnDataSource(dim[dim['node']==who]), circle_glyph)
    circle_glyphs.append(circle_glyph)

# Marker at the plot centre, coloured like the initially active character.
active_color = dim[dim['node']==viewpoints[0]]['color'].values[0]
ccenter = Circle(x=0, y=0, size=20, fill_color=active_color, line_color=active_color)
plot.add_glyph(ccenter)

# Name labels next to the outer circles; the initially active character's
# outer label starts white.
name_labels = []
for k, who in enumerate(viewpoints):
    node = dim[dim['node']==who]
    text_style = {'text_color': "white"} if k == 0 else {}
    name_label = Label(x=node['x'].values[0], y=node['y'].values[0],
                       text=who, text_align='right', x_offset=-10, y_offset=10,
                       level='glyph', text_font_size='8pt', **text_style)
    plot.add_layout(name_label)
    name_labels.append(name_label)

# Label for the centre marker, naming the active character.
center = Label(x=0, y=0, text=viewpoints[0], text_align='right', x_offset=-10, y_offset=10,
               level='glyph', text_font_size='8pt')
plot.add_layout(center)
plot.legend.location = None

# JavaScript for the callback.  One if/else branch is emitted per character,
# referring to the models by the argument names l<k>, c<k>, a<k> and r_l<k>
# that are registered in callback.args below.
js_branch = """if (radio.active == {k}) {{
    l{k}.line_alpha.value = 1
    c{k}.fill_alpha = 0
    c{k}.line_alpha = 0
    a{k}.text_color = "white"
    hover.renderers = [r_l{k}]
}} else {{
    l{k}.line_alpha.value = 0
    c{k}.fill_alpha = 1
    c{k}.line_alpha = 1
    a{k}.text_color = "black"
}}
"""
code = ("""
ccenter.fill_color = colors.data['color'][radio.active]
ccenter.line_color = colors.data['color'][radio.active]
"""
        + "".join(js_branch.format(k=k) for k in range(len(viewpoints)))
        + """center.text = radio.labels[radio.active]
""")

# The callback is created with empty args first: the radio group needs the
# callback, and the callback's args need the radio group.
callback = CustomJS(code=code, args={})
radio = RadioGroup(labels=list(connections['viewpoint'].unique()), active=0, callback=callback, height=500, width=200)

# The hover tool starts attached to every line renderer (first viewpoint's
# renderer listed last, as before); the callback narrows it to the active one.
hover = HoverTool(tooltips=[
    ("Viewpoint", "@viewpoint"),
    ("Target", "@target"),
    ("Co-occurrence", "@strength{(0%)}")], line_policy='next',
    renderers=line_renderers[1:] + line_renderers[:1])

plot.add_tools(hover)

# Register every model the JS code refers to, under the names it uses.
js_args = dict(radio=radio, center=center, ccenter=ccenter,
               colors=ColumnDataSource(dim), hover=hover)
for k, (line_glyph, line_renderer, circle_glyph, name_label) in enumerate(
        zip(line_glyphs, line_renderers, circle_glyphs, name_labels)):
    js_args['l%d' % k] = line_glyph
    js_args['r_l%d' % k] = line_renderer
    js_args['c%d' % k] = circle_glyph
    js_args['a%d' % k] = name_label
callback.args = js_args

plot.axis.visible = False
plot.xgrid.grid_line_color = None
plot.ygrid.grid_line_color = None
plot.title.align = "center"
plot.title.text_font_size = "20px"

# Also run the callback whenever the mouse enters the plot area.
plot.js_on_event(MouseEnter, callback)

layout = row(plot, radio)

#output_file('pap_char_network.html')
show(layout)