# Import Libraries

In [1]:
import html
import os

import pandas as pd

# Config

In [2]:
# Base directory of the input data files (presumably the root for everything read below).
data_dir = "data"
# Directory into which create_graph() writes the .dot sources and the rendered images.
image_dir = "images"

# Get Raw Data

In [3]:
# 2020 data
# edges = pd.read_csv(f"{data_dir}/2020/edges.csv")

In [4]:
# New data
# The edge list is spread over the first five sheets of a single workbook.
# Build the path from the configured data_dir rather than repeating the folder name.
excel_file = f"{data_dir}/word_docs/ALL.xls"
keep_cols = ["character", "address", "reference", "person_referred_to", "relation"]

# Requesting a list of sheet positions parses the workbook once and returns a
# dict {position: DataFrame}, instead of re-opening the file for every sheet.
workbook = pd.read_excel(excel_file, sheet_name=list(range(5)))

sheets = []
for i in range(5):
    sheet = workbook[i]
    # Normalise the headers to snake_case so keep_cols matches on every sheet;
    # str() guards against non-string headers (e.g. numeric column labels).
    sheet.columns = [str(col).strip().lower().replace(' ', '_') for col in sheet.columns]
    sheets.append(sheet[keep_cols])

# Stack the sheets; each sheet keeps its own row labels, so index values repeat.
edges = pd.concat(sheets)

## Clean up columns

In [5]:
# Give the two endpoint columns short, role-based names, then keep only rows
# in which both endpoints of the edge are present.
column_aliases = {'character': 'person', 'person_referred_to': 'referent'}
edges = (
    edges
    .rename(columns=column_aliases)
    .dropna(subset=['person', 'referent'])
    .copy()
)

In [6]:
# Display the edge list after the column rename and the removal of rows lacking a person or referent.
edges

Unnamed: 0,person,address,reference,referent,relation
2,"Cibber, Colley",Testimonies,Hervey compares Pope to Dryden and Prior in A ...,John Hervey,Akin to
4,"Cibber, Colley",Testimonies,,John Dryden,Attacked
5,"Cibber, Colley",Testimonies,,Matthew Prior,Attacked
6,"Cibber, Colley",Testimonies,Cibber on the Dunciad,Alexander Pope,Attacked
7,"Cibber, Colley",Testimonies,Pope compares Cibber to Dennis,John Dennis,Akin to
...,...,...,...,...,...
34,"Jacob, Giles","III, Note to 149, 150",Jacob’s association with Dennis (Blunderbuss a...,John Dennis,Akin to
36,"Jacob, Giles","III, Note to 149, 150",Jacob’s respect for our Author,Alexander Pope,Attacked
37,"Jacob, Giles","III, Note to 149, 150",Jacob’s letter to Dennis,John Dennis,Akin to
38,"Jacob, Giles","III, Note to 173",Alludes to Dennis’s account of himself in Jaco...,John Dennis,Defended


## Clean up values

In [7]:
# Trim leading and trailing whitespace from the string values of every column.
edges = edges.assign(**{name: edges[name].str.strip() for name in edges.columns})

In [8]:
# Derive a machine-friendly key from the relation text: lower-case, spaces -> underscores.
relation_lower = edges['relation'].str.lower()
edges['relation_id'] = relation_lower.str.replace(' ', '_')

In [9]:
# Display the edge list again, now with stripped values and the derived relation_id column.
edges

Unnamed: 0,person,address,reference,referent,relation,relation_id
2,"Cibber, Colley",Testimonies,Hervey compares Pope to Dryden and Prior in A ...,John Hervey,Akin to,akin_to
4,"Cibber, Colley",Testimonies,,John Dryden,Attacked,attacked
5,"Cibber, Colley",Testimonies,,Matthew Prior,Attacked,attacked
6,"Cibber, Colley",Testimonies,Cibber on the Dunciad,Alexander Pope,Attacked,attacked
7,"Cibber, Colley",Testimonies,Pope compares Cibber to Dennis,John Dennis,Akin to,akin_to
...,...,...,...,...,...,...
34,"Jacob, Giles","III, Note to 149, 150",Jacob’s association with Dennis (Blunderbuss a...,John Dennis,Akin to,akin_to
36,"Jacob, Giles","III, Note to 149, 150",Jacob’s respect for our Author,Alexander Pope,Attacked,attacked
37,"Jacob, Giles","III, Note to 149, 150",Jacob’s letter to Dennis,John Dennis,Akin to,akin_to
38,"Jacob, Giles","III, Note to 173",Alludes to Dennis’s account of himself in Jaco...,John Dennis,Defended,defended


## Normalize names

In [10]:
# Rewrite "Surname, Given" person names as "Given Surname".
# Only the text before the first comma and the text between the first and
# second comma are used; names without a comma are left untouched.
has_comma = edges.person.str.contains(',')
edges.loc[has_comma, 'person'] = edges.loc[has_comma, 'person'].map(
    lambda name: "{} {}".format(name.split(',')[1].strip(), name.split(',')[0])
)

In [11]:
# Correct the spelling variant "Edmund Curl" to "Edmund Curll" among the source persons.
is_curl_variant = edges['person'].eq('Edmund Curl')
edges.loc[is_curl_variant, 'person'] = 'Edmund Curll'

In [12]:
# "Curll Edmund" has the two name parts in the wrong order; replace it with "Edmund Curll".
edges['person'] = edges['person'].mask(edges['person'] == 'Curll Edmund', 'Edmund Curll')

In [13]:
# Sanity check: number of edge rows per source person after name normalisation.
edges['person'].value_counts()

person
Edmund Curll     115
John Dennis      109
Colley Cibber     71
Giles Jacob       18
John Henley        8
Name: count, dtype: int64

In [14]:
# Display the edge list after the person names were rewritten to "Given Surname" order.
edges

Unnamed: 0,person,address,reference,referent,relation,relation_id
2,Colley Cibber,Testimonies,Hervey compares Pope to Dryden and Prior in A ...,John Hervey,Akin to,akin_to
4,Colley Cibber,Testimonies,,John Dryden,Attacked,attacked
5,Colley Cibber,Testimonies,,Matthew Prior,Attacked,attacked
6,Colley Cibber,Testimonies,Cibber on the Dunciad,Alexander Pope,Attacked,attacked
7,Colley Cibber,Testimonies,Pope compares Cibber to Dennis,John Dennis,Akin to,akin_to
...,...,...,...,...,...,...
34,Giles Jacob,"III, Note to 149, 150",Jacob’s association with Dennis (Blunderbuss a...,John Dennis,Akin to,akin_to
36,Giles Jacob,"III, Note to 149, 150",Jacob’s respect for our Author,Alexander Pope,Attacked,attacked
37,Giles Jacob,"III, Note to 149, 150",Jacob’s letter to Dennis,John Dennis,Akin to,akin_to
38,Giles Jacob,"III, Note to 173",Alludes to Dennis’s account of himself in Jaco...,John Dennis,Defended,defended


In [15]:
def _given_name_first(name):
    """Turn "Surname, Given" into "Given Surname" using the first two comma-separated parts."""
    pieces = name.split(',')
    return f"{pieces[1].strip()} {pieces[0]}"


# Apply the same reordering to the referent column; rows without a comma stay as they are.
comma_rows = edges['referent'].str.contains(',')
edges.loc[comma_rows, 'referent'] = edges.loc[comma_rows, 'referent'].apply(_given_name_first)

In [16]:
# Correct the spelling variant "Edmund Curl" to "Edmund Curll" among the referents.
edges['referent'] = edges['referent'].where(edges['referent'].ne('Edmund Curl'), 'Edmund Curll')

In [30]:
# Print every distinct referent name in sorted order, one per line,
# which makes spelling variants of the same person easy to spot.
referent_names = edges['referent'].value_counts().sort_index().index
print('\n'.join(referent_names.to_list()))

(possibly) Benjamin Hoadly
A. Moore
Abel Boyer
Abel Evans
Abel Roper
Alexander Pope
Ambrose Phillips
B.B.?
Barton Booth
Bavius
Bernard Lintot
Besaleel Morris
Caius Gabriel Cibber
Camillo Querno
Cardell Goodman
Charles Fleetwood
Charles Gildon
Colley Cibber
Daniel Defoe
Dulness
Edmund Curll
Edmund Gibson
Edward Ward
Edward Young
Eliza Haywood
Elizabeth Thomas
Elkanah Settle
Eustace Budgell
Frances Hare
Francis Atterbury
George Ducket
George Duckett
George Etherege
George I
George II
George Ridpath
Giles Jacob
Goddess of the Common-sewers Cloacina
Henry Cromwell
Henry Curll
Henry Janssen
Hilkiah (should be Arthur Bedford) Bedford
Isaac Barrow
Isaac Newton
James Moore Smythe
James Ralph
Jeremy Collier
John Anstis
John Atterbury
John Banks
John Dennis
John Dryden
John Dunton
John Durant Breval
John Fletcher
John Garth
John Gay
John Henley
John Hervey
John Locke
John Oldmixon
John Ozell
John Tutchin
Jonathan Swift
Joseph Addison
Joseph Gay (aka John Durant Breval)
Lady Mary Wortley Montagu


# Extract Relations

In [18]:
# Relation table: one row per relation_id, with `n` = number of edge rows of that type
# (most frequent first).
R = edges.value_counts(subset='relation_id').to_frame(name='n')

In [19]:
# Edge colour per relation type, keyed by relation_id so the assignment does not
# depend on how the rows of R happen to be ordered by frequency.
relation_colors = {
    'akin_to': 'green',
    'attacked': 'red',
    'dissimilar': 'orange',
    'defended': 'blue',
}
# A relation missing from the mapping falls back to black instead of raising a
# length-mismatch error when the data contains more or fewer than four types.
R['color'] = [relation_colors.get(relation_id, 'black') for relation_id in R.index]
# Human-readable label: the id with underscores turned back into spaces.
R['label'] = R.index.str.replace('_', ' ')
R

Unnamed: 0_level_0,n,color,label
relation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
akin_to,170,green,akin to
attacked,106,red,attacked
dissimilar,27,orange,dissimilar
defended,17,blue,defended


# Extract Nodes

In [20]:
# Extract and format Nodes
N = pd.concat([edges.person, edges.referent]).value_counts().to_frame('n')
N = N.reset_index().rename(columns={'index':'full_name'})
N['index'] = N['full_name'].str.replace(' ', '_').str.upper().str.replace(r"\W", "", regex=True)
N = N.set_index('index')
N['label'] = N['full_name'].str.replace(' ', '<br/>')
N['dot'] = N.apply(lambda x: f"{x.name} [label=< {x.label} >]", axis=1)

In [21]:
# Show the first rows of the node table (N is built from value_counts, so these are the most frequent names).
N.head()

Unnamed: 0_level_0,full_name,n,label,dot
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
JOHN_DENNIS,John Dennis,124,John<br/>Dennis,JOHN_DENNIS [label=< John<br/>Dennis >]
EDMUND_CURLL,Edmund Curll,120,Edmund<br/>Curll,EDMUND_CURLL [label=< Edmund<br/>Curll >]
ALEXANDER_POPE,Alexander Pope,84,Alexander<br/>Pope,ALEXANDER_POPE [label=< Alexander<br/>Pope >]
COLLEY_CIBBER,Colley Cibber,75,Colley<br/>Cibber,COLLEY_CIBBER [label=< Colley<br/>Cibber >]
GILES_JACOB,Giles Jacob,22,Giles<br/>Jacob,GILES_JACOB [label=< Giles<br/>Jacob >]


# Extract Edges

In [22]:
# Look-up table from full name to node id (the index of N), built once and
# used for both endpoints of each edge.
name_to_id = N.reset_index().set_index('full_name')['index']
edges['person_id'] = edges.person.map(name_to_id)
edges['referent_id'] = edges.referent.map(name_to_id)
# relation_id is not recomputed here; it was already derived in the "Clean up values" section.

In [23]:
# Extract Edges
# Collapse the row-level edge list into one row per (person, referent, relation)
# triple; `n` is the number of original rows behind that triple.
edge_keys = ['person_id', 'referent_id', 'relation_id']
E = edges.groupby(edge_keys)['relation_id'].count().to_frame(name='n')
E.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n
person_id,referent_id,relation_id,Unnamed: 3_level_1
COLLEY_CIBBER,ALEXANDER_POPE,attacked,10
COLLEY_CIBBER,BARTON_BOOTH,akin_to,1
COLLEY_CIBBER,CAIUS_GABRIEL_CIBBER,akin_to,2
COLLEY_CIBBER,CAMILLO_QUERNO,akin_to,2
COLLEY_CIBBER,CARDELL_GOODMAN,akin_to,1


In [24]:
def _edge_to_dot(row):
    """Build the DOT edge statement for one row of E.

    row.name is the (person_id, referent_id, relation_id) index tuple; the colour
    is looked up in R by relation_id and the weight is the squared row count.
    """
    source_id, target_id, relation_id = row.name[0], row.name[1], row.name[2]
    colour = R.loc[relation_id, 'color']
    return f"{source_id} -> {target_id} [color={colour} weight={row.n**2}]"


E['dot'] = E.apply(_edge_to_dot, axis=1)

In [25]:
# Display the aggregated edge table together with the generated DOT statement of each edge.
E

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n,dot
person_id,referent_id,relation_id,Unnamed: 3_level_1,Unnamed: 4_level_1
COLLEY_CIBBER,ALEXANDER_POPE,attacked,10,COLLEY_CIBBER -> ALEXANDER_POPE [color=red wei...
COLLEY_CIBBER,BARTON_BOOTH,akin_to,1,COLLEY_CIBBER -> BARTON_BOOTH [color=green wei...
COLLEY_CIBBER,CAIUS_GABRIEL_CIBBER,akin_to,2,COLLEY_CIBBER -> CAIUS_GABRIEL_CIBBER [color=g...
COLLEY_CIBBER,CAMILLO_QUERNO,akin_to,2,COLLEY_CIBBER -> CAMILLO_QUERNO [color=green w...
COLLEY_CIBBER,CARDELL_GOODMAN,akin_to,1,COLLEY_CIBBER -> CARDELL_GOODMAN [color=green ...
...,...,...,...,...
JOHN_HENLEY,JAMES_RALPH,akin_to,2,JOHN_HENLEY -> JAMES_RALPH [color=green weight=4]
JOHN_HENLEY,POSSIBLY_BENJAMIN_HOADLY,akin_to,1,JOHN_HENLEY -> POSSIBLY_BENJAMIN_HOADLY [color...
JOHN_HENLEY,RICHARD_BLACKMORE,akin_to,1,JOHN_HENLEY -> RICHARD_BLACKMORE [color=green ...
JOHN_HENLEY,RICHARD_FLECKNOE,akin_to,1,JOHN_HENLEY -> RICHARD_FLECKNOE [color=green w...


# Draw Graphs

In [26]:
def create_graph(persons=None, mode='fdp'):
    """Write a Graphviz DOT file for part of the network and render it as images.

    Reads the module-level edge table ``E`` (indexed by person_id, referent_id,
    relation_id, with a ``dot`` column), the node table ``N`` (indexed by node
    id, with a ``dot`` column) and the output directory ``image_dir``.

    Parameters
    ----------
    persons : list of str, optional
        Node ids of the source persons whose outgoing edges are drawn.
        ``None`` or an empty list draws every edge in ``E``.
    mode : str
        Graphviz layout engine, passed to the command line as ``-K<mode>``.

    Side effects
    ------------
    Creates ``image_dir`` if needed, writes ``<image_dir>/<ids joined by _>.dot``
    (``all.dot`` when no persons are given), prints that path, and runs Graphviz
    once per output format, writing ``<dot path>_<mode>.<format>`` for jpeg,
    svg and png. Returns ``None``.
    """
    persons = [] if persons is None else list(persons)

    # Create subsets of N and E
    E1 = E.loc[persons] if persons else E
    # Declare every node that takes part in the selected edges. Sources are
    # included as well as referents, so that the "all" graph also gets labels
    # for persons who never appear as a referent.
    node_ids = (
        set(persons)
        | set(E1.index.get_level_values(0))
        | set(E1.index.get_level_values('referent_id'))
    )
    N1 = N.loc[list(node_ids)].sort_index()

    # Convert to Graphviz
    graph_text = (
        "digraph G {\n"
        "rankdir=LR\n"
        "node [shape=plaintext]\n"
        + '\n'.join(N1['dot'].values)
        + "\n"
        + '\n'.join(E1['dot'].values)
        + "\n}"
    )

    # Print to file
    # The directory prefix is added exactly once, also for the "all" graph.
    base_name = '_'.join(persons) if persons else "all"
    file_name = f"{image_dir}/{base_name}.dot"
    os.makedirs(image_dir, exist_ok=True)
    print(file_name)
    with open(file_name, "w") as outfile:
        outfile.write(graph_text)

    # Render once per format; -T must match the extension of the file written.
    for fmt in ("jpeg", "svg", "png"):
        os.system(f"circo -K{mode} -T{fmt} {file_name} > {file_name}_{mode}.{fmt}")

In [27]:
# Node ids of all source persons: the values of the first level of E's index.
persons = list(E.index.levels[0])

In [28]:
# Draw one combined graph containing the outgoing edges of every source person.
create_graph(persons)

images/COLLEY_CIBBER_EDMUND_CURLL_GILES_JACOB_JOHN_DENNIS_JOHN_HENLEY.dot


In [29]:
# Draw a separate graph for each source person (create_graph expects a list of ids).
for person in persons:
    create_graph([person])

images/COLLEY_CIBBER.dot
images/EDMUND_CURLL.dot
images/GILES_JACOB.dot
images/JOHN_DENNIS.dot
images/JOHN_HENLEY.dot
