# Import Libraries

In [1]:
import pandas as pd
import os

# Get Raw Data

In [2]:
# Only the edge list is loaded. nodes.csv is left unread because the node
# table is derived from the person/referent columns of the edges instead.
# nodes = pd.read_csv("db/2020/nodes.csv")
edges = pd.read_csv("db/2020/edges.csv")

In [3]:
# Show the raw edge table (long frames are truncated in the rendered output).
edges

Unnamed: 0,row_id,person,referent,relation,rel
0,0,Colley Cibber,Alexander Pope,Attacked,-2
1,1,Colley Cibber,Alexander Pope,Attacked,-2
2,2,Colley Cibber,John Dennis,Akin to,1
3,3,Colley Cibber,Lewis Theobald,Akin to,1
4,4,Colley Cibber,Stephen Duck,Akin to,1
...,...,...,...,...,...
341,341,John Henley,John Toland,Akin to,1
342,342,John Henley,Matthew Tindal,Akin to,1
343,343,John Henley,Thomas Woolston,Akin to,1
344,344,John Henley,Thomas Sherlock,Akin to,1


# Extract Relations

In [9]:
# Relation table: one row per relation type with its frequency (n), the colour
# used for its edges in the graph, and a human-readable label.
#
# The relation ids are derived here from the raw `relation` column rather than
# read from edges['relation_id'], so this cell does not depend on a column that
# is only added to `edges` further down (it must survive Restart & Run All).
RELATION_COLORS = {
    'akin_to': 'green',
    'attacked': 'red',
    'dissimilar': 'orange',
    'defended': 'blue',
}
DEFAULT_RELATION_COLOR = 'black'  # used for any relation type not listed above

relation_ids = edges['relation'].str.lower().str.replace(' ', '_', regex=False)
R = relation_ids.value_counts().to_frame('n')
R.index.name = 'relation_id'
# Look colours up by relation id instead of by position, so the assignment does
# not change if the frequency ranking of the relations changes.
R['color'] = [RELATION_COLORS.get(relation_id, DEFAULT_RELATION_COLOR) for relation_id in R.index]
R['label'] = R.index.str.replace('_', ' ')
R

Unnamed: 0_level_0,n,color,label
relation_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
akin_to,189,green,akin to
attacked,113,red,attacked
dissimilar,25,orange,dissimilar
defended,19,blue,defended


# Extract Nodes

In [10]:
# Node table: one row per name that occurs as a person or as a referent,
# with n = number of edge rows the name takes part in.
name_counts = pd.concat([edges.person, edges.referent]).value_counts()
N = pd.DataFrame({'full_name': name_counts.index, 'n': name_counts.values})

# Node id used in the DOT file: spaces become underscores, the name is
# upper-cased, and any remaining non-word characters are removed.
node_ids = N['full_name'].str.replace(' ', '_').str.upper().str.replace(r"\W", "", regex=True)
N = N.set_index(node_ids.rename('index'))

# HTML-like label with one word per line, and the full DOT node statement.
N['label'] = N['full_name'].str.replace(' ', '<br/>')
N['dot'] = [f"{node_id} [label=< {label} >]" for node_id, label in zip(N.index, N['label'])]

In [11]:
# Preview the first rows of the node table, including the generated DOT node statements.
N.head()

Unnamed: 0_level_0,full_name,n,label,dot
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
EDMUND_CURLL,Edmund Curll,131,Edmund<br/>Curll,EDMUND_CURLL [label=< Edmund<br/>Curll >]
JOHN_DENNIS,John Dennis,122,John<br/>Dennis,JOHN_DENNIS [label=< John<br/>Dennis >]
ALEXANDER_POPE,Alexander Pope,83,Alexander<br/>Pope,ALEXANDER_POPE [label=< Alexander<br/>Pope >]
COLLEY_CIBBER,Colley Cibber,78,Colley<br/>Cibber,COLLEY_CIBBER [label=< Colley<br/>Cibber >]
GILES_JACOB,Giles Jacob,24,Giles<br/>Jacob,GILES_JACOB [label=< Giles<br/>Jacob >]


# Extract Edges

In [12]:
# Lookup from a person's full name to the node id used as N's index.
id_by_full_name = N.reset_index().set_index('full_name')['index']

# Attach node ids for both ends of every edge, plus a normalised relation id
# (lower-case, spaces replaced by underscores).
edges['person_id'] = edges['person'].map(id_by_full_name)
edges['referent_id'] = edges['referent'].map(id_by_full_name)
edges['relation_id'] = edges['relation'].str.lower().str.replace(' ', '_', regex=True)

In [13]:
# Edge table: collapse repeated (person, referent, relation) rows into a single
# row whose n is the number of times that triple occurs in `edges`.
edge_keys = ['person_id', 'referent_id', 'relation_id']
E = edges.groupby(edge_keys).size().to_frame('n')
E.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n
person_id,referent_id,relation_id,Unnamed: 3_level_1
COLLEY_CIBBER,ALEXANDER_POPE,akin_to,1
COLLEY_CIBBER,ALEXANDER_POPE,attacked,9
COLLEY_CIBBER,BARTON_BOOTH,akin_to,1
COLLEY_CIBBER,CAIUS_GABRIEL_CIBBER,akin_to,2
COLLEY_CIBBER,CAMILLO_QUERNO,akin_to,2


In [14]:
# DOT edge statement for every (person, referent, relation) triple: the colour
# comes from the relation table R, the weight is the squared occurrence count.
E['dot'] = [
    f"{person_id} -> {referent_id} [color={R.loc[relation_id, 'color']} weight={count**2}]"
    for (person_id, referent_id, relation_id), count in zip(E.index, E['n'])
]

In [15]:
# Display the aggregated edge table together with its DOT edge statements.
E

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,n,dot
person_id,referent_id,relation_id,Unnamed: 3_level_1,Unnamed: 4_level_1
COLLEY_CIBBER,ALEXANDER_POPE,akin_to,1,COLLEY_CIBBER -> ALEXANDER_POPE [color=green w...
COLLEY_CIBBER,ALEXANDER_POPE,attacked,9,COLLEY_CIBBER -> ALEXANDER_POPE [color=red wei...
COLLEY_CIBBER,BARTON_BOOTH,akin_to,1,COLLEY_CIBBER -> BARTON_BOOTH [color=green wei...
COLLEY_CIBBER,CAIUS_GABRIEL_CIBBER,akin_to,2,COLLEY_CIBBER -> CAIUS_GABRIEL_CIBBER [color=g...
COLLEY_CIBBER,CAMILLO_QUERNO,akin_to,2,COLLEY_CIBBER -> CAMILLO_QUERNO [color=green w...
...,...,...,...,...
JOHN_HENLEY,MATTHEW_TINDAL,akin_to,1,JOHN_HENLEY -> MATTHEW_TINDAL [color=green wei...
JOHN_HENLEY,RICHARD_BLACKMORE,akin_to,1,JOHN_HENLEY -> RICHARD_BLACKMORE [color=green ...
JOHN_HENLEY,RICHARD_FLECKNOE,akin_to,1,JOHN_HENLEY -> RICHARD_FLECKNOE [color=green w...
JOHN_HENLEY,THOMAS_SHERLOCK,akin_to,1,JOHN_HENLEY -> THOMAS_SHERLOCK [color=green we...


# Draw Graphs

In [45]:
def create_graph(persons: list = None, mode='fdp'):
    """Write a Graphviz DOT file for part of the network and render it as images.

    Reads the notebook-level node table ``N`` and edge table ``E``; both must
    already contain their ``dot`` column.

    Parameters
    ----------
    persons : list of str, optional
        ``person_id`` values (first level of ``E``'s index) whose outgoing
        edges are drawn. When empty or omitted, every edge in ``E`` is drawn.
    mode : str, default 'fdp'
        Graphviz layout engine, passed to the command line as ``-K<mode>``.

    Side effects
    ------------
    Creates ``images/`` if needed, prints the DOT file name, writes
    ``images/<ids joined by '_'>.dot`` (or ``images/all.dot``), and renders
    ``<dot file>_<mode>.jpeg``, ``.svg`` and ``.png`` next to it.

    Raises
    ------
    KeyError
        If an entry of ``persons`` is not a ``person_id`` present in ``E``.
    """
    # Local import so the function does not depend on the notebook's import cell.
    import subprocess

    persons = list(persons) if persons is not None else []

    # Create subsets of N and E
    E1 = E.loc[persons] if persons else E
    # Declare a labelled node for both ends of every selected edge. Including the
    # source ids matters when all edges are drawn: a person who is never a
    # referent would otherwise have no node statement and lose its label.
    source_ids = E1.index.get_level_values('person_id')
    target_ids = E1.index.get_level_values('referent_id')
    node_idx = sorted(set(persons) | set(source_ids) | set(target_ids))
    N1 = N.loc[node_idx]

    # Convert to Graphviz
    dot_lines = ["digraph G {", "rankdir=LR", "node [shape=plaintext]"]
    dot_lines.extend(N1['dot'])
    dot_lines.extend(E1['dot'])
    dot_lines.append("}")
    graph_text = "\n".join(dot_lines)

    # Print to file
    os.makedirs("images", exist_ok=True)
    file_name = "images/" + '_'.join(persons) + ".dot" if persons else "images/all.dot"
    print(file_name)
    with open(file_name, "w", encoding="utf-8") as outfile:
        outfile.write(graph_text)

    # Render once per output format. The format has to be passed to -T as well;
    # a fixed -Tjpeg would write JPEG data into the .svg and .png files.
    for fmt in ("jpeg", "svg", "png"):
        image_name = f"{file_name}_{mode}.{fmt}"
        # Argument list instead of a shell string, so ids and mode are never
        # interpreted by a shell.
        command = ["circo", f"-K{mode}", f"-T{fmt}", file_name, "-o", image_name]
        try:
            result = subprocess.run(command)
        except FileNotFoundError:
            print("Graphviz executable 'circo' not found; skipping image rendering")
            break
        if result.returncode != 0:
            print(f"Graphviz exited with code {result.returncode} while writing {image_name}")

In [46]:
# The first level of E's index is person_id, so this is the list of distinct
# person ids stored in that level.
persons = E.index.levels[0].to_list()

In [47]:
# One combined graph for all persons; the DOT file is named after the person
# ids joined with underscores.
create_graph(persons)

images/COLLEY_CIBBER_EDMUND_CURLL_ELIZA_HAYWOOD_GILES_JACOB_JOHN_DENNIS_JOHN_HENLEY.dot


In [44]:
# One graph per person: each call receives a single-element list, so the DOT
# file is named images/<PERSON_ID>.dot.
for person in persons:
    create_graph([person])

images/COLLEY_CIBBER.dot
images/EDMUND_CURLL.dot
images/ELIZA_HAYWOOD.dot
images/GILES_JACOB.dot
images/JOHN_DENNIS.dot
images/JOHN_HENLEY.dot
