# Entry 23c notebook: Nearest Villain Neighbor Egonet Densities

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [2]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")
bi_session = driver.session(database="bimodal")
mix_session = driver.session(database="mixmodal")

  and should_run_async(code)


In [8]:
def local_densities(con, model_type='uni', max_level=1):
    if model_type == 'uni' or model_type == 'mix':
        rel_type = 'KNOWS'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, labelFilter:'Villain|Comic', relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        RETURN h.name as hero, labels(h)[-1] as h_type, size(relationships) as rel_ct, 'na' as node_type, size(nodes) as node_ct'''
    else:
        max_level = max_level * 2
        rel_type = 'APPEARS_IN'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, labelFilter:'Villain|Comic', relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        WITH h.name as hero, labels(h)[-1] as h_type, nodes, size(relationships) as rel_ct
        UNWIND nodes as indv_node
        RETURN hero, h_type, rel_ct, labels(indv_node)[0] as node_type, count(indv_node) as node_ct'''
    
    df = pd.DataFrame(con.run(subgraph_query).data())
    df.columns = ['hero', 'h_type', 'rel_ct', 'node_type', 'node_ct']
    
    if model_type == 'uni' or model_type == 'mix':
        df['p_rel_ct'] = (df['node_ct'] * (df['node_ct'] - 1)) / 2
    else:
        df = pd.pivot_table(df, index=['hero', 'h_type', 'rel_ct'], columns='node_type', values='node_ct').reset_index()
        df['p_rel_ct'] = df['Hero'] * df['Comic']
        
    df['density'] = df['rel_ct'] / df['p_rel_ct']
    return df

  and should_run_async(code)


## Uni Densities Nearest Neighbors

In [4]:
uni_densities = local_densities(uni_session)
uni_densities.to_csv('data/uni_nn_villain_densities.csv')
uni_densities

Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,0,na,1,0.0,
1,3-D MAN/CHARLES CHAN,Hero,56,na,12,66.0,0.848485
2,4-D MAN/MERCURIO,Villain,26,na,12,66.0,0.393939
3,8-BALL/,Hero,0,na,1,0.0,
4,"ABBOTT, JACK",Hero,6,na,4,6.0,1.000000
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,59,na,14,91.0,0.648352
6435,ZOTA,Hero,1,na,2,1.0,1.000000
6436,ZURAS,Hero,30,na,13,78.0,0.384615
6437,ZURI,Hero,52,na,16,120.0,0.433333


In [14]:
uni_densities.describe()

Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,5266.0
mean,74.702904,11.354248,389.533313,0.786164
std,288.689129,25.721677,2840.809768,0.258926
min,0.0,1.0,0.0,0.066508
25%,1.0,2.0,1.0,0.6
50%,6.0,4.0,6.0,0.94475
75%,28.0,10.0,45.0,1.0
max,5123.0,393.0,77028.0,1.0


## Bi Densities Nearest Neighbors

In [9]:
bi_densities = local_densities(bi_session, model_type='bi')
bi_densities.to_csv('data/bi_nn_villain_densities.csv')
bi_densities

node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,1,1,1,1,1.000000
1,3-D MAN/CHARLES CHAN,Hero,22,9,12,108,0.203704
2,4-D MAN/MERCURIO,Villain,28,9,12,108,0.259259
3,8-BALL/,Hero,3,3,1,3,1.000000
4,A,Hero,7,3,3,9,0.777778
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,3,2,2,4,0.750000
6435,"ZOTA, CARLO",Hero,24,10,14,140,0.171429
6436,ZURAS,Hero,44,23,13,299,0.147157
6437,ZURI,Hero,57,21,16,336,0.169643


In [17]:
bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,44.501475,14.925299,11.354248,1540.55521,0.652676
std,177.445898,59.268725,25.721677,15810.877476,0.343829
min,1.0,1.0,1.0,1.0,0.007479
25%,3.0,1.0,2.0,3.0,0.333333
50%,7.0,3.0,4.0,10.0,0.685714
75%,24.0,8.0,10.0,70.0,1.0
max,4870.0,1577.0,393.0,555104.0,1.0


## Mixed Uni Densities Nearest Neighbors 

In [6]:
mix_uni_densities = local_densities(mix_session)
mix_uni_densities.to_csv('data/mix_uni_nn_villain_densities.csv')
mix_uni_densities

  and should_run_async(code)


Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,0,na,1,0.0,
1,3-D MAN/CHARLES CHAN,Hero,56,na,12,66.0,0.848485
2,4-D MAN/MERCURIO,Villain,26,na,12,66.0,0.393939
3,8-BALL/,Hero,0,na,1,0.0,
4,"ABBOTT, JACK",Hero,6,na,4,6.0,1.000000
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,59,na,14,91.0,0.648352
6435,ZOTA,Hero,1,na,2,1.0,1.000000
6436,ZURAS,Hero,30,na,13,78.0,0.384615
6437,ZURI,Hero,52,na,16,120.0,0.433333


In [16]:
mix_uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,5266.0
mean,74.702904,11.354248,389.533313,0.786164
std,288.689129,25.721677,2840.809768,0.258926
min,0.0,1.0,0.0,0.066508
25%,1.0,2.0,1.0,0.6
50%,6.0,4.0,6.0,0.94475
75%,28.0,10.0,45.0,1.0
max,5123.0,393.0,77028.0,1.0


## Mixed Bi Densities Nearest Neighbors

In [10]:
mix_bi_densities = local_densities(mix_session, model_type='bi')
mix_bi_densities.to_csv('data/mix_bi_nn_villain_densities.csv')
mix_bi_densities

  and should_run_async(code)


node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,1,1,1,1,1.000000
1,3-D MAN/CHARLES CHAN,Hero,78,9,12,108,0.722222
2,4-D MAN/MERCURIO,Villain,54,9,12,108,0.500000
3,8-BALL/,Hero,3,3,1,3,1.000000
4,A,Hero,10,3,3,9,1.111111
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,4,2,2,4,1.000000
6435,"ZOTA, CARLO",Hero,83,10,14,140,0.592857
6436,ZURAS,Hero,74,23,13,299,0.247492
6437,ZURI,Hero,109,21,16,336,0.324405


In [15]:
mix_bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,119.20438,14.925299,11.354248,1540.55521,1.177764
std,459.99888,59.268725,25.721677,15810.877476,0.971288
min,1.0,1.0,1.0,1.0,0.016211
25%,4.0,1.0,2.0,3.0,0.65292
50%,14.0,3.0,4.0,10.0,1.0
75%,55.0,8.0,10.0,70.0,1.5
max,9044.0,1577.0,393.0,555104.0,11.5
