# Entry 23d notebook: Next Nearest Villain Neighbor Egonet Densities

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [3]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")
bi_session = driver.session(database="bimodal")
mix_session = driver.session(database="mixmodal")

  and should_run_async(code)


In [1]:
def local_densities(con, model_type='uni', max_level=2):
    if model_type == 'uni' or model_type == 'mix':
        rel_type = 'KNOWS'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, labelFilter:'Villain', relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        RETURN h.name as hero, labels(h)[-1] as h_type, size(relationships) as rel_ct, 'na' as node_type, size(nodes) as node_ct'''
    else:
        max_level = max_level * 2
        rel_type = 'APPEARS_IN'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, labelFilter:'Villain|Comic', relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        WITH h.name as hero, labels(h)[-1] as h_type, nodes, size(relationships) as rel_ct
        UNWIND nodes as indv_node
        RETURN hero, h_type, rel_ct, labels(indv_node)[0] as node_type, count(indv_node) as node_ct'''
    
    df = pd.DataFrame(con.run(subgraph_query).data())
    df.columns = ['hero', 'h_type', 'rel_ct', 'node_type', 'node_ct']
    
    if model_type == 'uni' or model_type == 'mix':
        df['p_rel_ct'] = (df['node_ct'] * (df['node_ct'] - 1)) / 2
    else:
        df = pd.pivot_table(df, index=['hero', 'h_type', 'rel_ct'], columns='node_type', values='node_ct').reset_index()
        df['p_rel_ct'] = df['Hero'] * df['Comic']
        
    df['density'] = df['rel_ct'] / df['p_rel_ct']
    return df

## Uni Densities Nearest Neighbors

In [4]:
uni_densities = local_densities(uni_session)
uni_densities.to_csv('data/uni_nnn_villain_densities.csv', index=False)
uni_densities

Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,0,na,1,0.0,
1,3-D MAN/CHARLES CHAN,Hero,3785,na,306,46665.0,0.081110
2,4-D MAN/MERCURIO,Villain,2828,na,258,33153.0,0.085301
3,8-BALL/,Hero,0,na,1,0.0,
4,"ABBOTT, JACK",Hero,1517,na,146,10585.0,0.143316
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,4500,na,343,58653.0,0.076722
6435,ZOTA,Hero,762,na,86,3655.0,0.208482
6436,ZURAS,Hero,1923,na,194,18721.0,0.102719
6437,ZURI,Hero,4496,na,379,71631.0,0.062766


In [8]:
uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,5266.0
mean,1700.582078,150.681938,24192.566858,0.219426
std,1930.940225,160.732017,41912.069849,0.212462
min,0.0,1.0,0.0,0.027701
25%,20.0,10.0,45.0,0.089583
50%,902.0,94.0,4371.0,0.143653
75%,2783.5,236.0,27730.0,0.268074
max,7509.0,736.0,270480.0,1.0


## Bi Densities Nearest Neighbors

In [5]:
bi_densities = local_densities(bi_session, model_type='bi')
bi_densities.to_csv('data/bi_nnn_villain_densities.csv', index=False)
bi_densities

  and should_run_async(code)


node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,1,1,1,1,1.000000
1,3-D MAN/CHARLES CHAN,Hero,2131,705,306,215730,0.009878
2,4-D MAN/MERCURIO,Villain,2177,857,258,221106,0.009846
3,8-BALL/,Hero,3,3,1,3,1.000000
4,A,Hero,275,90,61,5490,0.050091
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,342,98,86,8428,0.040579
6435,"ZOTA, CARLO",Hero,3387,1043,343,357749,0.009468
6436,ZURAS,Hero,1065,409,194,79346,0.013422
6437,ZURI,Hero,2916,1064,379,403256,0.007231


In [9]:
bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,1302.03308,449.968629,150.681938,178249.1,0.24713
std,2050.090103,767.334809,160.732017,461227.1,0.385898
min,1.0,1.0,1.0,1.0,0.003181
25%,34.0,16.0,10.0,166.5,0.012292
50%,469.0,148.0,94.0,13764.0,0.03452
75%,1706.0,577.0,236.0,129666.0,0.226128
max,15599.0,6633.0,736.0,4881888.0,1.0


## Mixed Uni Densities Nearest Neighbors 

In [6]:
mix_uni_densities = local_densities(mix_session)
mix_uni_densities.to_csv('data/mix_uni_nnn_villain_densities.csv', index=False)
mix_uni_densities

  and should_run_async(code)


Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,0,na,1,0.0,
1,3-D MAN/CHARLES CHAN,Hero,3785,na,306,46665.0,0.081110
2,4-D MAN/MERCURIO,Villain,2828,na,258,33153.0,0.085301
3,8-BALL/,Hero,0,na,1,0.0,
4,"ABBOTT, JACK",Hero,1517,na,146,10585.0,0.143316
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,4500,na,343,58653.0,0.076722
6435,ZOTA,Hero,762,na,86,3655.0,0.208482
6436,ZURAS,Hero,1923,na,194,18721.0,0.102719
6437,ZURI,Hero,4496,na,379,71631.0,0.062766


In [10]:
mix_uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,5266.0
mean,1700.582078,150.681938,24192.566858,0.219426
std,1930.940225,160.732017,41912.069849,0.212462
min,0.0,1.0,0.0,0.027701
25%,20.0,10.0,45.0,0.089583
50%,902.0,94.0,4371.0,0.143653
75%,2783.5,236.0,27730.0,0.268074
max,7509.0,736.0,270480.0,1.0


## Mixed Bi Densities Nearest Neighbors

In [7]:
mix_bi_densities = local_densities(mix_session, model_type='bi')
mix_bi_densities.to_csv('data/mix_bi_nnn_villain_densities.csv', index=False)
mix_bi_densities

  and should_run_async(code)


node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,1,1,1,1,1.000000
1,3-D MAN/CHARLES CHAN,Hero,5916,705,306,215730,0.027423
2,4-D MAN/MERCURIO,Villain,5005,857,258,221106,0.022636
3,8-BALL/,Hero,3,3,1,3,1.000000
4,A,Hero,848,90,61,5490,0.154463
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,1104,98,86,8428,0.130992
6435,"ZOTA, CARLO",Hero,7887,1043,343,357749,0.022046
6436,ZURAS,Hero,2988,409,194,79346,0.037658
6437,ZURI,Hero,7412,1064,379,403256,0.018380


In [11]:
mix_bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,3002.615158,449.968629,150.681938,178249.1,0.305504
std,3885.858198,767.334809,160.732017,461227.1,0.391141
min,1.0,1.0,1.0,1.0,0.004719
25%,59.0,16.0,10.0,166.5,0.032281
50%,1390.0,148.0,94.0,13764.0,0.095677
75%,4420.0,577.0,236.0,129666.0,0.408333
max,23051.0,6633.0,736.0,4881888.0,1.88
