# Entry 23a notebook: Nearest Neighbor Egonet Densities

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [2]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")
bi_session = driver.session(database="bimodal")
mix_session = driver.session(database="mixmodal")

  and should_run_async(code)


In [9]:
def local_densities(con, model_type='uni', max_level=1):
    if model_type == 'uni' or model_type == 'mix':
        rel_type = 'KNOWS'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        RETURN h.name as hero, labels(h)[-1] as h_type, size(relationships) as rel_ct, 'na' as node_type, size(nodes) as node_ct'''
    else:
        max_level = max_level * 2
        rel_type = 'APPEARS_IN'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        WITH h.name as hero, labels(h)[-1] as h_type, nodes, size(relationships) as rel_ct
        UNWIND nodes as indv_node
        RETURN hero, h_type, rel_ct, labels(indv_node)[0] as node_type, count(indv_node) as node_ct'''
    
    df = pd.DataFrame(con.run(subgraph_query).data())
    df.columns = ['hero', 'h_type', 'rel_ct', 'node_type', 'node_ct']
    
    if model_type == 'uni' or model_type == 'mix':
        df['p_rel_ct'] = (df['node_ct'] * (df['node_ct'] - 1)) / 2
    else:
        df = pd.pivot_table(df, index=['hero', 'h_type', 'rel_ct'], columns='node_type', values='node_ct').reset_index()
        df['p_rel_ct'] = df['Hero'] * df['Comic']
        
    df['density'] = df['rel_ct'] / df['p_rel_ct']
    return df

## Uni Densities Nearest Neighbors

In [4]:
uni_densities = local_densities(uni_session)
uni_densities.to_csv('data/uni_nn_densities.csv', index=False)
uni_densities

Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,15,na,6,15.0,1.000000
1,3-D MAN/CHARLES CHAN,Hero,6645,na,123,7503.0,0.885646
2,4-D MAN/MERCURIO,Villain,1271,na,73,2628.0,0.483638
3,8-BALL/,Hero,81,na,15,105.0,0.771429
4,"ABBOTT, JACK",Hero,36,na,9,36.0,1.000000
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,1680,na,69,2346.0,0.716113
6435,ZOTA,Hero,33,na,9,36.0,0.916667
6436,ZURAS,Hero,1811,na,93,4278.0,0.423329
6437,ZURI,Hero,2101,na,99,4851.0,0.433107


In [5]:
uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,6421.0
mean,1652.297562,54.313869,8247.615,0.792685
std,5928.968707,116.626159,61121.15,0.236162
min,0.0,1.0,0.0,0.050879
25%,55.0,11.0,55.0,0.632576
50%,190.0,21.0,210.0,0.885714
75%,769.5,49.0,1176.0,1.0
max,93812.0,1920.0,1842240.0,1.0


## Bi Densities Nearest Neighbors

In [None]:
bi_densities = local_densities(bi_session, model_type='bi')
bi_densities.to_csv('data/bi_nn_densities.csv', index=False)
bi_densities

In [7]:
bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,194.820003,14.925299,54.313869,7156.255,0.59569
std,784.811139,59.268725,116.626159,75172.46,0.33997
min,1.0,1.0,1.0,1.0,0.005531
25%,15.0,1.0,11.0,17.0,0.28125
50%,37.0,3.0,21.0,60.0,0.606061
75%,111.0,8.0,49.0,371.5,1.0
max,17391.0,1577.0,1920.0,2767635.0,1.0


## Mixed Uni Densities Nearest Neighbors 

In [8]:
mix_uni_densities = local_densities(mix_session)
mix_uni_densities.to_csv('data/mix_uni_nn_densities.csv', index=False)
mix_uni_densities

  and should_run_async(code)


Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,15,na,6,15.0,1.000000
1,3-D MAN/CHARLES CHAN,Hero,6645,na,123,7503.0,0.885646
2,4-D MAN/MERCURIO,Villain,1271,na,73,2628.0,0.483638
3,8-BALL/,Hero,81,na,15,105.0,0.771429
4,"ABBOTT, JACK",Hero,36,na,9,36.0,1.000000
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,1680,na,69,2346.0,0.716113
6435,ZOTA,Hero,33,na,9,36.0,0.916667
6436,ZURAS,Hero,1811,na,93,4278.0,0.423329
6437,ZURI,Hero,2101,na,99,4851.0,0.433107


In [9]:
mix_uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,6421.0
mean,1652.297562,54.313869,8247.615,0.792685
std,5928.968707,116.626159,61121.15,0.236162
min,0.0,1.0,0.0,0.050879
25%,55.0,11.0,55.0,0.632576
50%,190.0,21.0,210.0,0.885714
75%,769.5,49.0,1176.0,1.0
max,93812.0,1920.0,1842240.0,1.0


## Mixed Bi Densities Nearest Neighbors

In [4]:
mix_bi_densities = local_densities(mix_session, model_type='bi')
mix_bi_densities.to_csv('data/mix_bi_nn_densities.csv', index=False)
mix_bi_densities

node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,21,1,6,6,3.500000
1,3-D MAN/CHARLES CHAN,Hero,6802,9,123,1107,6.144535
2,4-D MAN/MERCURIO,Villain,1398,9,73,657,2.127854
3,8-BALL/,Hero,105,3,15,45,2.333333
4,A,Hero,56,3,9,27,2.074074
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,45,2,9,18,2.500000
6435,"ZOTA, CARLO",Hero,1802,10,69,690,2.611594
6436,ZURAS,Hero,2130,23,93,2139,0.995792
6437,ZURI,Hero,2385,21,99,2079,1.147186


In [5]:
mix_bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,1847.117565,14.925299,54.313869,7156.255,3.966449
std,6683.910176,59.268725,116.626159,75172.46,4.070162
min,1.0,1.0,1.0,1.0,0.033825
25%,78.0,1.0,11.0,17.0,1.576085
50%,229.0,3.0,21.0,60.0,2.888889
75%,886.5,8.0,49.0,371.5,5.0
max,111203.0,1577.0,1920.0,2767635.0,46.0
