# Entry 23b notebook: Next Nearest Neighbor Egonet Densities

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [2]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")
bi_session = driver.session(database="bimodal")
mix_session = driver.session(database="mixmodal")

  and should_run_async(code)


In [3]:
def local_densities(con, model_type='uni', max_level=2):
    if model_type == 'uni' or model_type == 'mix':
        rel_type = 'KNOWS'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        RETURN h.name as hero, labels(h)[-1] as h_type, size(relationships) as rel_ct, 'na' as node_type, size(nodes) as node_ct'''
    else:
        max_level = max_level * 2
        rel_type = 'APPEARS_IN'
        subgraph_query = '''MATCH (h:Hero)
        call apoc.path.subgraphAll(h, {maxLevel:'''  + f"{max_level}, relationshipFilter:'{rel_type}'" + '''})
        YIELD nodes, relationships
        WITH h.name as hero, labels(h)[-1] as h_type, nodes, size(relationships) as rel_ct
        UNWIND nodes as indv_node
        RETURN hero, h_type, rel_ct, labels(indv_node)[0] as node_type, count(indv_node) as node_ct'''
    
    df = pd.DataFrame(con.run(subgraph_query).data())
    df.columns = ['hero', 'h_type', 'rel_ct', 'node_type', 'node_ct']
    
    if model_type == 'uni' or model_type == 'mix':
        df['p_rel_ct'] = (df['node_ct'] * (df['node_ct'] - 1)) / 2
    else:
        df = pd.pivot_table(df, index=['hero', 'h_type', 'rel_ct'], columns='node_type', values='node_ct').reset_index()
        df['p_rel_ct'] = df['Hero'] * df['Comic']
        
    df['density'] = df['rel_ct'] / df['p_rel_ct']
    return df

## Uni Densities Nearest Neighbors

In [4]:
uni_densities = local_densities(uni_session)
uni_densities.to_csv('data/uni_nnn_densities.csv')
uni_densities

Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,229,na,36,630.0,0.363492
1,3-D MAN/CHARLES CHAN,Hero,163160,na,5534,15309811.0,0.010657
2,4-D MAN/MERCURIO,Villain,140971,na,3822,7301931.0,0.019306
3,8-BALL/,Hero,3770,na,139,9591.0,0.393077
4,"ABBOTT, JACK",Hero,88796,na,2108,2220778.0,0.039984
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,152142,na,4668,10892778.0,0.013967
6435,ZOTA,Hero,64116,na,1303,848253.0,0.075586
6436,ZURAS,Hero,110841,na,2565,3288330.0,0.033707
6437,ZURI,Hero,153453,na,4711,11094405.0,0.013832


In [8]:
uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,6421.0
mean,95150.821711,2438.33499,4129075.0,0.06964
std,44733.989463,1521.666775,4586130.0,0.092168
min,0.0,1.0,0.0,0.008466
25%,62684.5,1226.5,751538.0,0.022545
50%,97548.0,2144.0,2297296.0,0.042113
75%,130385.5,3398.5,5773203.0,0.079354
max,171469.0,6365.0,20253430.0,1.0


## Bi Densities Nearest Neighbors

In [5]:
bi_densities = local_densities(bi_session, model_type='bi')
bi_densities.to_csv('data/bi_nnn_densities.csv')
bi_densities

  and should_run_async(code)


node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,174,22,36,792,0.219697
1,3-D MAN/CHARLES CHAN,Hero,82315,9742,5534,53912228,0.001527
2,4-D MAN/MERCURIO,Villain,48518,4612,3822,17627064,0.002752
3,8-BALL/,Hero,287,25,139,3475,0.082590
4,A,Hero,15106,1154,1616,1864864,0.008100
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,8323,819,1303,1067157,0.007799
6435,"ZOTA, CARLO",Hero,65677,7227,4668,33735636,0.001947
6436,ZURAS,Hero,29719,2699,2565,6922935,0.004293
6437,ZURI,Hero,67464,7392,4711,34823712,0.001937


In [9]:
bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,27714.131387,2765.69545,2438.33499,10795910.0,0.01614
std,23946.524116,2755.62486,1521.666775,15741980.0,0.075823
min,1.0,1.0,1.0,1.0,0.001219
25%,8553.0,732.5,1226.5,910672.0,0.003053
50%,20123.0,1760.0,2144.0,3745749.0,0.005202
75%,39980.0,3846.0,3398.5,13196100.0,0.009383
max,95414.0,12322.0,6365.0,78269340.0,1.0


## Mixed Uni Densities Nearest Neighbors 

In [6]:
mix_uni_densities = local_densities(mix_session)
mix_uni_densities.to_csv('data/mix_uni_nnn_densities.csv')
mix_uni_densities

  and should_run_async(code)


Unnamed: 0,hero,h_type,rel_ct,node_type,node_ct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,229,na,36,630.0,0.363492
1,3-D MAN/CHARLES CHAN,Hero,163160,na,5534,15309811.0,0.010657
2,4-D MAN/MERCURIO,Villain,140971,na,3822,7301931.0,0.019306
3,8-BALL/,Hero,3770,na,139,9591.0,0.393077
4,"ABBOTT, JACK",Hero,88796,na,2108,2220778.0,0.039984
...,...,...,...,...,...,...,...
6434,"ZOTA, CARLO",Hero,152142,na,4668,10892778.0,0.013967
6435,ZOTA,Hero,64116,na,1303,848253.0,0.075586
6436,ZURAS,Hero,110841,na,2565,3288330.0,0.033707
6437,ZURI,Hero,153453,na,4711,11094405.0,0.013832


In [10]:
mix_uni_densities.describe()

  and should_run_async(code)


Unnamed: 0,rel_ct,node_ct,p_rel_ct,density
count,6439.0,6439.0,6439.0,6421.0
mean,95150.821711,2438.33499,4129075.0,0.06964
std,44733.989463,1521.666775,4586130.0,0.092168
min,0.0,1.0,0.0,0.008466
25%,62684.5,1226.5,751538.0,0.022545
50%,97548.0,2144.0,2297296.0,0.042113
75%,130385.5,3398.5,5773203.0,0.079354
max,171469.0,6365.0,20253430.0,1.0


## Mixed Bi Densities Nearest Neighbors

In [7]:
mix_bi_densities = local_densities(mix_session, model_type='bi')
mix_bi_densities.to_csv('data/mix_bi_nnn_densities.csv')
mix_bi_densities

  and should_run_async(code)


node_type,hero,h_type,rel_ct,Comic,Hero,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,Hero,403,22,36,792,0.508838
1,3-D MAN/CHARLES CHAN,Hero,245475,9742,5534,53912228,0.004553
2,4-D MAN/MERCURIO,Villain,189489,4612,3822,17627064,0.010750
3,8-BALL/,Hero,4057,25,139,3475,1.167482
4,A,Hero,94952,1154,1616,1864864,0.050916
...,...,...,...,...,...,...,...
6434,ZOTA,Hero,72439,819,1303,1067157,0.067880
6435,"ZOTA, CARLO",Hero,217819,7227,4668,33735636,0.006457
6436,ZURAS,Hero,140560,2699,2565,6922935,0.020304
6437,ZURI,Hero,220917,7392,4711,34823712,0.006344


In [11]:
mix_bi_densities.describe()

  and should_run_async(code)


node_type,rel_ct,Comic,Hero,p_rel_ct,density
count,6439.0,6439.0,6439.0,6439.0,6439.0
mean,122864.953098,2765.69545,2438.33499,10795910.0,0.099397
std,67270.822474,2755.62486,1521.666775,15741980.0,0.331277
min,1.0,1.0,1.0,1.0,0.003408
25%,71194.0,732.5,1226.5,910672.0,0.013058
50%,117726.0,1760.0,2144.0,3745749.0,0.030438
75%,170500.0,3846.0,3398.5,13196100.0,0.075792
max,266780.0,12322.0,6365.0,78269340.0,15.634892
