# Entry G22 notebook: Mean Distance Between Connected Nodes

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [4]:
def nearest_villains(con, model_type='uni', max_level=2):
    if model_type == 'uni' or model_type == 'mix':
        rel_type = 'KNOWS'
    else:
        max_level = max_level * 2
        rel_type = 'APPEARS_IN'
        
    v_query = '''MATCH (h:Hero)
    call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:'''  + f"{max_level}, labelFilter:'>Villain', relationshipFilter:'{rel_type}'" + '''})
    YIELD path
    RETURN h.name as name, labels(h)[-1] as type, length(path) as distance, count(path) as villain_ct'''
    
    h_query = '''MATCH (h:Hero)
    call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:'''  + f"{max_level}, labelFilter:'>Hero', relationshipFilter:'{rel_type}'" + '''})
    YIELD path
    RETURN h.name as name, labels(h)[-1] as type, length(path) as distance, count(path) as total_ct'''
    
    df = pd.DataFrame(con.run(v_query).data()).merge(
    pd.DataFrame(con.run(h_query).data()),
    on = ['name', 'type', 'distance'], how='outer').fillna(0)
    df['villain_pct'] = df['villain_ct'] / df['total_ct']
    return df

  and should_run_async(code)


In [5]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")

### Neighbor Counts and Villain Percent

In [24]:
uni_neighbors = pd.DataFrame(uni_session.run('''MATCH (h:Hero)
    call apoc.path.spanningTree(h, {minLevel: 1, labelFilter:'>Hero'})
    YIELD path
    RETURN h.name as name, labels(h)[-1] as type, length(path) as distance, count(path) as total_ct''').data())
uni_neighbors

Unnamed: 0,name,type,distance,total_ct
0,24-HOUR MAN/EMMANUEL,Hero,1,5
1,24-HOUR MAN/EMMANUEL,Hero,2,30
2,24-HOUR MAN/EMMANUEL,Hero,3,1281
3,24-HOUR MAN/EMMANUEL,Hero,4,5008
4,24-HOUR MAN/EMMANUEL,Hero,5,78
...,...,...,...,...
25537,ZURI,Hero,4,10
25538,ZZZAX,Villain,1,30
25539,ZZZAX,Villain,2,3150
25540,ZZZAX,Villain,3,3207


In [25]:
total_peeps = uni_neighbors['name'].nunique()

  and should_run_async(code)


In [31]:
total_peeps

6421

In [26]:
mean_dist_df = uni_neighbors[['distance', 'total_ct']].groupby('distance').agg('sum')
mean_dist_df = mean_dist_df.reset_index()
mean_dist_df

Unnamed: 0,distance,total_ct
0,1,343288
1,2,15350712
2,3,24418328
3,4,876840
4,5,2954


In [27]:
mean_dist_df['total_nbr_steps'] = mean_dist_df['total_ct'] * mean_dist_df['distance']
mean_dist_df

  and should_run_async(code)


Unnamed: 0,distance,total_ct,total_nbr_steps
0,1,343288,343288
1,2,15350712,30701424
2,3,24418328,73254984
3,4,876840,3507360
4,5,2954,14770


In [29]:
mean_dist_df['total_nbr_steps'].agg('sum')

107821826

In [30]:
(total_peeps * (total_peeps-1))

41222820

In [28]:
mean_dist_df['total_nbr_steps'].agg('sum')/(total_peeps * (total_peeps-1))

  and should_run_async(code)


2.61558588180042