# Entrry 20 notebook: Shortest Path to Villain

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [5]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")
bi_session = driver.session(database="bimodal")
mix_session = driver.session(database="mixmodal")

  and should_run_async(code)


In [6]:
uni_session.run('MATCH (h:Hero) RETURN count(h)').data()

[{'count(h)': 6439}]

## Shortest path to villain

MATCH (h:Hero)
CALL apoc.path.spanningTree(h, {labelFilter:'Villain', maxLevel:6}) YIELD path
RETURN length(path) as shortestPathToAVillain

In [11]:
test= pd.DataFrame(uni_session.run('''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:2, labelFilter:'>Villain'})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as villain_ct''').data()).merge(
pd.DataFrame(uni_session.run('''MATCH (h:Hero)
    call apoc.path.spanningTree(h,  {minLevel: 1, maxLevel:2, labelFilter:'>Hero'})
    YIELD path
    RETURN h.name as name, length(path) as distance, count(path) as total_ct''').data()),
on=['name', 'distance'], how='outer')

In [12]:
test

Unnamed: 0,name,distance,villain_ct,total_ct
0,24-HOUR MAN/EMMANUEL,2,3.0,30
1,3-D MAN/CHARLES CHAN,1,11.0,122
2,3-D MAN/CHARLES CHAN,2,814.0,5411
3,4-D MAN/MERCURIO,1,11.0,72
4,4-D MAN/MERCURIO,2,635.0,3749
...,...,...,...,...
12822,ZEAKLAR,1,,2
12823,"ZEITGEIST, NICOLA",1,,11
12824,"ZELIG, LON",1,,11
12825,ZET,1,,5


In [None]:
pd.DataFrame(uni_session.run('''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:2, labelFilter:'>Villain'})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as velocity''').data())

bi_villains = '''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:4, labelFilter:'>Villain'})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as velocity
'''

u_mix_villains = '''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:2, labelFilter:'>Villain', relationshipFilter:'KNOWS'})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as velocity
'''

b_mix_villains = '''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:4, labelFilter:'>Villain', relationshipFilter:'APPEARS_IN'})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as velocity
'''

In [1]:
max_level = '2'
label = 'Villain'

'''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:'''  + f"{max_level}, labelFilter:'>{label}'" + '''})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as velocity'''

"MATCH (h:Hero)\ncall apoc.path.spanningTree(h, {minLevel: 1, maxLevel:2, labelFilter:'>Villain'})\nYIELD path\nRETURN h.name as name, length(path) as distance, count(path) as velocity"

In [2]:
max_level = '4'
label = 'Hero'

'''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:'''  + f"{max_level}, labelFilter:'>{label}', relationshipFilter:'{rel_type}'" + '''})
YIELD path
RETURN h.name as name, length(path) as distance, count(path) as velocity'''

"MATCH (h:Hero)\ncall apoc.path.spanningTree(h, {minLevel: 1, maxLevel:4, labelFilter:'>Hero'})\nYIELD path\nRETURN h.name as name, length(path) as distance, count(path) as velocity"

In [None]:
def nearest_villains(con, model_type='uni'):
    if model_type == 'uni' or model_type == 'mix':
        max_level = 2
        rel_type = 'KNOWS'
    else:
        max_level = 4
        rel_type = 'APPEARS_IN'
        
    v_query = '''MATCH (h:Hero)
    call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:'''  +
    f"{max_level}, labelFilter:'>Villain', relationshipFilter:'{rel_type}'" + '''})
    YIELD path
    RETURN h.name as name, length(path) as distance, count(path) as villain_ct'''
    
    h_query = '''MATCH (h:Hero)
    call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:'''  +
    f"{max_level}, labelFilter:'>Hero', relationshipFilter:'{rel_type}'" + '''})
    YIELD path
    RETURN h.name as name, length(path) as distance, count(path) as total_ct'''
    
    df = pd.DataFrame(con.run(v_query).data()).merge(
    pd.DataFrame(con.run(h_query).data()),
    on = ['name', 'distance'], how='outer').fillna(0).reset_index()
    df['villain_pct'] = df['villain_ct'] / df['total_ct']
    return df

In [None]:
nearest_villain_query = '''MATCH (h:Hero)
call apoc.path.spanningTree(h, {minLevel: 1, maxLevel:6, labelFilter:'/Villain', limit 1})
YIELD path
RETURN h.name as name, length(path) as distance'''

In [48]:
def local_densities(con, hero_query, subgraph_query, model_type='uni'):
    df = pd.pivot_table(pd.DataFrame(con.run(hero_query).data()),
                       index='hero', columns='nn_type', values='nn_ct').fillna(0).reset_index().merge(
    pd.DataFrame(con.run(subgraph_query).data()),
    on='hero', how='outer')
    df['villain_pct']  = df['Villian'] / df['nn_ct'] * 100
    if model_type == 'uni' or model_type == 'mix':
        df['p_rel_ct'] = (df['nn_ct'] * (df['nn_ct'] - 1)) / 2
    else:
        df['p_rel_ct'] = (df['Hero'] + df['Villian']) * df['Comic']
    df['density'] = df['rel_ct'] / df['p_rel_ct']
    return df

## Uni Densities Next Nearest Neighbors

In [49]:
uni_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[*..2]-(nn)
RETURN h.name as hero, labels(nn)[-1] as nn_type, count(nn) as nn_ct'''

uni_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:2})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

uni_nnn_densities = local_densities(uni_session, uni_heroes, uni_subgraph)
uni_nnn_densities

Unnamed: 0,hero,Hero,Villian,h_type,nn_ct,rel_ct,villain_pct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,5.0,0.0,Hero,5,15,0.000000,10.0,1.500000
1,3-D MAN/CHARLES CHAN,111.0,11.0,Hero,122,6645,9.016393,7381.0,0.900285
2,4-D MAN/MERCURIO,61.0,11.0,Villian,72,1271,15.277778,2556.0,0.497261
3,8-BALL/,14.0,0.0,Hero,14,81,0.000000,91.0,0.890110
4,A,6.0,2.0,Hero,8,36,25.000000,28.0,1.285714
...,...,...,...,...,...,...,...,...,...
6434,SEA LEOPARD,,,Hero,0,0,,0.0,
6435,"GERVASE, LADY ALYSSA",,,Hero,0,0,,0.0,
6436,SHARKSKIN,,,Hero,0,0,,0.0,
6437,"GIURESCU, RADU",,,Hero,0,0,,0.0,


## Bi Densities Next Nearest Neighbors

In [None]:
bi_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[*..4]-(o)
RETURN h.name as hero, labels(o)[-1] as nn_type, count(o) as nn_ct'''

bi_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:4})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

In [None]:
bi_nnn_densities = local_densities(bi_session, bi_heroes, bi_subgraph)
bi_nnn_densities

## Mixed Uni Densities Next Nearest Neighbors

In [None]:
mix_uni_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[:KNOWS*..2]-(o)
RETURN h.name as hero, labels(o)[-1] as nn_type, count(o) as nn_ct'''

mix_uni_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:2, relationshipFilter: "KNOWS"})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

In [None]:
mix_uni_nn_densities = local_densities(mix_session, mix_uni_heroes, mix_uni_subgraph)
mix_uni_nn_densities

## Mixed Bi Densities Next Nearest Neighbors

In [None]:
mix_bi_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[:APPEARS_IN*..4]-(o)
RETURN h.name as hero, labels(o)[-1] as nn_type, count(o) as nn_ct'''

mix_bi_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:4, relationshipFilter: "APPEARS_IN"})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

In [None]:
mix_bi_nnn_densities = local_densities(mix_session, mix_bi_heroes, mix_bi_subgraph)
mix_bi_nnn_densities