In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('ggplot')
%matplotlib inline

import warnings
warnings.filterwarnings('once')

from neo4j import GraphDatabase

In [9]:
uri = "bolt://localhost:7687"

driver = GraphDatabase.driver(uri, auth=('neo4j', 'password'))

uni_session = driver.session(database="unimodal")
bi_session = driver.session(database="bimodal")
mix_session = driver.session(database="mixmodal")

In [10]:
uni_session.run('MATCH (h:Hero) RETURN count(h)').data()

[{'count(h)': 6439}]

In [48]:
def local_densities(con, hero_query, subgraph_query, model_type='uni'):
    df = pd.pivot_table(pd.DataFrame(con.run(hero_query).data()),
                       index='hero', columns='nn_type', values='nn_ct').fillna(0).reset_index().merge(
    pd.DataFrame(con.run(subgraph_query).data()),
    on='hero', how='outer')
    df['villain_pct']  = df['Villian'] / df['nn_ct'] * 100
    if model_type == 'uni' or model_type == 'mix':
        df['p_rel_ct'] = (df['nn_ct'] * (df['nn_ct'] - 1)) / 2
    else:
        df['p_rel_ct'] = (df['Hero'] + df['Villian']) * df['Comic']
    df['density'] = df['rel_ct'] / df['p_rel_ct']
    return df

## Uni Densities Nearest Neighbors

In [49]:
uni_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[]-(nn)
RETURN h.name as hero, labels(nn)[-1] as nn_type, count(nn) as nn_ct'''

uni_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:1})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

uni_nn_densities = local_densities(uni_session, uni_heroes, uni_subgraph)
uni_nn_densities

Unnamed: 0,hero,Hero,Villian,h_type,nn_ct,rel_ct,villain_pct,p_rel_ct,density
0,24-HOUR MAN/EMMANUEL,5.0,0.0,Hero,5,15,0.000000,10.0,1.500000
1,3-D MAN/CHARLES CHAN,111.0,11.0,Hero,122,6645,9.016393,7381.0,0.900285
2,4-D MAN/MERCURIO,61.0,11.0,Villian,72,1271,15.277778,2556.0,0.497261
3,8-BALL/,14.0,0.0,Hero,14,81,0.000000,91.0,0.890110
4,A,6.0,2.0,Hero,8,36,25.000000,28.0,1.285714
...,...,...,...,...,...,...,...,...,...
6434,SEA LEOPARD,,,Hero,0,0,,0.0,
6435,"GERVASE, LADY ALYSSA",,,Hero,0,0,,0.0,
6436,SHARKSKIN,,,Hero,0,0,,0.0,
6437,"GIURESCU, RADU",,,Hero,0,0,,0.0,


## Bi Densities Nearest Neighbors

In [None]:
bi_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[*..2]-(o)
RETURN h.name as hero, labels(o)[-1] as nn_type, count(o) as nn_ct'''

bi_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:2})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

In [None]:
bi_nn_densities = local_densities(bi_session, bi_heroes, bi_subgraph)
bi_nn_densities

## Mixed Uni Densities Nearest Neighbors 

In [None]:
mix_uni_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[:KNOWS]-(o)
RETURN h.name as hero, labels(o)[-1] as nn_type, count(o) as nn_ct'''

mix_uni_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:1, relationshipFilter: "KNOWS"})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

In [None]:
mix_uni_nn_densities = local_densities(mix_session, mix_uni_heroes, mix_uni_subgraph)
mix_uni_nn_densities

## Mixed Bi Densities Nearest Neighbors

In [None]:
mix_bi_heroes= '''
MATCH (h:Hero)
OPTIONAL MATCH (h)-[:APPEARS_IN*..2]-(o)
RETURN h.name as hero, labels(o)[-1] as nn_type, count(o) as nn_ct'''

mix_bi_subgraph='''
MATCH (h:Hero)
call apoc.path.subgraphAll(h, {maxLevel:2, relationshipFilter: "APPEARS_IN"})
YIELD nodes, relationships
RETURN h.name as hero, labels(h)[-1] as h_type, size(nodes)-1 as nn_ct, size(relationships) as rel_ct'''

In [None]:
mix_bi_nn_densities = local_densities(mix_session, mix_bi_heroes, mix_bi_subgraph)
mix_bi_nn_densities

## Local densities

apoc.path.subgraphAll()
use `size` to count the nodes and relationships
- number of nearest nodes
  - number of nearest non villain
  - number of nearest villain
  - all
  - percent of villains
- number of relationships within nearest neighbor subgraph
- number of possible relationships
- density

- number of next nearest nodes
  - number of next nearest non villain
  - number of next nearest villain
  - all
  - percent of villains
- number of relationships within next nearest neighbor subgraph
- number of possible relationships
- density

