In [8]:
from neo4j import GraphDatabase
from py2neo import Graph
import pandas as pd

In [15]:
import os

# Open the Neo4j driver used by the query cells below.
# Connection settings are read from the environment so the URI and credentials
# do not have to live in the notebook; when a variable is unset, the value that
# was previously hard-coded here is used, so the default behaviour is unchanged.
#   NEO4J_URI      - bolt URI of the database
#   NEO4J_USER     - user name for basic auth
#   NEO4J_PASSWORD - password for basic auth
driver = GraphDatabase.driver(
    os.environ.get("NEO4J_URI", "bolt://62eee17d.databases.neo4j.io"),
    auth=(
        os.environ.get("NEO4J_USER", "got"),
        os.environ.get("NEO4J_PASSWORD", "got"),
    ),
)

In [55]:
# Cypher producing one row per (Person)-->(House) match, carrying:
#   person          - the person's name
#   deathsInHouse   - number of :Person:Dead nodes with a relationship into that house
#   deadRelated     - number of :Person:Dead nodes connected to the person (any direction)
#   numberOfBattles - number of :Battle nodes connected to the person
#   book{1,2,3}PageRank - PageRank properties stored on the person node
#   isDead          - whether the person carries the :Dead label (the prediction target)
# NOTE(review): a person related to several houses would appear once per house - confirm
# whether that duplication is intended.
PERSON_FEATURES_QUERY = """
    MATCH (p:Person)-->(h:House)
    RETURN p.name AS person, 
           size((h)<--(:Person:Dead)) as deathsInHouse, 
           size((p)--(:Person:Dead)) as deadRelated, 
           size((p)--(:Battle)) as numberOfBattles,
           p.book1PageRank AS book1PageRank,
           p.book2PageRank AS book2PageRank,
           p.book3PageRank AS book3PageRank,
           p:Dead as  isDead
    """

# Run the query and materialise every record into a DataFrame while the
# session is still open (records are turned into plain dicts first).
with driver.session() as session:
    result = session.run(PERSON_FEATURES_QUERY)
    df = pd.DataFrame(list(map(dict, result)))

# Preview the first rows of the feature table.
display(df.head())

Unnamed: 0,book1PageRank,book2PageRank,book3PageRank,deadRelated,deathsInHouse,isDead,numberOfBattles,person
0,0.15,0.15,0.15,0,0,False,0,Shierle Swyft
1,0.15,0.15,0.15,0,0,False,0,Steffon Swyft
2,0.15,0.15,0.15,0,0,False,0,Harys Swyft
3,0.15,0.15,0.15,0,0,False,0,Jocelyn Swyft
4,0.15,0.15,0.15,0,0,False,0,Joanna Swyft


In [63]:
# Predictor columns fed to the classifier; the order here is also the order
# in which feature importances are reported later.
columns = [
    "deathsInHouse",
    "deadRelated",
    "numberOfBattles",
    "book1PageRank",
    "book2PageRank",
    "book3PageRank",
]

# Feature matrix: all rows, predictor columns only.
X = df.loc[:, columns]
# Target kept as a single-column DataFrame (not a Series) holding the isDead flag.
y = df.loc[:, ["isDead"]]

In [64]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the rows for evaluation; the fixed seed keeps the split
# the same across re-runs.
(X_train, X_test,
 y_train, y_test) = train_test_split(X, y, random_state=1, test_size=0.25)

In [65]:
import time
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.ensemble import RandomForestClassifier

# Build and train a 30-tree random forest (depth capped at 10, seeded for
# repeatable results). The target is flattened from a one-column frame to a
# 1-D array before fitting; fit() hands back the fitted estimator itself.
random_forest = RandomForestClassifier(
    n_estimators=30,
    max_depth=10,
    random_state=1,
).fit(X_train, y_train.values.ravel())

# Predictions for the held-out rows.
y_predict = random_forest.predict(X_test)

# Show accuracy, then precision and recall for the positive (isDead) class.
display(
    accuracy_score(y_test, y_predict),
    precision_score(y_test, y_predict, average="binary"),
    recall_score(y_test, y_predict, average="binary"),
)

0.7861842105263158

0.7317073170731707

0.14018691588785046

In [66]:
# Print each predictor next to the importance the fitted forest assigned it,
# in the same order as `columns`.
for feature, score in zip(columns, random_forest.feature_importances_):
    print(feature, score)

deathsInHouse 0.5216250247319187
deadRelated 0.14162603942595467
numberOfBattles 0.05963185444871031
book1PageRank 0.0977218784454429
book2PageRank 0.10830071831882249
book3PageRank 0.07109448462915083
