# Machine Learning 

In this section we'll explore how to use scikit-learn with Neo4j.

In [1]:
import os

import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
from neo4j.v1 import GraphDatabase, basic_auth
from sklearn.metrics.pairwise import linear_kernel

In [2]:
# Connection settings are read from the environment so that real credentials
# never have to be saved in the notebook. The fallbacks reproduce the local
# demo setup, so the cell behaves as before when no variables are set.
NEO4J_URI = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "neo")

driver = GraphDatabase.driver(NEO4J_URI, auth=basic_auth(NEO4J_USER, NEO4J_PASSWORD))

## Finding similar characters

In this section we'll calculate the similarity between characters based on their interactions.

In [3]:
# Every character name in alphabetical order. This ordering fixes which
# character each row of the matrix belongs to.
find_characters_query = """\
match (c:Character)
return c.name AS character
ORDER BY character
"""

# For one character: a 0/1 flag per character (ordered by name) saying whether
# any relationship connects the two. The name ordering matches the query above.
interactions_query = """\
MATCH (c:Character {name: {name}}), (other:Character) 
WITH c, other, CASE WHEN exists((c)--(other)) THEN 1 ELSE 0 END AS exists
ORDER BY other.name
RETURN COLLECT(exists) AS interactions
"""

with driver.session() as session:
    characters = [row["character"] for row in session.run(find_characters_query)]

    # One query per character; the single record returned holds that
    # character's whole row of flags.
    matrix = [
        session.run(interactions_query, name=name).peek()["interactions"]
        for name in characters
    ]

In [4]:
# Show the first ten entries of the first character's interaction row.
matrix[0][:10]

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

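The following function finds the characters most similar to a given character, scoring each pair by the dot product of their interaction vectors. For the 0/1 matrix built above, that score is the number of characters both have interacted with.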

In [5]:
def find_similar(matrix, index, top_n = 5):
    """Rank the other rows of ``matrix`` by their score against row ``index``.

    Scores come from ``linear_kernel``, i.e. plain dot products between the
    chosen row and every row of ``matrix``; no normalisation is applied.

    Args:
        matrix: sequence of equal-length numeric rows, one per item.
        index: position of the row to compare the others against.
        top_n: how many of the best-scoring rows to return.

    Returns:
        A list of ``(row_index, score)`` tuples, highest score first, that
        never contains ``index`` itself and has at most ``top_n`` entries.
    """
    scores = linear_kernel(matrix[index:index+1], matrix).flatten()
    # argsort is ascending, so reverse it to walk from best to worst.
    best_first = scores.argsort()[::-1]

    neighbours = []
    for candidate in best_first:
        if candidate != index:
            neighbours.append((candidate, scores[candidate]))
    return neighbours[0:top_n]

In [6]:
# Pair every character name with its position in the `characters` list.
characters_with_index = [(position, name) for position, name in enumerate(characters)]

In [7]:
def find_character_index(name):
    """Return the position of ``name`` in ``characters_with_index``.

    The scan stops at the first ``(index, character)`` pair whose character
    equals ``name``.

    Args:
        name: character name to look up.

    Returns:
        The index stored alongside the first matching character.

    Raises:
        IndexError: if no character has that name. The exception type is the
            one the previous list-indexing implementation raised; the message
            now names the missing character.
    """
    for index, character in characters_with_index:
        if character == name:
            return index
    raise IndexError("character not found: {!r}".format(name))

In [8]:
# Look up Arya's row, then list the ten characters whose 0/1 interaction
# rows score highest against hers.
character_index = find_character_index("Arya-Stark")
similar_characters = find_similar(matrix, character_index, 10)

print("Character: " + characters[character_index])
for other_index, similarity in similar_characters:
    print(similarity, characters[other_index])

Character: Arya-Stark
31.0 Joffrey-Baratheon
30.0 Eddard-Stark
29.0 Jaime-Lannister
28.0 Cersei-Lannister
26.0 Sansa-Stark
26.0 Sandor-Clegane
26.0 Robert-Baratheon
25.0 Tyrion-Lannister
25.0 Robb-Stark
21.0 Jon-Snow


In [9]:
# Every character name in alphabetical order. This ordering fixes which
# character each row of the matrix belongs to.
find_characters_query = """\
match (c:Character)
return c.name AS character
ORDER BY character
"""

# For one character: one number per character (ordered by name), namely the
# `weight` of the first relationship found between the pair, or 0 when that
# value is missing.
interactions_query = """\
MATCH (c:Character {name: {name}}), (other:Character) 
WITH c, other, coalesce([ (c)-[rel]-(other) | rel.weight ][0], 0) AS interactions
ORDER BY other.name
RETURN COLLECT(interactions) AS interactions
"""

with driver.session() as session:
    characters = [row["character"] for row in session.run(find_characters_query)]

    # One query per character; the single record returned holds that
    # character's whole row of weights.
    count_matrix = [
        session.run(interactions_query, name=name).peek()["interactions"]
        for name in characters
    ]

In [10]:
# Same ranking as before, but scored on the weighted interaction rows
# instead of the 0/1 flags.
character_index = find_character_index("Arya-Stark")
similar_characters = find_similar(count_matrix, character_index, 10)

print("Character: " + characters[character_index])
for other_index, similarity in similar_characters:
    print(similarity, characters[other_index])

Character: Arya-Stark
15814.0 Robert-Baratheon
12621.0 Joffrey-Baratheon
12182.0 Eddard-Stark
8815.0 Cersei-Lannister
7593.0 Robb-Stark
6679.0 Sansa-Stark
6576.0 Bran-Stark
6173.0 Tyrion-Lannister
5494.0 Catelyn-Stark
5230.0 Jon-Snow
