# Question 5

- Implement K-means clustering on the `Driver` nodes, using each driver's birth year (`born`) as the feature.


## Install packages


In [6]:
# Uncomment on the first run to install the notebook's dependencies
# (the `%pip` magic installs into the running kernel's environment).
# %pip install -r requirements.txt
# %pip install neo4j
# %pip install graphdatascience

---


## Parameters


In [7]:
import os

# Connection settings for the Neo4j instance.
# Every value can be overridden through an environment variable so that real
# credentials do not have to be stored in the notebook; the fallbacks are the
# values that were previously hard-coded here.
URI = os.environ.get("NEO4J_URI", "neo4j://localhost")
DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")
USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
# NOTE(security): the fallback password should only ever belong to a throwaway
# local database -- set NEO4J_PASSWORD for anything else.
PWD = os.environ.get("NEO4J_PASSWORD", "Neo4j1234")
# (user, password) pair, ready to be passed as the driver's `auth=` argument.
AUTH = (USERNAME, PWD)

---

## Establish Connection


- Test Connection


In [8]:
from neo4j import GraphDatabase

# Smoke test: open a short-lived driver and ask it to verify that the server
# is reachable with the configured credentials. A failure is reported as text
# rather than raised, so the cell always completes.
with GraphDatabase.driver(URI, auth=AUTH) as probe:
    try:
        probe.verify_connectivity()
    except Exception as err:
        print(f"Error: \n{err}")
    else:
        print("Connect success.")

Connect success.


---


## Clustering

- Cypher script


In [9]:
# Cypher scripts used by the clustering pipeline, in execution order.

# Step 0: store the birth year as a one-element float list in `_born`, the
# property that is later projected and handed to k-means as `nodeProperty`.
# Nothing is returned: the caller does not use the nodes, so streaming them
# back would be wasted work.
process_born = """
// 0-data process
MATCH (d:Driver)
SET d._born = [toFloat(d.born)]
"""

# Step 1a: remove a previously projected in-memory graph. The second argument
# (failIfMissing = false) makes the call a no-op instead of an error when the
# graph does not exist yet, so it is safe to run before every projection.
drop_graph = """
// 1-drop graph (if it exists)
CALL gds.graph.drop('graph_driver_cluster', false) YIELD graphName
"""

# Step 1b: project all Driver nodes (with their `_born` property) and every
# relationship type into the in-memory graph `graph_driver_cluster`.
create_graph = """
// 1-create graph
CALL gds.graph.project(
    'graph_driver_cluster',
    {
      Driver: {
        properties: '_born'
      }
    },
    '*'
)
"""

# Step 2: stream k-means (k = 3, fixed seed for repeatable assignments) over
# `_born` and return one row per driver: name, birth year, assigned cluster.
# The node is resolved once and reused for both returned properties.
k_mean = """
// 2-k-means
CALL gds.kmeans.stream(
    'graph_driver_cluster',
    {
        nodeProperty: '_born',
        k: 3,
        randomSeed: 42
    }
)
YIELD nodeId, communityId
WITH gds.util.asNode(nodeId) AS d, communityId
RETURN
    d.name AS Driver,
    d.born AS Year,
    communityId
ORDER BY communityId, Year, Driver ASC
"""

In [10]:
# Run the k-means pipeline end to end: prepare the feature property, rebuild
# the in-memory GDS graph, then stream and print the cluster assignments.
# The cell can be re-run: a graph left over from a previous run is dropped
# before it is projected again.
GRAPH_NAME = "graph_driver_cluster"  # must match the name used in the Cypher scripts

with GraphDatabase.driver(URI, auth=(USERNAME, PWD)) as driver:
    with driver.session(database=DATABASE) as session:
        # consume() drains each write query so that any server-side error
        # surfaces here rather than on a later statement.
        session.run(process_born).consume()       # process born

        # Projecting onto an existing graph name fails, so drop a stale
        # projection first -- but only when the catalog actually holds one.
        graph_exists = session.run(
            "CALL gds.graph.exists($name) YIELD exists RETURN exists",
            {"name": GRAPH_NAME},
        ).single()["exists"]
        if graph_exists:
            session.run(drop_graph).consume()     # drop_graph

        session.run(create_graph).consume()       # create_graph

        result = session.run(k_mean)

        # One line per driver: cluster id, birth year, name.
        for record in result:
            print(f"{record['communityId']}\t {record['Year']}\t {record['Driver']}")

0	 1976	 Mark Webber
0	 1979	 Kimi Raikkonen
0	 1980	 Jenson Button
0	 1981	 Felipe Massa
0	 1981	 Fernando Alonso
0	 1981	 Heikki Kovalainen
1	 1989	 Daniel Ricciardo
1	 1989	 Jules Bianchi
1	 1989	 Valtteri Bottas
1	 1990	 Charles Pic
1	 1990	 Jean-Éric Vergne
1	 1990	 Sergio Pérez
1	 1991	 Esteban Gutiérrez
1	 1991	 Max Chilton
2	 1983	 Adrian Sutil
2	 1985	 Giedo van der Garde
2	 1985	 Lewis Hamilton
2	 1985	 Nico Rosberg
2	 1985	 Pastor Maldonado
2	 1986	 Paul di Resta
2	 1986	 Romain Grosjean
2	 1987	 Nico Hülkenberg
2	 1987	 Sebastian Vettel
