# Importing a CSV file into Neo4j

In [111]:
import pandas as pd
from neo4j.v1 import GraphDatabase, basic_auth

# NOTE(review): the Bolt URI and the credentials are hardcoded; consider reading
# them from environment variables before sharing this notebook.
driver = GraphDatabase.driver("bolt://localhost:7687", auth=basic_auth("neo4j", "blockchain"))

# Only the first 1000 rows of the CSV are loaded. Wrapping the frame in a list
# lets the loop below handle this sample the same way it would handle the chunk
# iterator from the commented-out line (which would stream the whole file).
# ethereum = pd.read_csv("./transactions.csv", chunksize=1000, iterator=True, sep=";")
ethereum = [pd.read_csv("./transactions.csv", nrows=1000, sep=";")]

# Names of the CSV columns holding the sender, the receiver and the amount.
IN_ADDRESS = 'from'
OUT_ADDRESS = 'to'
AMOUNT = 'value'

# Upserts both endpoint Address nodes and the Transactions relationship between
# them, then updates two running totals on that relationship. Both totals are
# *decremented*, so `amount` and `number` end up as negated sums (presumably so
# that a minimum spanning tree weighted by `number` prefers the busiest links;
# TODO confirm).
# Every fragment ends with a space so adjacent clauses are never glued together.
IMPORT_QUERY = (
    "MERGE (address1:Address {address:{address1}}) "
    "MERGE (address2:Address {address:{address2}}) "
    "CREATE UNIQUE (address1)-[t:Transactions]->(address2) "
    "SET t.amount = coalesce(t.amount, 0) - {amount} "
    "SET t.number = coalesce(t.number, 0) - 1"
)

session = driver.session()
try:
    for chunk in ethereum:
        for index, row in chunk.iterrows():
            # One parameterised statement per CSV row.
            session.run(
                IMPORT_QUERY,
                {"address1": row[IN_ADDRESS], "address2": row[OUT_ADDRESS], 'amount': row[AMOUNT]},
            )
finally:
    # Release the session even when a row fails to import.
    session.close()

# Clustering a Neo4j graph

```cypher
//Find all connected components
//(write:true with partitionProperty "component" asks the procedure to store the result on the nodes)
CALL algo.unionFind('Address', 'Transactions', {write:true, partitionProperty:"component"})
YIELD nodes, setCount, loadMillis, computeMillis, writeMillis
RETURN nodes;

//Create minimum spanning tree
//TODO Use connected components only
MATCH(a:Address{address: '0x0d4ecc7d750180ebf4a9df728b6669b5bceb5e40'})
CALL algo.mst(a, 'number', {write:true, writeProperty:"MSTree"})
YIELD loadMillis, computeMillis, writeMillis, weightSum, weightMin, weightMax, relationshipCount
RETURN relationshipCount;

//Create clusters
//DESC matches the query executed in the "Create clusters" code cell of this notebook
MATCH ()-[t:MSTree]->()
WITH t
ORDER BY t.number DESC
LIMIT 80 //Number of clusters - number of components - 1
DELETE t;

//Remove transactions relation
MATCH ()-[t:Transactions]->() DELETE t;

//Remove addresses
MATCH (a:Address) DETACH DELETE a;
```

### Open session

In [112]:
# Connect to the local Neo4j instance over Bolt and open the session that the
# following cells run their queries on.
neo4j_auth = basic_auth("neo4j", "blockchain")
driver = GraphDatabase.driver("bolt://localhost:7687", auth=neo4j_auth)
session = driver.session()

### Find connected components

In [114]:
# Run union-find over Address nodes and Transactions relationships; the call is
# configured with write:true and partitionProperty 'component'.
union_find_query = " ".join([
    "CALL algo.unionFind('Address', 'Transactions', {write:true, partitionProperty:'component'})",
    "YIELD nodes, setCount, loadMillis, computeMillis, writeMillis",
    "RETURN nodes;",
])
session.run(union_find_query)

<neo4j.v1.result.BoltStatementResult at 0x7f0a21768cc0>

### Create minimum spanning tree

In [107]:
# Start from one fixed address and call algo.mst with 'number' as the weight
# property; the call is configured with write:true and writeProperty 'MSTree'.
mst_query = " ".join([
    "MATCH(a:Address{address: '0x0d4ecc7d750180ebf4a9df728b6669b5bceb5e40'})",
    "CALL algo.mst(a, 'number', {write:true, writeProperty:'MSTree'})",
    "YIELD loadMillis, computeMillis, writeMillis, weightSum, weightMin, weightMax, relationshipCount",
    "RETURN relationshipCount;",
])
session.run(mst_query)

<neo4j.v1.result.BoltStatementResult at 0x7f0a218c5da0>

### Create clusters

In [108]:
# Delete the 40 MSTree relationships that come first when ordered by `number`
# in descending order.
cut_edges_query = " ".join([
    "MATCH ()-[t:MSTree]->()",
    "WITH t",
    "ORDER BY t.number DESC",
    "LIMIT 40",
    "DELETE t;",
])
session.run(cut_edges_query)

<neo4j.v1.result.BoltStatementResult at 0x7f0a218c56d8>

### Remove MSTree relation

In [44]:
# Delete every MSTree relationship.
drop_mstree_query = "MATCH ()-[t:MSTree]->() DELETE t;"
session.run(drop_mstree_query)

<neo4j.v1.result.BoltStatementResult at 0x7f0a2180c8d0>

### Remove transactions relation

In [109]:
# Delete every Transactions relationship.
drop_transactions_query = "MATCH ()-[t:Transactions]->() DELETE t;"
session.run(drop_transactions_query)

<neo4j.v1.result.BoltStatementResult at 0x7f0a218c5e80>

### Remove addresses

In [110]:
# Delete every Address node together with any relationships still attached to
# it, then release the session and the driver. The cleanup sits in `finally` so
# the connection is released even when the delete statement fails.
try:
    session.run("MATCH (a:Address) DETACH DELETE a;")
finally:
    session.close()
    driver.close()