In [4]:
from neo4j import GraphDatabase
import os
from dotenv import load_dotenv
import logging
from multiprocessing import Pool
import pandas as pd
import itertools

## Load Enviornment Variables

In [5]:
load_dotenv()
USERNAME = os.getenv('USERNAME_NEO4J')
PASS = os.getenv('PASSWORD_NEO4J')
URI = 'neo4j://localhost:7687'

## Configuring Logging

In [6]:
# Configure logging
logging.basicConfig(filename='neo4j_import.log',
                     level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


## Defining Graph Properties for Neo4j

### Cypher Queries - Graph Schema

In [11]:
# Function to connect to Neo4j
def connect_to_neo4j(uri, user, password):
    driver = GraphDatabase.driver(uri, auth=(user, password))
    return driver

# LOAD CSV WITH HEADERS FROM 'file:///artists-with-headers.csv' AS line
# CREATE (:Artist {name: line.Name, year: toInteger(line.Year)})
#               WITH line, toFloat(line.value) / 1.0E18 AS value
def import_data_from_csv(driver):
    with driver.session() as session:
        query = """
           LOAD CSV WITH HEADERS FROM 'file:///illicit_transaction_batches/illicit_transactions_eth.csv' AS row
           CALL {
               WITH row 
               MERGE (from:Account {address: COALESCE(row.from, 'Unknown')})
               MERGE (to:Account {address: COALESCE(row.to, 'Unknown')})

               MERGE (sc:SmartContract {contractAddress: row.contractAddress})

               MERGE (block:Block {blockNumber: toInteger(row.blockNumber), timeStamp: toInteger(row.timeStamp)})
               ON CREATE SET block.timeStamp = toInteger(row.timeStamp)

               WITH row, toFloat(row.value) / 1.0E18 AS value
    
               CREATE (from)-[:TRANSFER {
                timeStamp: toInteger(row.timeStamp),
                hash: row.hash,
                value: value,
                gas: toInteger(row.gas),
                gasPrice: toInteger(row.gasPrice),
                isError: toBoolean(row.isError),
                reputabilityFlag: toInteger(row.FLAG)
                }]->(to)

                CREATE (block)-[:CONTAINS]->(from)
                CREATE (block)-[:CONTAINS]->(to)

               CASE WHEN sc IS NOT NULL THEN
                CREATE (from)-[:INVOKES]->(sc),
                CREATE (to)-[:INVOKES]->(sc)
               END
           } IN TRANSACTIONS OF 500 ROWS
        """
        session.run(query) 

# Function to close the Neo4j connection
def close_neo4j_connection(driver):
    driver.close()

## Pushing to Neo4j - Execution

### Pre Analysis - Illicit

In [18]:
ill_df = pd.read_csv('../illicit_transaction_batches/illicit_transactions_eth.csv')


In [19]:
(ill_df.isna().sum() / len(ill_df)) * 100

blockNumber         0.000000
timeStamp           0.000000
hash                0.000000
from                0.000000
to                  0.059556
value               0.000000
gas                 0.000000
gasPrice            0.000000
isError             0.000000
contractAddress    99.940444
FLAG                0.000000
dtype: float64

### Illicit Smart Contracts

In [10]:
file_path = "illicit_transactions_eth.csv"

driver = connect_to_neo4j(URI, USERNAME, PASS)
import_data_from_csv(driver)  # Set include_contracts to True if needed
close_neo4j_connection(driver)

CypherSyntaxError: {code: Neo.ClientError.Statement.SyntaxError} {message: Invalid input 'CASE': expected
  "("
  ","
  "CALL"
  "CREATE"
  "DELETE"
  "DETACH"
  "FOREACH"
  "LOAD"
  "MATCH"
  "MERGE"
  "NODETACH"
  "OPTIONAL"
  "REMOVE"
  "RETURN"
  "SET"
  "UNION"
  "UNWIND"
  "USE"
  "WITH"
  "}" (line 28, column 16 (offset: 1136))
"               CASE WHEN sc IS NOT NULL THEN"
                ^}

In [9]:
def check_data_existence():
    # Define Cypher queries to count nodes and relationships
    node_query = "MATCH (n:Account) RETURN COUNT(n:Account) AS nodeCount"
    relationship_query = "MATCH ()-[r]->() RETURN COUNT(r) AS relationshipCount"
    
    # Connect to Neo4j
    driver = GraphDatabase.driver(URI, auth=(USERNAME, PASS), database="neo4j")
    
    with driver.session() as session:
        # Execute node count query
        node_result = session.run(node_query).single()
        node_count = node_result["nodeCount"]
        
        # Execute relationship count query
        relationship_result = session.run(relationship_query).single()
        relationship_count = relationship_result["relationshipCount"]
        
        # Print node and relationship counts
        print(f"Node count: {node_count}")
        print(f"Relationship count: {relationship_count}")
        
        # Check if any nodes or relationships exist
        if node_count > 0 or relationship_count > 0:
            print("Data exists in Neo4j.")
        else:
            print("No data found in Neo4j.")

    # Close the Neo4j driver
    driver.close()


In [10]:
check_data_existence()

Node count: 0
Relationship count: 0
No data found in Neo4j.
