In [1]:
# Setup
import os

import pandas as pd
from graphdatascience import GraphDataScience
from neo4j import GraphDatabase

# Show query results in full: no column limit, no wrapping of wide frames
# onto several lines, and no truncation of long cell values.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.expand_frame_repr', False),
    ('max_colwidth', None),
):
    pd.set_option(option_name, option_value)

# Connect to Neo4j.
# Connection settings are read from environment variables so that real
# credentials never have to be typed into the notebook. The fallbacks are the
# original local-development values, so the cell behaves as before when the
# variables are unset.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")  # CHANGE TO YOUR PORT NUMBER
user = os.environ.get("NEO4J_USER", "neo4j")
password = os.environ.get("NEO4J_PASSWORD", "neo4j12345")   # placeholder only; set NEO4J_PASSWORD

# `gds` is the client used by the gds.run_cypher(...) cells below.
gds = GraphDataScience(uri, auth=(user, password))
gds.set_database("neo4j")

In [None]:
# Seed a small friendship graph with Cypher.
# Mitch gets a FRIEND_OF relationship to nine Individuals; Nathan is linked
# only through Carl. The statement uses CREATE, so every run adds a fresh copy
# of these nodes and relationships.
friend_graph_query = """
CREATE (mitch:Individual {name: 'Mitch'})

CREATE (carl:Individual {name: 'Carl'}),
       (nathan:Individual {name: 'Nathan'}),
       (sophia:Individual {name: 'Sophia'}),
       (emma:Individual {name: 'Emma'}),
       (olivia:Individual {name: 'Olivia'}),
       (james:Individual {name: 'James'}),
       (michael:Individual {name: 'Michael'}),
       (emily:Individual {name: 'Emily'}),
       (madison:Individual {name: 'Madison'}),
       (david:Individual {name: 'David'}),

       (mitch)-[:FRIEND_OF]->(carl),
       (mitch)-[:FRIEND_OF]->(sophia),
       (mitch)-[:FRIEND_OF]->(emma),
       (mitch)-[:FRIEND_OF]->(olivia),
       (mitch)-[:FRIEND_OF]->(james),
       (mitch)-[:FRIEND_OF]->(michael),
       (mitch)-[:FRIEND_OF]->(emily),
       (mitch)-[:FRIEND_OF]->(madison),
       (mitch)-[:FRIEND_OF]->(david),

       (carl)-[:FRIEND_OF]->(nathan);

"""
gds.run_cypher(friend_graph_query)

## Load the Fake FBI CSV Dataset into Neo4j

In [4]:
# Create one Person node per CSV row, copying the eight columns onto the node.
# The query has no Python placeholders, so it is written as a plain string
# with ordinary Cypher braces.
# NOTE(review): the next cell loads the same CSV in batches; running both
# cells creates every Person twice.
gds.run_cypher(""" 

LOAD CSV WITH HEADERS FROM 'file:///Fake_Data_FBI_Neo4j.csv' AS row
CALL {
    WITH row
    CREATE (:Person {
        full_name: row.full_name,
        ssn: row.ssn,
        ip_address: row.ip_address,
        email: row.email,
        phone_number: row.phone_number,
        address: row.address,
        suspicious_activity_report: row.suspicious_activity_report,
        FBI_case_number: row.FBI_case_number
    })
}

""")

In [4]:
# Create Person nodes from the CSV, committing after every 1,000 rows
# (CALL { ... } IN TRANSACTIONS OF 1000 ROWS).
person_batch_query = """ 

LOAD CSV WITH HEADERS FROM 'file:///Fake_Data_FBI_Neo4j.csv' AS row
WITH row
CALL {
    WITH row
    CREATE (:Person {
        full_name: row.full_name,
        ssn: row.ssn,
        ip_address: row.ip_address,
        email: row.email,
        phone_number: row.phone_number,
        address: row.address,
        suspicious_activity_report: row.suspicious_activity_report,
        FBI_case_number: row.FBI_case_number
    })
} IN TRANSACTIONS OF 1000 ROWS

"""
gds.run_cypher(person_batch_query)

In [5]:
# Give every Person that has a phone number a HAS_PHONE_NUMBER relationship
# to a Phone node. MERGE keeps one Phone node per distinct number, so people
# with the same number end up attached to the same node.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.phone_number IS NOT NULL
MERGE (ph:Phone {number: p.phone_number})
MERGE (p)-[:HAS_PHONE_NUMBER]->(ph)

""")

In [6]:
# Give every Person that has an IP address a HAS_IP relationship to an IP
# node. MERGE keeps one IP node per distinct address.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.ip_address IS NOT NULL
MERGE (ph:IP {ip_address: p.ip_address})
MERGE (p)-[:HAS_IP]->(ph)

""")

In [7]:
# Give every Person that has an SSN a HAS_SSN relationship to an SSN node.
# MERGE keeps one SSN node per distinct value.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.ssn IS NOT NULL
MERGE (ph:SSN {ssn: p.ssn})
MERGE (p)-[:HAS_SSN]->(ph)

""")

In [8]:
# Give every Person that has an FBI case number a HAS_FBI_CASE relationship
# to an FBI_Case node. MERGE keeps one FBI_Case node per distinct number.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.FBI_case_number IS NOT NULL
MERGE (ph:FBI_Case {FBI_case_number: p.FBI_case_number})
MERGE (p)-[:HAS_FBI_CASE]->(ph)

""")

In [9]:
# Give every Person that has an address a HAS_ADDRESS relationship to an
# Address node. MERGE keeps one Address node per distinct address.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.address IS NOT NULL
MERGE (ph:Address {address: p.address})
MERGE (p)-[:HAS_ADDRESS]->(ph)

""")

In [10]:
# Give every Person that has an email a HAS_EMAIL relationship to an Email
# node. MERGE keeps one Email node per distinct address.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.email IS NOT NULL
MERGE (ph:Email {email: p.email})
MERGE (p)-[:HAS_EMAIL]->(ph)

""")

In [11]:
# Give every Person that has a suspicious activity report a HAS_SAR
# relationship to a SAR node. MERGE keeps one SAR node per distinct report.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.suspicious_activity_report IS NOT NULL
MERGE (ph:SAR {suspicious_activity_report: p.suspicious_activity_report})
MERGE (p)-[:HAS_SAR]->(ph)

""")

In [12]:
# Make new nodes for the suspicious activity report and the FBI case number,
# to see which people are under the same reports.
#
# Property keys are case-sensitive in Cypher. The Person nodes are loaded with
# `FBI_case_number`, so the lowercase `fbi_case_number` used here before was
# always null: the WHERE clause filtered out every Person and nothing was
# created. The query now reads the property under its real name.
#
# NOTE(review): the SAR label is also used by the HAS_SAR cell above, which
# keys its nodes on `suspicious_activity_report`; these nodes are keyed on
# `report_number`, so the two cells produce separate SAR nodes. Confirm that
# is intended.
gds.run_cypher("""

MATCH (p:Person)
WHERE p.suspicious_activity_report IS NOT NULL
      AND p.FBI_case_number IS NOT NULL

MERGE (sus:SAR {report_number: p.suspicious_activity_report})
MERGE (fbi:FBI_num {case_number: p.FBI_case_number})

MERGE (p)-[:HAS_REPORT_NUMBER]->(sus)
MERGE (p)-[:HAS_CASE_NUMBER]->(fbi)

""")

## Load Flower Data with Pandas

In [10]:
from neo4j import GraphDatabase
from getpass import getpass

In [11]:
# Prompt for the Neo4j password without echoing it. This rebinds `password`,
# which the setup cell at the top of the notebook also assigns.
password = getpass()

········


In [12]:
# Open a plain Neo4j driver; it is used further down to send the flower
# records as a query parameter.
# `uri` and `user` come from the setup cell, so changing the port or user
# there applies to this connection too instead of leaving a stale literal.
driver = GraphDatabase.driver(uri, auth=(user, password))

In [3]:
import pandas as pd
import numpy as np
import os
# Load the iris CSV. The location can be overridden with the IRIS_CSV_PATH
# environment variable, so the notebook is not tied to one machine; the
# default is the original local path.
iris_csv_path = os.environ.get('IRIS_CSV_PATH', r'C:\Users\TimEa\OneDrive\Data\Iris\iris.csv')
df = pd.read_csv(iris_csv_path, encoding='unicode_escape')

# Replace '.' in column headers with '_' so the names match the keys the
# Cypher load below reads (row['Sepal_Length'], ...).
df = df.rename(columns=lambda x: x.replace('.', '_'))
print(df.head())

   Sepal_Length  Sepal_Width  Petal_Length  Petal_Width Species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa


In [5]:
# Name of the label column, and every other column except 'Id', kept in
# their original order.
target_variable = 'Species'
independent_variables = [
    column
    for column in df.columns
    if column != 'Id' and column != target_variable
]

In [6]:
# Build the two helper columns in one chain:
#   Id     - the current row label rendered as a string
#   Target - integer category code of the target column
# The index is reset in between so the frame ends up with a fresh 0..n-1 index.
df = (
    df.assign(Id=df.index.astype(str))
      .reset_index(drop=True)
      .assign(Target=lambda frame: frame[target_variable].astype('category').cat.codes)
)

print(df.head())

   Sepal_Length  Sepal_Width  Petal_Length  Petal_Width Species Id  Target
0           5.1          3.5           1.4          0.2  setosa  0       0
1           4.9          3.0           1.4          0.2  setosa  1       0
2           4.7          3.2           1.3          0.2  setosa  2       0
3           4.6          3.1           1.5          0.2  setosa  3       0
4           5.0          3.6           1.4          0.2  setosa  4       0


In [9]:
# One dict per row, keyed by column name; this list is what gets passed to
# Neo4j as the $data parameter.
data = df.to_dict(orient='records')

# Preview the second record
display(data[1])

{'Sepal_Length': 4.9,
 'Sepal_Width': 3.0,
 'Petal_Length': 1.4,
 'Petal_Width': 0.2,
 'Species': 'setosa',
 'Id': '1',
 'Target': 0}

In [20]:
# Upsert one Flower_Record node per row of `data`, matched on Row_ID.
# Properties are only written when the node is first created (ON CREATE SET),
# so re-running the cell does not overwrite existing nodes.
with driver.session() as session:
    result = session.run("""
    UNWIND $data AS row
    MERGE (s:Flower_Record {Row_ID: toInteger(row['Id'])})
    ON CREATE SET s.Target = toInteger(row['Target'])
        , s.Sepal_Length = row['Sepal_Length']
        , s.Sepal_Width  = row['Sepal_Width']
        , s.Petal_Length = row['Petal_Length']
        , s.Petal_Width  = row['Petal_Width']
        , s.Species      = row['Species']""",
               {"data":data})
    # Consume the result inside the session. An unconsumed auto-commit result
    # is only drained when the session closes, and errors raised at that point
    # are not propagated, so a failed write would otherwise go unnoticed.
    result.consume()

In [21]:
# Index Flower_Record.Row_ID, the property the flower load MERGEs on.
# The index gets a label-specific name on purpose: with IF NOT EXISTS the
# statement silently does nothing when an index of the same *name* already
# exists, and the SHOW INDEX output in this notebook lists an index called
# `Row_ID` on a different label. With the generic name, no index would be
# created for Flower_Record in that database.
gds.run_cypher("""
CREATE INDEX Flower_Record_Row_ID IF NOT EXISTS FOR (s:Flower_Record) ON (s.Row_ID)
""")

In [19]:
# List the indexes currently defined in the database; the returned frame is
# the cell's output.
gds.run_cypher("\nSHOW INDEX\n")

Unnamed: 0,id,name,state,populationPercent,type,entityType,labelsOrTypes,properties,indexProvider,owningConstraint
0,7,Ingredient_ID,ONLINE,100.0,RANGE,NODE,[Ingredient],[Ingredient_ID],range-1.0,Ingredient_ID
1,5,Recipe_ID,ONLINE,100.0,RANGE,NODE,[Recipe],[Recipe_ID],range-1.0,Recipe_ID
2,9,Row_ID,ONLINE,100.0,RANGE,NODE,[Species],[Row_ID],range-1.0,
3,1,index_343aff4e,ONLINE,100.0,LOOKUP,NODE,,,token-lookup-1.0,
4,2,index_f7700477,ONLINE,100.0,LOOKUP,RELATIONSHIP,,,token-lookup-1.0,
5,3,rowid,ONLINE,100.0,RANGE,NODE,[Recipe],[Row_ID],range-1.0,rowid


In [23]:
# Connect every Flower_Record to a single shared Petal_Length node and store
# the record's measurement, as a float, on the PETAL_LENGTH relationship.
petal_length_query = """
// First, ensure there is a Petal_Length node
MERGE (p:Petal_Length)
ON CREATE SET p.name = 'Petal Length' // You can set properties if needed

// Match all Flower_Record nodes and connect them to the Petal_Length node
WITH p
MATCH (s:Flower_Record)
MERGE (s)-[r:PETAL_LENGTH]->(p)
SET r.Petal_Length = toFloat(s.Petal_Length)
"""
gds.run_cypher(petal_length_query)

In [24]:
# Connect every Flower_Record to a single shared Petal_Width node and store
# the record's measurement, as a float, on the PETAL_WIDTH relationship.
petal_width_query = """
// First, ensure there is a Petal_Width node
MERGE (p:Petal_Width)
ON CREATE SET p.name = 'Petal Width' // You can set properties if needed

// Match all Flower_Record nodes and connect them to the Petal_Width node
WITH p
MATCH (s:Flower_Record)
MERGE (s)-[r:PETAL_WIDTH]->(p)
SET r.Petal_Width = toFloat(s.Petal_Width)
"""
gds.run_cypher(petal_width_query)

In [25]:
# Connect every Flower_Record to a single shared Sepal_Length node and store
# the record's measurement, as a float, on the SEPAL_LENGTH relationship.
# The first in-query comment previously named the Petal_Width node; it now
# names the node this query actually merges.
gds.run_cypher("""
// First, ensure there is a Sepal_Length node
MERGE (p:Sepal_Length)
ON CREATE SET p.name = 'Sepal Length' // You can set properties if needed

// Match all Flower_Record nodes and connect them to the Sepal_Length node
WITH p
MATCH (s:Flower_Record)
MERGE (s)-[r:SEPAL_LENGTH]->(p)
SET r.Sepal_Length = toFloat(s.Sepal_Length)
""")

In [26]:
# Connect every Flower_Record to a single shared Sepal_Width node and store
# the record's measurement, as a float, on the SEPAL_WIDTH relationship.
# Fixes a copy-paste error: the Sepal_Width node used to be named
# 'Sepal Length'. ON MATCH SET is included so that re-running the cell also
# corrects a node that was already created with the wrong name.
gds.run_cypher("""
// First, ensure there is a Sepal_Width node
MERGE (p:Sepal_Width)
ON CREATE SET p.name = 'Sepal Width' // You can set properties if needed
ON MATCH SET p.name = 'Sepal Width'

// Match all Flower_Record nodes and connect them to the Sepal_Width node
WITH p
MATCH (s:Flower_Record)
MERGE (s)-[r:SEPAL_WIDTH]->(p)
SET r.Sepal_Width = toFloat(s.Sepal_Width)
""")