# Connections and Functions

In [1]:
import os
import neo4j
import pandas as pd
from IPython.display import display

In [2]:
# Connection settings are read from the environment when present so that
# credentials do not have to be edited into the notebook. The fallbacks are
# the values this notebook originally hard-coded, so behavior is unchanged
# when no environment variables are set.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://neo4j:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

# One driver and one session are shared by every cell below.
driver = neo4j.GraphDatabase.driver(uri=NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
session = driver.session(database=NEO4J_DATABASE)

In [3]:
# functions
def my_neo4j_wipe_out_database():
    """Wipe out the database by deleting all nodes and relationships.

    Uses the module-level ``session``. Takes no arguments and returns None.
    """
    # DETACH DELETE removes each node together with every relationship
    # attached to it, so a single statement covers connected and isolated
    # nodes alike.
    query = "match (node) detach delete node"
    # consume() drains the result so that a server-side failure is raised
    # here rather than surfacing on some later, unrelated query.
    session.run(query).consume()
    
def my_neo4j_run_query_pandas(query, **kwargs):
    """Execute a query on the shared session and return its rows as a DataFrame.

    query: the Cypher text to run.
    **kwargs: forwarded unchanged to ``session.run`` (e.g. query parameters).

    Returns a pandas DataFrame with one row per result record and one column
    per result key.
    """
    result = session.run(query, **kwargs)
    rows = [record.values() for record in result]
    column_names = result.keys()
    return pd.DataFrame(rows, columns=column_names)

def my_neo4j_nodes_relationships():
    """Display all nodes and relationships, then print the graph density.

    Runs two queries through ``my_neo4j_run_query_pandas``: one returning every
    node's name and labels, and one returning every directed relationship with
    the names and labels of both endpoints. Each result is rendered with
    ``display``.

    Density is computed as 2 * relationships / (nodes * (nodes - 1)) and
    printed with one decimal place. With fewer than two nodes that denominator
    is zero, so the density is reported as 0.0 instead of raising
    ZeroDivisionError (for example right after the database has been wiped).

    Returns None.
    """
    print("-------------------------")
    print("  Nodes:")
    print("-------------------------")
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    df = my_neo4j_run_query_pandas(query)
    number_nodes = df.shape[0]
    display(df)
    print("-------------------------")
    print("  Relationships:")
    print("-------------------------")
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    df = my_neo4j_run_query_pandas(query)
    number_relationships = df.shape[0]
    display(df)
    # NOTE(review): the factor 2 matches the usual undirected-graph density;
    # the relationships counted here are directed -- confirm this is intended.
    possible_pairs = number_nodes * (number_nodes - 1)
    if possible_pairs:
        density = (2 * number_relationships) / possible_pairs
    else:
        # Zero or one node: no pair of distinct nodes exists, avoid dividing by zero.
        density = 0.0
    print("-------------------------")
    print("  Density:", f'{density:.1f}')
    print("-------------------------")

# Testing

In [4]:
# Start the manual test from an empty database.
my_neo4j_wipe_out_database()

In [5]:
# Build a tiny hand-written graph for testing: two Entity nodes, one
# Energy_Project node, and two SHARE relationships
# (Entity -> Entity with share 100, Entity -> Energy_Project with share 70).
# The database is wiped first so the cell can be re-run without duplicating nodes.
my_neo4j_wipe_out_database()

query = """

CREATE
    (E100000002359:Entity {id: 'E100000002359', name: '1590 Energy', entity_type: 'legal entity', legal_entity_type: 'Co', publicly_listed: '', country: ''}),
    (E100001014315:Entity {id: 'E100001014315', name: '1832 Asset Management', entity_type: 'legal entity', legal_entity_type: 'LP', publicly_listed: 'FALSE', country: 'Canada'}),
    (EP1:Energy_Project {name: '48th Street power station 9', energy_project_type: 'Gas or Oil Plant', capacity_mw: 84, fuel: 'fossil gas: natural gas', status: 'operating', country: 'United States'}),
    (E100000002359)-[:SHARE {share: 100}]->(E100001014315),
    (E100001014315)-[:SHARE {share: 70}]->(EP1)
    
"""

# session.run is the last expression, so the notebook echoes the returned Result object.
session.run(query)

<neo4j._sync.work.result.Result at 0x7fd2d631e580>

In [6]:
# Show the nodes, relationships and density of the sample graph created above.
my_neo4j_nodes_relationships()

# To view the same graph visually, run `match (n) return n` in the GUI.

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,1590 Energy,[Entity]
1,1832 Asset Management,[Entity]
2,48th Street power station 9,[Energy_Project]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,1590 Energy,[Entity],SHARE,1832 Asset Management,[Entity]
1,1832 Asset Management,[Entity],SHARE,48th Street power station 9,[Energy_Project]


-------------------------
  Density: 0.7
-------------------------


# CSV TESTING

In [12]:
# LOAD CSV smoke test: wipe the database, then read companies.csv and merge
# one Company node per row whose Id column is not null, keyed on companyId.
my_neo4j_wipe_out_database()

query = """

LOAD CSV WITH HEADERS FROM 'file:///companies.csv' AS row
WITH row WHERE row.Id IS NOT NULL
MERGE (c:Company {companyId: row.Id});

"""

# session.run is the last expression, so the notebook echoes the returned Result object.
session.run(query)

<neo4j._sync.work.result.Result at 0x7fc5a13f1850>

In [28]:
# List the `_abbr` CSV URLs used by the import cells that follow, one per line.
print(
    'file:///entity_nodes_abbr.csv',
    'file:///energy_project_nodes_abbr.csv',
    'file:///relationships.csv',  # not in use
    'file:///ep_relationship_abbr.csv',
    'file:///entity_relationships_abbr.csv',
    sep="\n",
)

file:///entity_nodes_abbr.csv
file:///energy_project_nodes_abbr.csv
file:///relationships.csv
file:///ep_relationship_abbr.csv
file:///entity_relationships_abbr.csv


In [42]:
# Empty the database before importing the `_abbr` CSV files.
my_neo4j_wipe_out_database()

In [43]:
# Import Entity nodes from entity_nodes_abbr.csv, skipping rows with a null ID.
# Missing optional columns are stored as the string "Unknown".
# MERGE matches on the whole property map, so an existing node is reused only
# when every listed property is equal.
# NOTE(review): if ID is meant to be unique, consider MERGE on id alone
# followed by SET for the remaining properties -- confirm against the data.
query = """

LOAD CSV WITH HEADERS FROM "file:///entity_nodes_abbr.csv" AS row
WITH row WHERE row.ID IS NOT NULL
MERGE (e:Entity {id: row.ID,
    name: row.Name,
    entity_type: coalesce(row['Entity Type'], "Unknown"),
    legal_entity_type: coalesce(row['Legal Entity Type'], "Unknown"),
    publicly_listed: coalesce(row.PubliclyListed, "Unknown"),
    country: coalesce(row.Country, "Unknown")
    });

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7fc5a12ddf70>

In [44]:
# Import Energy_Project nodes from energy_project_nodes_abbr.csv, skipping rows
# with a null Energy_Project_Node_Name. Missing status/country become "Unknown";
# missing capacity columns become the integer 0.
# NOTE(review): LOAD CSV presumably yields string values, so capacities present
# in the file would be stored as strings while missing ones are the integer 0 --
# confirm whether a numeric conversion (e.g. toFloat) is wanted here.
query = """

LOAD CSV WITH HEADERS FROM "file:///energy_project_nodes_abbr.csv" AS row
WITH row WHERE row.Energy_Project_Node_Name IS NOT NULL
MERGE (ep:Energy_Project {name: row.Energy_Project_Node_Name,
    energy_project_type: row.Energy_Project_Type,
    capacity_mw: coalesce(row["Capacity (MW)"], 0),
    status: coalesce(row.Status, "Unknown"),
    country: coalesce(row.Country, "Unknown"),
    capacity_mtpa: coalesce(row["Capacity (Mtpa)"], 0),
    nominal_crude_steel_capacity_ttpa: coalesce(row["Nominal crude steel capacity (ttpa)"], 0)
    });

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7fc5a12ddee0>

In [45]:
# Create SHARE relationships from ep_relationship_abbr.csv: the `from` column
# is matched against Energy_Project.name and the `to` column against Entity.id.
# Rows for which either MATCH finds no node create nothing.
# A missing share value is stored as the string "Unknown".
# NOTE(review): the hand-written example earlier in this notebook points SHARE
# from Entity to Energy_Project, whereas this import points it from
# Energy_Project to Entity -- confirm the intended direction.
query = """

LOAD CSV WITH HEADERS FROM "file:///ep_relationship_abbr.csv" AS row
MATCH (f:Energy_Project {name: row.from})
MATCH (t:Entity {id: row.to})
MERGE (f)-[:SHARE {share: coalesce(row.share, "Unknown")}]->(t)

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7fc5a13f1880>

In [46]:
# Create Entity -> Entity SHARE relationships from entity_relationships_abbr.csv;
# both the `from` and `to` columns are matched against Entity.id, and rows for
# which either MATCH finds no node create nothing.
# A missing share value is stored as the string "Unknown".
# Original author note: "unable to find match or command for Cypher"
# TODO: the meaning of that note is unclear -- clarify or remove it.
query = """

LOAD CSV WITH HEADERS FROM "file:///entity_relationships_abbr.csv" AS row
MATCH (f:Entity {id: row.from})
MATCH (t:Entity {id: row.to})
MERGE (f)-[:SHARE {share: coalesce(row.share, "Unknown")}]->(t)

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7fc5a12dd0d0>

## Final Import

In [4]:
# List the CSV URLs used by the final import cells that follow, one per line.
print(
    'file:///entity_nodes.csv',
    'file:///energy_project_nodes.csv',
    'file:///relationships.csv',  # not in use
    'file:///energy_project_relationships.csv',
    'file:///entity_relationships.csv',
    sep="\n",
)

file:///entity_nodes.csv
file:///energy_project_nodes.csv
file:///relationships.csv
file:///energy_project_relationships.csv
file:///entity_relationships.csv


In [5]:
# Empty the database before running the final import.
my_neo4j_wipe_out_database()

In [7]:
# Import Entity nodes from entity_nodes.csv, skipping rows with a null ID.
# Missing descriptive columns are stored as "Unknown" and missing capacity
# columns as the string "None".
# MERGE matches on the whole property map, so an existing node is reused only
# when every listed property is equal.
# Fix: steel_plant_capacity previously re-read 'coal mine capacity' (copy-paste
# slip), so every entity carried its coal mine figure as its steel capacity.
# NOTE(review): the header name 'steel plant capacity' is assumed by analogy
# with the other capacity columns -- confirm it against entity_nodes.csv.
query = """

LOAD CSV WITH HEADERS FROM "file:///entity_nodes.csv" AS row
WITH row WHERE row.ID IS NOT NULL
MERGE (e:Entity {id: row.ID,
    name: row.Name,
    entity_type: coalesce(row['Entity Type'], "Unknown"),
    legal_entity_type: coalesce(row['Legal Entity Type'], "Unknown"),
    publicly_listed: coalesce(row.PubliclyListed, "Unknown"),
    country: coalesce(row.Country, "Unknown"),
    coal_plant_capacity: coalesce(row['coal plant capacity'],"None"),
    gas_plant_capacity: coalesce(row['gas capacity'],"None"),
    bio_plant_capacity: coalesce(row['bioenergy plant capacity'],"None"),
    coal_mine_capacity: coalesce(row['coal mine capacity'],"None"),
    steel_plant_capacity: coalesce(row['steel plant capacity'],"None")
    });

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7efdaa19ef10>

In [8]:
# Import Energy_Project nodes from energy_project_nodes.csv, skipping rows with
# a null Energy_Project_Node_Name. Missing status/country become "Unknown";
# missing capacity columns become the integer 0.
# NOTE(review): LOAD CSV presumably yields string values, so capacities present
# in the file would be stored as strings while missing ones are the integer 0 --
# confirm whether a numeric conversion (e.g. toFloat) is wanted here.
query = """

LOAD CSV WITH HEADERS FROM "file:///energy_project_nodes.csv" AS row
WITH row WHERE row.Energy_Project_Node_Name IS NOT NULL
MERGE (ep:Energy_Project {name: row.Energy_Project_Node_Name,
    energy_project_type: row.Energy_Project_Type,
    capacity_mw: coalesce(row["Capacity (MW)"], 0),
    status: coalesce(row.Status, "Unknown"),
    country: coalesce(row.Country, "Unknown"),
    capacity_mtpa: coalesce(row["Capacity (Mtpa)"], 0),
    nominal_crude_steel_capacity_ttpa: coalesce(row["Nominal crude steel capacity (ttpa)"], 0)
    });

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7efdaa117f70>

In [9]:
# Create SHARE relationships from energy_project_relationships.csv: the `from`
# column is matched against Energy_Project.name and the `to` column against
# Entity.id. Rows for which either MATCH finds no node create nothing.
# A missing share value is stored as the string "Unknown".
# NOTE(review): the hand-written example earlier in this notebook points SHARE
# from Entity to Energy_Project, whereas this import points it from
# Energy_Project to Entity -- confirm the intended direction.
query = """

LOAD CSV WITH HEADERS FROM "file:///energy_project_relationships.csv" AS row
MATCH (f:Energy_Project {name: row.from})
MATCH (t:Entity {id: row.to})
MERGE (f)-[:SHARE {share: coalesce(row.share, "Unknown")}]->(t)

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7efdaa19eee0>

In [10]:
# Create Entity -> Entity SHARE relationships from entity_relationships.csv;
# both the `from` and `to` columns are matched against Entity.id, and rows for
# which either MATCH finds no node create nothing.
# A missing share value is stored as the string "Unknown".
# TODO (original author note): read up on Cypher.
query = """

LOAD CSV WITH HEADERS FROM "file:///entity_relationships.csv" AS row
MATCH (f:Entity {id: row.from})
MATCH (t:Entity {id: row.to})
MERGE (f)-[:SHARE {share: coalesce(row.share, "Unknown")}]->(t)

"""

session.run(query)

<neo4j._sync.work.result.Result at 0x7efdaa117bb0>

In [11]:
# Show the nodes, relationships and density of the fully imported graph.
# The density is printed with one decimal place, so a sparse graph shows as 0.0.
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,#1 Coal Mine,[Energy_Project]
1,#1 Coal Mine (Ki-Coal),[Energy_Project]
2,#1 Coal Mine (Knott County),[Energy_Project]
3,#1 Mine (KY),[Energy_Project]
4,#1 Strip Mine,[Energy_Project]
...,...,...
29347,Şişecam Enerji,[Entity]
29348,Şırnak Silopi (CİNER) power station Unit 1,[Energy_Project]
29349,Şırnak Silopi (CİNER) power station Unit 2,[Energy_Project]
29350,Şırnak Silopi (CİNER) power station Unit 3,[Energy_Project]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,3 Main Mine 3M,[Energy_Project],SHARE,Hwange Colliery,[Entity]
1,3B Power,[Entity],SHARE,Chugoku Electric Power International Netherlands,[Entity]
2,3B Power,[Entity],SHARE,Mitsui & Co,[Entity]
3,4C Acquisition,[Entity],SHARE,Pinnacle West Capital,[Entity]
4,6 October power plant 1,[Energy_Project],SHARE,Cairo Electricity Production,[Entity]
...,...,...,...,...,...
13790,Şişecam Enerji,[Entity],SHARE,Türkiye İş Bankasi,[Entity]
13791,Şırnak Silopi (CİNER) power station Unit 1,[Energy_Project],SHARE,Silopi Elektrik Üretim,[Entity]
13792,Şırnak Silopi (CİNER) power station Unit 2,[Energy_Project],SHARE,Silopi Elektrik Üretim,[Entity]
13793,Şırnak Silopi (CİNER) power station Unit 3,[Energy_Project],SHARE,Silopi Elektrik Üretim,[Entity]


-------------------------
  Density: 0.0
-------------------------
