In [1]:
import os

import neo4j
import pandas as pd
from IPython.display import display

In [19]:
# Connection settings. Each value can be overridden through an environment
# variable so the URI and credentials do not have to be edited in the notebook;
# the fallbacks are the values this notebook has always used.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://neo4j:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205")

driver = neo4j.GraphDatabase.driver(uri=NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [20]:
# Open the session on the "neo4j" database; the helper functions in this
# notebook send their queries through this module-level `session`.
session = driver.session(
    database="neo4j",
)

In [21]:
def my_neo4j_wipe_out_database():
    """Wipe out the database by deleting every node and every relationship.

    Sends a single Cypher statement through the notebook-level ``session``.
    ``detach delete`` removes each node together with all relationships
    attached to it, so no separate relationship-deleting pass is needed.

    Returns:
        None
    """

    query = "match (node) detach delete node"

    # session.run() hands back a lazy result. Consuming it makes the statement
    # run to completion inside this call, so a failure is raised here rather
    # than by whichever query happens to be issued next.
    session.run(query).consume()

In [22]:
def my_neo4j_run_query_pandas(query, **kwargs):
    """Run a query and return its records as a pandas DataFrame.

    Args:
        query: query text passed to ``session.run``.
        **kwargs: forwarded unchanged to ``session.run`` (the cells in this
            notebook use them to supply values for ``$name`` placeholders).

    Returns:
        A DataFrame with one row per record of the result and one column per
        key reported by the result.
    """

    records = session.run(query, **kwargs)

    # Pull every record first; the column names are read from the result
    # only after it has been iterated.
    rows = []
    for record in records:
        rows.append(record.values())

    column_names = records.keys()

    return pd.DataFrame(rows, columns=column_names)

In [23]:
def my_neo4j_nodes_relationships():
    """Display all nodes and all relationships, then print the graph density.

    Two queries are run through ``my_neo4j_run_query_pandas``: one listing
    every node (name and labels) and one listing every directed relationship
    (start node, relationship type, end node). Each resulting frame is shown
    with ``display``; the row counts are used as the node and relationship
    counts for the density figure.

    Density is computed as ``2 * relationships / (nodes * (nodes - 1))`` and
    reported as 0.0 when there are fewer than two nodes, where that formula
    would divide by zero.

    Returns:
        None. All output goes to ``print`` / ``display``.
    """

    print("-------------------------")
    print("  Nodes:")
    print("-------------------------")

    query = """
        match (n)
        return n.name as node_name, labels(n) as labels
        order by n.name
    """

    df = my_neo4j_run_query_pandas(query)

    # One row per node, so the row count is the node count.
    number_nodes = df.shape[0]

    display(df)

    print("-------------------------")
    print("  Relationships:")
    print("-------------------------")

    query = """
        match (n1)-[r]->(n2)
        return n1.name as node_name_1, labels(n1) as node_1_labels,
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """

    df = my_neo4j_run_query_pandas(query)

    # One row per relationship, so the row count is the relationship count.
    number_relationships = df.shape[0]

    display(df)

    # With 0 or 1 nodes the denominator is 0 (e.g. right after the database
    # has been wiped); report a density of 0.0 instead of raising
    # ZeroDivisionError.
    possible_pairs = number_nodes * (number_nodes - 1)
    if possible_pairs:
        density = (2 * number_relationships) / possible_pairs
    else:
        density = 0.0

    # NOTE: one decimal place rounds the density of a sparse graph to 0.0.
    print("-------------------------")
    print("  Density:", f'{density:.1f}')
    print("-------------------------")
    

In [30]:
# The function prints and displays its results and returns None, so there is
# nothing to capture; assigning the call to `df` would only bind `df` to None.
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,#1 Coal Mine,[Energy_Project]
1,#1 Coal Mine (Ki-Coal),[Energy_Project]
2,#1 Coal Mine (Knott County),[Energy_Project]
3,#1 Mine (KY),[Energy_Project]
4,#1 Strip Mine,[Energy_Project]
...,...,...
28619,Şişecam Enerji,[Entity]
28620,Şırnak Silopi (CİNER) power station Unit 1,[Energy_Project]
28621,Şırnak Silopi (CİNER) power station Unit 2,[Energy_Project]
28622,Şırnak Silopi (CİNER) power station Unit 3,[Energy_Project]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,3 Main Mine 3M,[Energy_Project],SHARE,Hwange Colliery,[Entity]
1,3B Power,[Entity],SHARE,Chugoku Electric Power International Netherlands,[Entity]
2,3B Power,[Entity],SHARE,Mitsui & Co,[Entity]
3,4C Acquisition,[Entity],SHARE,Pinnacle West Capital,[Entity]
4,6 October power plant 1,[Energy_Project],SHARE,Cairo Electricity Production,[Entity]
...,...,...,...,...,...
13462,Şişecam Enerji,[Entity],SHARE,Türkiye İş Bankasi,[Entity]
13463,Şırnak Silopi (CİNER) power station Unit 1,[Energy_Project],SHARE,Silopi Elektrik Üretim,[Entity]
13464,Şırnak Silopi (CİNER) power station Unit 2,[Energy_Project],SHARE,Silopi Elektrik Üretim,[Entity]
13465,Şırnak Silopi (CİNER) power station Unit 3,[Energy_Project],SHARE,Silopi Elektrik Üretim,[Entity]


-------------------------
  Density: 0.0
-------------------------


# PageRank

In [25]:
# Drop any 'ds_graph' projection left over from an earlier run. The second
# argument (false) tells GDS not to fail when no such graph exists, so this is
# safe on a fresh database.
query = "CALL gds.graph.drop('ds_graph', false)"
session.run(query).consume()

# Project the Entity nodes and their SHARE relationships (carrying the 'share'
# relationship property) into the in-memory graph 'ds_graph'.
# Results are consumed explicitly so each call finishes, and raises any error,
# in this cell; the trailing ';' keeps the summary object out of the cell output.
query = "CALL gds.graph.project('ds_graph', 'Entity', 'SHARE', {relationshipProperties: 'share'})"
session.run(query).consume();

<neo4j._sync.work.result.Result at 0x7f58ab45f670>

In [26]:
# Values bound to the $max_iterations / $damping_factor placeholders below.
max_iterations = 20
damping_factor = 0.05

# Stream a PageRank score for every node of the 'ds_graph' projection and
# list the nodes by score, highest first (ties broken by name).
query = """

CALL gds.pageRank.stream('ds_graph',
                         { maxIterations: $max_iterations,
                           dampingFactor: $damping_factor}
                         )
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS name, score as page_rank
ORDER BY page_rank DESC, name ASC

"""

my_neo4j_run_query_pandas(
    query,
    max_iterations=max_iterations,
    damping_factor=damping_factor,
)

Unnamed: 0,name,page_rank
0,Blackrock,2.334670
1,The Vanguard Group,1.924467
2,The Master Trust Bank of Japan,1.906833
3,ENGIE,1.875190
4,ArcelorMittal,1.822456
...,...,...
4006,Öresundskraft,0.950000
4007,Ørsted,0.950000
4008,ČEZ Polska,0.950000
4009,İzdemir Enerji Elektrik Üretim,0.950000


In [27]:
# Run the current query with the current parameters and keep the resulting
# frame in `df` for further inspection.
df = my_neo4j_run_query_pandas(
    query,
    max_iterations=max_iterations,
    damping_factor=damping_factor,
)

In [28]:
# Show the first ten rows of `df`.
df.head(n=10)

Unnamed: 0,name,page_rank
0,Blackrock,2.33467
1,The Vanguard Group,1.924467
2,The Master Trust Bank of Japan,1.906833
3,ENGIE,1.87519
4,ArcelorMittal,1.822456
5,Siberian Coal Energy Company,1.670812
6,Glencore,1.659988
7,Vietnam National Coal and Mineral Industries H...,1.630833
8,Peabody Energy,1.597247
9,Life Insurance Corporation of India,1.579365


## Personalized PageRank - PageRank computed from the point of view of a single source node; it measures what is important to a specific user and can be used to target recommendations to that user

In [34]:
# Values bound to the $source / $max_iterations / $damping_factor placeholders
# below. `source` is the name of the Entity node the ranking starts from.
source = "Blackstone"
max_iterations = 20
damping_factor = 0.85

# Look up the Entity named $source and pass it as the only entry of
# sourceNodes, then list every node of 'ds_graph' by score, highest first
# (ties broken by name).
query = """

MATCH (siteA:Entity {name: $source})
CALL gds.pageRank.stream('ds_graph', {
  maxIterations: $max_iterations,
  dampingFactor: $damping_factor,
  sourceNodes: [siteA]
})
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).name AS name, score as page_rank
ORDER BY score DESC, name ASC

"""

my_neo4j_run_query_pandas(
    query,
    source=source,
    max_iterations=max_iterations,
    damping_factor=damping_factor,
)

Unnamed: 0,name,page_rank
0,Blackstone,0.150000
1,The Vanguard Group,0.117938
2,Blackrock,0.063750
3,1832 Asset Management,0.000000
4,24R Advisory Services,0.000000
...,...,...
4006,Österreichische Beteiligungs,0.000000
4007,Ørsted,0.000000
4008,ČEZ Polska,0.000000
4009,İzdemir Enerji Elektrik Üretim,0.000000
