# Project 3 - Create a graph for Illegal Ivory Trade
## You need the anaconda_postgres_neo4j cluster running to execute this entire notebook

In [30]:
import math
import os

import neo4j
import numpy as np
import pandas as pd
import psycopg2
from IPython.display import display

In [31]:
# function to run a select query and return rows in a pandas dataframe
# pandas may return numeric postgres columns as float;
# when every value in such a column is a whole number, convert the column to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return the rows in a pandas dataframe.

    Parameters
    ----------
    query : str
        SQL text handed to ``pd.read_sql_query``.
    rollback_before_flag : bool
        When true, roll back the global ``connection`` before running the query.
    rollback_after_flag : bool
        When true, roll back the global ``connection`` after the query has run,
        including when the query raised.

    Returns
    -------
    pandas.DataFrame
        The query result. Every float64 column whose non-null values are all
        whole numbers that fit in a 64-bit integer is converted to the nullable
        ``Int64`` dtype; all other columns are returned unchanged.
    """

    if rollback_before_flag:
        connection.rollback()

    try:
        df = pd.read_sql_query(query, connection)
    finally:
        # run the requested rollback even when the query failed, so the
        # connection is not left inside a failed transaction
        if rollback_after_flag:
            connection.rollback()

    # fix the float columns that really should be integers
    int64_limit = float(2 ** 63)

    for column in df:

        if df[column].dtype != "float64":
            continue

        # nulls are ignored for the check and become <NA> after conversion
        present = df[column].dropna().to_numpy()

        all_whole = (present == np.floor(present)).all()
        # also rejects +/-inf, which cannot be represented as an integer
        all_in_range = (np.abs(present) < int64_limit).all()

        if all_whole and all_in_range:
            df[column] = df[column].astype('Int64')

    return df
    

In [32]:
# Connection settings default to the values used by this notebook's cluster.
# Each one can be overridden with the matching libpq-style environment variable
# so that credentials do not have to be edited into the notebook itself.
connection = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "ucb"),
    host=os.environ.get("PGHOST", "postgres"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "postgres"),
)

In [33]:
# open a cursor on the postgres connection; the cells visible in this notebook
# read through pandas with the connection itself and do not reference the cursor
cursor = connection.cursor()

## Group the data to ensure we get one relationship between two countries

In [34]:
# ask my_select_query_pandas to roll back the connection both before and after the query
rollback_before_flag = True
rollback_after_flag = True

# one output row per (exporter, importer) pair, with the quantities of that pair summed
query = """

SELECT
    exporter,
    importer,
    SUM(quantity) AS total_quantity_kg
FROM
    ivory_trade
GROUP BY
    exporter,
    importer
    
limit 100000
"""

# df is the module-level frame that the graph-building cells iterate over
df = my_select_query_pandas(query, rollback_before_flag, rollback_after_flag)

df.head()

Unnamed: 0,exporter,importer,total_quantity_kg
0,AD,AT,2.0
1,AD,BE,2.95
2,AD,CH,14.0
3,AD,DE,2.0
4,AD,ES,17.0


In [35]:
# (rows, columns) of the grouped result; each row is one exporter/importer pair
df.shape

(14954, 3)

# Neo4j

## Web server interface at https://xxxx:7473


**Username: neo4j**

**Password: ucb_mids_w205**


## Connect, login, create driver, create session; with community edition, we can only use 1 database, the "neo4j" database

In [36]:
# The URI and credentials default to the values documented above; set
# NEO4J_URI / NEO4J_USERNAME / NEO4J_PASSWORD to override them without
# editing the notebook.
driver = neo4j.GraphDatabase.driver(
    uri=os.environ.get("NEO4J_URI", "neo4j://neo4j:7687"),
    auth=(
        os.environ.get("NEO4J_USERNAME", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205"),
    ),
)

In [37]:
# module-level session on the "neo4j" database; the Neo4j helper functions and
# the graph-building cells below all run their queries through this object
session = driver.session(database="neo4j")

## my_neo4j_wipe_out_database() - since community edition can only have 1 database "neo4j", this function will wipe out all the nodes and relationships

In [38]:
def my_neo4j_wipe_out_database():
    """Wipe out the database by deleting all nodes and relationships.

    Runs a single ``DETACH DELETE`` through the global ``session``: every node
    is removed together with any relationships attached to it. The result is
    consumed before returning so that a failure of the delete is raised here
    rather than on a later query.
    """

    query = "match (node) detach delete node"
    session.run(query).consume()

## my_neo4j_run_query_pandas() will run a Cypher query and put the results in a Pandas dataframe; easy to see how you can use Python to manipulate the returned data

In [39]:
def my_neo4j_run_query_pandas(query, **kwargs):
    """Execute a Cypher query on the global session and return a pandas dataframe.

    Keyword arguments are forwarded unchanged to ``session.run``. Each returned
    record becomes one row; the column names are the keys reported by the result.
    """

    records = session.run(query, **kwargs)

    # collect the values of every record first, then read the column names
    rows = []
    for record in records:
        rows.append(record.values())

    column_names = records.keys()

    return pd.DataFrame(rows, columns=column_names)

## my_neo4j_nodes_relationships() will print the nodes (assumes a name property) and relationships

In [40]:
def my_neo4j_nodes_relationships():
    """Print all the nodes and relationships, followed by the graph density.

    Nodes are listed by their ``name`` property and labels; relationships are
    listed as (start node, type, end node). Both tables are fetched with
    ``my_neo4j_run_query_pandas`` and rendered with ``display``. Returns None.
    """

    print("-------------------------")
    print("  Nodes:")
    print("-------------------------")

    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """

    df = my_neo4j_run_query_pandas(query)

    number_nodes = df.shape[0]

    display(df)

    print("-------------------------")
    print("  Relationships:")
    print("-------------------------")

    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """

    df = my_neo4j_run_query_pandas(query)

    number_relationships = df.shape[0]

    display(df)

    # With fewer than two nodes there are no node pairs, so the ratio is
    # undefined; report 0.0 instead of raising ZeroDivisionError (this happens
    # when the function is called on an empty or single-node database).
    # NOTE(review): 2E / (N(N-1)) is the density formula for an undirected
    # graph, while the relationships matched above are directed - confirm
    # whether E / (N(N-1)) is what is intended.
    possible_pairs = number_nodes * (number_nodes - 1)

    if possible_pairs > 0:
        density = (2 * number_relationships) / possible_pairs
    else:
        density = 0.0

    print("-------------------------")
    print("  Density:", f'{density:.1f}')
    print("-------------------------")
    

## Create Exporter Graph

In [42]:
my_neo4j_wipe_out_database()

# The country codes and weights are sent as query parameters instead of being
# formatted into the Cypher text, so a value containing a quote cannot break
# (or alter) the query. UNWIND processes the whole list in a single query
# rather than one round-trip per dataframe row.
query_exports_to = """
    UNWIND $rows AS row
    MERGE (exporter_node:Country {name: row.exporter})
    MERGE (importer_node:Country {name: row.importer})
    CREATE (exporter_node)-[:EXPORTS_TO {weight_kg: row.weight_kg}]->(importer_node)
"""

# one dict per exporter/importer pair; numpy scalars are unwrapped to plain
# Python numbers before being handed to the driver
export_rows = [
    {
        "exporter": exporter,
        "importer": importer,
        "weight_kg": quantity_kg.item() if hasattr(quantity_kg, "item") else quantity_kg,
    }
    for exporter, importer, quantity_kg in df[
        ["exporter", "importer", "total_quantity_kg"]
    ].itertuples(index=False, name=None)
]

# consume() waits for the query to finish so any error is raised in this cell
session.run(query_exports_to, rows=export_rows).consume()
    

In [43]:
# print the node table, relationship table and density of the exporter graph built above
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,AD,[Country]
1,AE,[Country]
2,AF,[Country]
3,AG,[Country]
4,AI,[Country]
...,...,...
257,YU,[Country]
258,ZA,[Country]
259,ZC,[Country]
260,ZM,[Country]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,AD,[Country],EXPORTS_TO,AT,[Country]
1,AD,[Country],EXPORTS_TO,BE,[Country]
2,AD,[Country],EXPORTS_TO,CH,[Country]
3,AD,[Country],EXPORTS_TO,DE,[Country]
4,AD,[Country],EXPORTS_TO,ES,[Country]
...,...,...,...,...,...
14949,ZW,[Country],EXPORTS_TO,YU,[Country]
14950,ZW,[Country],EXPORTS_TO,ZA,[Country]
14951,ZW,[Country],EXPORTS_TO,ZC,[Country]
14952,ZW,[Country],EXPORTS_TO,ZM,[Country]


-------------------------
  Density: 0.4
-------------------------


## In the Neo4j GUI, run the following query with graph output and rearrange the nodes with your mouse if necessary:

```match (n) return n```

## Create Importer Graph

In [45]:
my_neo4j_wipe_out_database()

# The country codes and weights are sent as query parameters instead of being
# formatted into the Cypher text, so a value containing a quote cannot break
# (or alter) the query. UNWIND processes the whole list in a single query
# rather than one round-trip per dataframe row.
query_imports_from = """
    UNWIND $rows AS row
    MERGE (importer_node:Country {name: row.importer})
    MERGE (exporter_node:Country {name: row.exporter})
    CREATE (importer_node)-[:IMPORTS_FROM {weight_kg: row.weight_kg}]->(exporter_node)
"""

# one dict per exporter/importer pair; numpy scalars are unwrapped to plain
# Python numbers before being handed to the driver
import_rows = [
    {
        "importer": importer,
        "exporter": exporter,
        "weight_kg": quantity_kg.item() if hasattr(quantity_kg, "item") else quantity_kg,
    }
    for importer, exporter, quantity_kg in df[
        ["importer", "exporter", "total_quantity_kg"]
    ].itertuples(index=False, name=None)
]

# consume() waits for the query to finish so any error is raised in this cell
session.run(query_imports_from, rows=import_rows).consume()

In [46]:
# print the node table, relationship table and density of the importer graph built above
my_neo4j_nodes_relationships()

-------------------------
  Nodes:
-------------------------


Unnamed: 0,node_name,labels
0,AD,[Country]
1,AE,[Country]
2,AF,[Country]
3,AG,[Country]
4,AI,[Country]
...,...,...
257,YU,[Country]
258,ZA,[Country]
259,ZC,[Country]
260,ZM,[Country]


-------------------------
  Relationships:
-------------------------


Unnamed: 0,node_name_1,node_1_labels,relationship_type,node_name_2,node_2_labels
0,AD,[Country],IMPORTS_FROM,AR,[Country]
1,AD,[Country],IMPORTS_FROM,AT,[Country]
2,AD,[Country],IMPORTS_FROM,AU,[Country]
3,AD,[Country],IMPORTS_FROM,BE,[Country]
4,AD,[Country],IMPORTS_FROM,BG,[Country]
...,...,...,...,...,...
14949,ZW,[Country],IMPORTS_FROM,VN,[Country]
14950,ZW,[Country],IMPORTS_FROM,XX,[Country]
14951,ZW,[Country],IMPORTS_FROM,ZA,[Country]
14952,ZW,[Country],IMPORTS_FROM,ZM,[Country]


-------------------------
  Density: 0.4
-------------------------


## In the Neo4j GUI, run the following query with graph output and rearrange the nodes with your mouse if necessary:

```match (n) return n```