# W205 Project 3
## AGM Business Improvement

### Team Members
#### Member 1
#### Member 2
#### Member 3

In [6]:
import neo4j
import csv
import math
import numpy as np
import pandas as pd

import psycopg2

ModuleNotFoundError: ignored

### Support Functions
Reused from W205 Code / Labs

In [None]:
# Open the PostgreSQL connection that every SQL cell in this notebook uses.
# NOTE(review): the credentials are hard-coded here; presumably these are the
# defaults of the course environment - confirm before reusing elsewhere.
connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

# Initial module-level cursor; later cells rebind or reuse the name `cursor`.
cursor = connection.cursor()

In [2]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return the rows in a pandas dataframe.

    The query is executed on the module-level ``connection``.

    Args:
        query: SQL text handed to ``pd.read_sql_query``.
        rollback_before_flag: if truthy, roll the connection back before
            running the query.
        rollback_after_flag: if truthy, roll the connection back after the
            rows have been read.

    Returns:
        The resulting dataframe. Every ``float64`` column whose non-missing
        values are all finite whole numbers is converted to the nullable
        ``Int64`` dtype (missing values become ``<NA>``); all other columns
        are left untouched.
    """
    if rollback_before_flag:
        connection.rollback()

    df = pd.read_sql_query(query, connection)

    if rollback_after_flag:
        connection.rollback()

    # Fix the float columns that really should be integers.
    for column in df:
        if df[column].dtype != "float64":
            continue

        values = df[column].to_numpy()
        present = values[~np.isnan(values)]

        # Vectorized whole-number test. Infinite values are treated as
        # "not an integer" so the column stays float instead of raising.
        is_whole = np.isfinite(present).all() and (present == np.floor(present)).all()

        if is_whole:
            df[column] = df[column].astype('Int64')

    return df

In [3]:
def my_read_csv_file(file_name, limit):
    """Read a csv file, print its first ``limit`` rows and a summary line.

    Args:
        file_name: path of the csv file to read.
        limit: maximum number of rows to print; the remaining rows are
            still read so that they can be counted.

    Each printed row is the list of strings produced by ``csv.reader``.
    The final line reports how many rows were printed and how many rows
    the file contains in total.
    """
    total = 0

    # The context manager guarantees the file is closed (the handle used to
    # leak); newline="" lets the csv module do its own newline handling.
    with open(file_name, "r", newline="") as csv_file:
        for total, row in enumerate(csv.reader(csv_file), start=1):
            if total <= limit:
                print(row)

    print("\nPrinted ", min(limit, total), "lines of ", total, "total lines.")

### Drop tables, create and load from CSV files

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# Remove the stations table if it exists.
query = """

drop table if exists stations;
"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the drop.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# Remove the lines table if it exists.
query = """

drop table if exists lines;

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the drop.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# Remove the travel_times table if it exists.
query = """

drop table if exists travel_times;

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the drop.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# stations: one row per station name (the primary key) with its latitude,
# longitude and transfer_time.
# NOTE(review): the unit of transfer_time is not stated in this file.
query = """

create table stations (
    station varchar(32),
    latitude numeric(9,6),
    longitude numeric(9,6),
    transfer_time numeric(3),
    primary key (station)
);

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the new table.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# lines: one row per (line, sequence) pair naming the station at that
# position; the segment query further down joins sequence to sequence + 1.
query = """

create table lines (
    line varchar(6),
    sequence numeric(2),
    station varchar(32),
    primary key (line, sequence)
);

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the new table.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# travel_times: one row per (station_1, station_2) pair with its travel_time.
# The segment query further down matches a pair in either order.
query = """

create table travel_times (
    station_1 varchar(32),
    station_2 varchar(32),
    travel_time numeric(3),
    primary key (station_1, station_2)
);

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the new table.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# Load the stations table from stations.csv: comma-delimited csv with a
# header line, with NULL '' given as the null marker.
# NOTE(review): the path is hard-coded and presumably has to be readable by
# the database server - confirm for your environment.
query = """

copy stations
from '/user/projects/project-3/exercise/stations.csv' delimiter ',' NULL '' csv header;

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the loaded rows.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# Load the lines table from lines.csv: comma-delimited csv with a header
# line, with NULL '' given as the null marker.
# NOTE(review): the path is hard-coded and presumably has to be readable by
# the database server - confirm for your environment.
query = """

copy lines
from '/user/projects/project-3/exercise/lines.csv' delimiter ',' NULL '' csv header;

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the loaded rows.
connection.commit()

In [None]:
# Roll back whatever transaction is currently open on the connection.
connection.rollback()

# Load the travel_times table from travel_times.csv: comma-delimited csv
# with a header line, with NULL '' given as the null marker.
# NOTE(review): the path is hard-coded and presumably has to be readable by
# the database server - confirm for your environment.
query = """

copy travel_times
from '/user/projects/project-3/exercise/travel_times.csv' delimiter ',' NULL '' csv header;

"""

# Rebind the module-level cursor and run the statement.
cursor = connection.cursor()
cursor.execute(query)

# Commit the loaded rows.
connection.commit()

### Establish Neo4j connection and helper functions

In [None]:
# Create the Neo4j driver and open one session on the "neo4j" database.
# The my_neo4j_* helper functions in this notebook run their queries through
# this module-level `session`.
# NOTE(review): the URI and credentials are hard-coded; presumably course
# defaults - confirm before reusing elsewhere.
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","w205"))
session = driver.session(database="neo4j")

In [None]:
def my_neo4j_wipe_out_database():
    """Wipe out the database by deleting all nodes and relationships.

    Runs on the module-level ``session``. A single ``DETACH DELETE`` removes
    every node together with any relationships attached to it, which leaves
    the same empty graph as deleting relationships and nodes in two separate
    queries, but in one round trip.
    """
    query = "match (node) detach delete node"
    session.run(query)
  
def my_neo4j_run_query_pandas(query, **kwargs):
    """Run a query on the shared session and return the rows as a dataframe.

    Keyword arguments are forwarded unchanged to ``session.run`` as query
    parameters. The dataframe has one row per returned record and uses the
    result keys as its column names.
    """
    outcome = session.run(query, **kwargs)

    # Collect the record values first, then ask the result for its keys.
    records = []
    for record in outcome:
        records.append(record.values())

    column_names = outcome.keys()

    return pd.DataFrame(records, columns=column_names)

def my_neo4j_create_node(station_name):
    """Create one node labelled Station whose name property is station_name.

    The statement is run on the module-level ``session``.
    """
    parameters = {"station_name": station_name}

    query = """
    
    CREATE (:Station {name: $station_name})
    
    """

    session.run(query, **parameters)

def my_neo4j_create_relationship_one_way(from_station, to_station, weight):
    """Create a single LINK relationship from one Station node to another.

    Both nodes are looked up by their name property; the new relationship
    points from ``from_station`` to ``to_station`` and carries ``weight``.
    The statement is run on the module-level ``session``.
    """
    parameters = {
        "from_station": from_station,
        "to_station": to_station,
        "weight": weight,
    }

    query = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to)
    
    """

    session.run(query, **parameters)

def my_neo4j_create_relationship_two_way(from_station, to_station, weight):
    """Create LINK relationships in both directions between two Station nodes.

    Both nodes are looked up by their name property; one statement creates
    a relationship in each direction, both carrying the same ``weight``.
    The statement is run on the module-level ``session``.
    """
    parameters = {
        "from_station": from_station,
        "to_station": to_station,
        "weight": weight,
    }

    query = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to),
           (to)-[:LINK {weight: $weight}]->(from)
    
    """

    session.run(query, **parameters)



In [None]:
# Delete all nodes and relationships before the cells below create new ones.
my_neo4j_wipe_out_database()

### Create graphs from csv station files

#### Query the list of stations and create the departure and arrival nodes in the graph

Use the query from 3.2.1 "Query the list of stations"

For each station X, create two nodes:
* depart X
* arrive X

Use the function my_neo4j_create_node() defined above

For example, West Oakland:
* my_neo4j_create_node('depart West Oakland')
* my_neo4j_create_node('arrive West Oakland')

In [6]:
# Create a "depart <station>" and an "arrive <station>" node for every
# station in the stations table.
connection.rollback()

query = """

select station
from stations
order by station

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

# Each row holds a single column: the station name.
for (station,) in rows:
    for prefix in ('depart ', 'arrive '):
        my_neo4j_create_node(prefix + station)

#### Query the list of stations and the lines they serve, create line nodes, and create relationships between the line nodes and the departure and arrival nodes with weight 0

Use the query from "Query the list of stations and the lines they serve"

For each station X and each line Y that the station serves:
* Create a line node
* Create a relationship from the departure node to the line node with weight 0
* Create a relationship from the line node to the arrival node with weight 0

Use the function my_neo4j_create_relationship_one_way() defined above to create the relationships

In [None]:
# For every (station, line) pair create a "<line> <station>" node and link
# it with weight 0: depart node -> line node -> arrive node.
connection.rollback()

query = """

select station, line
from lines
order by station, line

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for station, line in rows:

    depart_node = 'depart ' + station
    arrive_node = 'arrive ' + station
    line_node = line + ' ' + station

    my_neo4j_create_node(line_node)

    # Entering the line from the departure node and leaving it to the
    # arrival node both have weight 0.
    my_neo4j_create_relationship_one_way(depart_node, line_node, 0)
    my_neo4j_create_relationship_one_way(line_node, arrive_node, 0)

#### Query the list of all possible line transfers and the transfer times, create a relationship for each transfer with the transfer time as the weight

Use the query from 3.2.5 "Query the list of all possible line transfers and the transfer times"

For each station X, from line Y, to line Z, create a relationship from Y's line node to Z's line node with the weight set to the transfer time

In [None]:
# For every station served by more than one line, link each ordered pair of
# its line nodes with the station's transfer time as the weight.
connection.rollback()

query = """

select a.station, a.line as from_line, b.line as to_line, s.transfer_time
from lines a
     join lines b
       on a.station = b.station and a.line <> b.line 
     join stations s
       on a.station = s.station
order by 1, 2, 3

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for station, from_line, to_line, raw_transfer_time in rows:

    # The weight is passed on as a plain int.
    transfer_time = int(raw_transfer_time)

    source_node = from_line + ' ' + station
    target_node = to_line + ' ' + station

    my_neo4j_create_relationship_one_way(source_node, target_node, transfer_time)

#### Query the list of all segments between each station and its adjoining stations, create a relationship for each segment both ways

Use the query from 3.2.7 "Query the list of all segments between each station and its adjoining stations"

For each segment from station X to station Y on line Z, create two relationships:
* From X's line node to Y's line node with travel time
* From Y's line node to X's line node with travel time

Use the function my_neo4j_create_relationship_two_way() defined above which will create both relationships 

In [None]:
# For every pair of consecutive stations on a line, link their line nodes in
# both directions with the travel time as the weight.
connection.rollback()

query = """

select a.line, a.station as from_station, b.station as to_station, t.travel_time
from lines a
  join lines b
    on a.line = b.line and b.sequence = (a.sequence + 1)
  join travel_times t
    on (a.station = t.station_1 and b.station = t.station_2)
        or (a.station = t.station_2 and b.station = t.station_1)
order by line, from_station, to_station

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for line, first_station, second_station, raw_travel_time in rows:

    source_node = line + ' ' + first_station
    target_node = line + ' ' + second_station

    # The weight is passed on as a plain int.
    travel_time = int(raw_travel_time)

    my_neo4j_create_relationship_two_way(source_node, target_node, travel_time)

### Begin PROJECT 3 CODE HERE

#### Define search algorithms

In [6]:
# *******************************************************
# Shortest Path algorithm from W205 
# *******************************************************
def my_neo4j_shortest_path(from_station, to_station):
    """Run a weighted shortest-path search between two stations and print it.

    The in-memory graph 'ds_graph' is dropped (without failing if it is
    missing) and projected again from the Station nodes and LINK
    relationships, then Dijkstra is streamed from ``from_station`` to
    ``to_station`` using the 'weight' property.

    For each returned path the total cost is printed (also divided by 60 and
    labelled as minutes), followed by one line per node on the path:
    node name, cost added by that step, cumulative cost.
    """
    drop_query = "CALL gds.graph.drop('ds_graph', false)"
    session.run(drop_query)

    project_query = "CALL gds.graph.project('ds_graph', 'Station', 'LINK', {relationshipProperties: 'weight'})"
    session.run(project_query)

    path_query = """

    MATCH (source:Station {name: $source}), (target:Station {name: $target})
    CALL gds.shortestPath.dijkstra.stream(
        'ds_graph', 
        { sourceNode: source, 
          targetNode: target, 
          relationshipWeightProperty: 'weight'
        }
    )
    YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
    RETURN
        gds.util.asNode(sourceNode).name AS from,
        gds.util.asNode(targetNode).name AS to,
        totalCost,
        [nodeId IN nodeIds | gds.util.asNode(nodeId).name] AS nodes,
        costs
    ORDER BY index

    """

    paths = session.run(path_query, source=from_station, target=to_station)

    for record in paths:

        total_cost = int(record['totalCost'])

        print("\n--------------------------------")
        print("   Total Cost: ", total_cost)
        print("   Minutes: ", round(total_cost / 60.0,1))
        print("--------------------------------")

        node_names = record['nodes']
        running_costs = record['costs']

        # costs are cumulative, so each step's own cost is the difference
        # from the previous cumulative value.
        previous = 0

        for position, node_name in enumerate(node_names):

            cumulative = int(running_costs[position])

            print(node_name + ", " + str(cumulative - previous) + ", " + str(cumulative))

            previous = cumulative

#### Project To Do
1. Add modules to ingest real-time traffic data (use Redis or Redis fixed document as a proxy)
2. Import train schedules, planned route outages (use MongoDB)
3. Based on 1 and 2 create new graphs or modify cost / weights across nodes on existing graphs to eliminate paths and change time cost across nodes
4. Create 3 or more search, sort algorithms. Use the above as a template
5. Overlay Google maps API
