In [124]:
import matplotlib.pyplot as plt
from datetime import datetime
import requests as r

import pandas as pd
import numpy as np
import psycopg2
import config
import gmaps
import json

# Load the stop records from stops_info.json. JSON text is UTF-8 by
# specification, so the encoding is pinned instead of relying on the
# platform default.
with open('stops_info.json', 'r', encoding='utf-8') as sf:
    stop_locations = json.load(sf)

# Single import from the public IPython.display module; importing `display`
# from IPython.core.display is deprecated and emitted a warning.
from IPython.display import HTML, clear_output, display

# Inject CSS so the notebook's .container element spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))

## Query Function for PostgreSQL Communication

In [84]:
def query(Query, params=None):
    """Execute a SQL statement on the project's PostgreSQL database.

    A fresh connection is opened for every call using the settings found in
    ``config`` (database, user, password and the tunnelled host/port). The
    cursor and the connection are always closed again, including when the
    statement itself fails.

    Parameters
    ----------
    Query : str
        SQL text to execute.
    params : tuple, list or dict, optional
        Values for the placeholders in ``Query``; passed straight to
        ``cursor.execute`` so the driver performs the quoting. Omit it to
        run ``Query`` exactly as written.

    Returns
    -------
    list or None
        All rows returned by the statement, an empty list when the statement
        produces no result set, or ``None`` when connecting or executing
        failed (the error is printed rather than raised).
    """
    try:
        conn = psycopg2.connect(
            database=config.Database,
            user=config.DBuser,
            password=config.DBpswd,

            # Tunneled connection. When on Eduroam, use config.host and
            # config.port here instead.
            host=config.TunnelHost,
            port=config.TunnelPort
        )
    except Exception as e:
        print("Uh oh, can't connect. Invalid dbname, user or password?")
        print(repr(e))
        return None

    try:
        cursor = conn.cursor()
        try:
            cursor.execute(Query, params)
            conn.commit()
            # cursor.description is None when the statement returned no
            # result set; calling fetchall() then would raise.
            if cursor.description is None:
                return []
            return cursor.fetchall()
        finally:
            cursor.close()
    except Exception as e:
        # Reported separately from connection errors so a bad statement is
        # not mistaken for bad credentials.
        print("Uh oh, the query failed.")
        print(repr(e))
        return None
    finally:
        conn.close()

### Setting up the stop locations table, with IDs and names

In [143]:
# Tabulate the loaded stop records and write them out as stop_locations.csv.
(
    pd.DataFrame(stop_locations)
    .to_csv("stop_locations.csv")
)

## Getting Weather Data From Dark Sky API

In [85]:
def weather_query(timestamp, api_key=None, latitude=53.3498, longitude=-6.2603, timeout=30):
    """Fetch one Dark Sky response for a location and point in time.

    Parameters
    ----------
    timestamp : int
        Time value placed in the request path (the notebook passes UNIX
        epoch seconds).
    api_key : str, optional
        Dark Sky API key. When omitted, the ``DARKSKY_API_KEY`` environment
        variable is used, falling back to the key that was previously
        embedded in this notebook.
    latitude, longitude : float, optional
        Coordinates placed in the request path; the defaults are the values
        the notebook has always requested.
    timeout : float, optional
        Seconds to wait for the server before giving up, so a stalled
        connection cannot hang the download loop indefinitely.

    Returns
    -------
    dict or bool
        The decoded JSON body on success. ``False`` when the request fails,
        the server answers with an HTTP error status, or the body is not
        valid JSON (the error is printed rather than raised).
    """
    import os  # local import: only needed for the API-key lookup below

    if api_key is None:
        # SECURITY: this literal key is committed with the notebook and
        # should be rotated and removed; set DARKSKY_API_KEY instead.
        api_key = os.environ.get("DARKSKY_API_KEY", "6d1a9498f039837cce2657a75bd43748")

    # Path format is <key>/<latitude>,<longitude>,<time>; the stray space
    # after the first comma in the old URL has been removed.
    request_string = (
        f"https://api.darksky.net/forecast/{api_key}/"
        f"{latitude},{longitude},{timestamp}?exclude=currently,flags"
    )

    try:
        response = r.get(request_string, timeout=timeout)
        # Turn 4xx/5xx answers into failures instead of parsing error bodies.
        response.raise_for_status()
        return response.json()
    except Exception as e:
        print(repr(e))
        return False

In [86]:
# One request per day for one year: from 2018-01-01 00:00:01 UTC up to
# (but not including) 2019-01-01 00:00:01 UTC.
start = 1514764801
end = 1546300801
step = 86400  # seconds in a day

data = {}
failed_timestamps = []

# Iterating over a range leaves `start` untouched, so re-running this cell
# repeats the download instead of silently doing nothing.
day_timestamps = range(start, end, step)
total = len(day_timestamps)

for count, day_ts in enumerate(day_timestamps, start=1):
    pct = int(count / total * 100)
    filled = pct // 5  # the bar is 20 characters wide, 5% per character
    clear_output(wait=True)
    display(f"Completed: {count}/{total} [{pct}%] <{'=' * filled}{' ' * (20 - filled)}>")

    day_weather = weather_query(day_ts)
    if day_weather:
        data[day_ts] = day_weather
    else:
        # weather_query returns False on failure; keep those days out of
        # `data` so later cells can index every stored entry safely.
        failed_timestamps.append(day_ts)

if failed_timestamps:
    display(f"Failed requests: {len(failed_timestamps)} (see failed_timestamps)")



In [118]:
# Flatten the per-day responses into one dict of hourly records keyed by
# each record's own 'time' value.
hourly_data = {}

for day_weather in data.values():
    # weather_query returns False for a failed request, so skip those days.
    if not day_weather:
        continue
    # A day without an hourly section simply contributes no records.
    for hour_record in day_weather.get('hourly', {}).get('data', []):
        # Copy so that editing hourly_data later does not also modify the
        # raw responses held in `data`.
        hourly_data[hour_record['time']] = hour_record.copy()

In [120]:
# Remove the fields that are not wanted from every hourly record, in place.
# A field that is absent from a record is ignored.
hourly_keys = list(hourly_data.keys())

for hour_ts in hourly_keys:
    for unwanted_field in (
        'time',
        'summary',
        'windBearing',
        'precipProbability',
        'apparentTemperature',
        'dewPoint',
        'windGust',
        'uvIndex',
        'precipType',
        'ozone',
    ):
        hourly_data[hour_ts].pop(unwanted_field, None)

In [134]:
# One row per hourly record. Building with orient="index" lets pandas infer
# a dtype per column; DataFrame(...).T on mixed-type records left the
# columns as generic objects.
df = pd.DataFrame.from_dict(hourly_data, orient="index")

# The keys are epoch seconds; state the unit explicitly instead of scaling
# the values to nanoseconds by hand.
df.index = pd.to_datetime(df.index, unit="s")

df.to_csv("weather_darksky.csv")

## Data Analysis

In [136]:
# Fetch every row of the trips table through the query() helper.
trips = query(
    "SELECT * FROM trips;"
)

In [145]:
# Column labels applied to the rows fetched from the trips table, in order.
TRIPS_COLUMNS = ['dayofservice', 'tripid', 'lineid', 'routeid', 'direction', 'planned_arr','planned_dep','actual_arr','actual_dep']

# query() prints the error and returns None when it fails; stop here with a
# clear message instead of a confusing column-length mismatch.
if trips is None:
    raise RuntimeError("The trips query failed; see the error printed by query().")

# Passing the labels at construction also gives an empty result a correctly
# labelled (empty) frame rather than raising on the column assignment.
trips_df = pd.DataFrame(trips, columns=TRIPS_COLUMNS)

In [168]:
# Narrow the trips to line 145, then to the rows whose direction is "1 ".
# NOTE(review): the direction value is matched with a trailing space,
# presumably because the stored values are padded; confirm against the table.
trips_145 = trips_df.loc[trips_df["lineid"] == '145']
trips_145_forward = trips_145.loc[trips_145["direction"] == "1 "]

# Trip ids of the line 145 trips selected above.
trips_145_forward_tripids = trips_145_forward["tripid"]

In [205]:
# Maximum number of trip ids included in one leavetimes query.
TRIP_SAMPLE_SIZE = 1000

# .iloc makes the "first N rows" selection explicitly positional.
sample_tripids = trips_145_forward_tripids.iloc[:TRIP_SAMPLE_SIZE]

# Quote each id as a SQL string literal, doubling any embedded single quote.
quoted_tripids = ["'" + str(tripid).replace("'", "''") + "'" for tripid in sample_tripids]

if quoted_tripids:
    # A single IN list selects the same rows as the former chain of
    # "tripid='...' OR ..." conditions.
    full_string = "SELECT * FROM leavetimes WHERE tripid IN (" + ", ".join(quoted_tripids) + ");"
else:
    # With no ids the old code produced "... WHERE ;", which is invalid SQL;
    # select nothing instead.
    full_string = "SELECT * FROM leavetimes WHERE FALSE;"

In [206]:
# Run the leavetimes query assembled in full_string and keep the raw rows.
temp_data = query(full_string)

# Starts out empty; nothing in this cell fills it.
ftrips_145_data = {}

## Initial Model Training and Testing

### Random Forest Decision Tree training