In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import myutils as u
import math
import datetime
import warnings
import pandas.errors
# NOTE(review): this silences every Warning subclass for the whole notebook,
# including pandas warnings raised by the cells below.
warnings.simplefilter(action='ignore', category=Warning)

# Import Data

In [4]:
# Raw datasets, each fetched by name through the project helper module.
ALL_TC_SITES = u.import_data("SITES")
ALL_TC = u.import_data("TRAFFIC_COUNTS")
ALL_OTP = u.import_data("ON_TIME")
ALL_STOPS = u.import_data("STOPS")
ALL_CONSTR = u.import_data("LANE_CLOSURE")
ALL_ROADS = u.import_data("ROAD_NETWORK")

# Constants
MIN = pd.Timedelta("1 min")
DAY = pd.Timedelta("1 day")
# Integer code for each traffic-count site, keyed by street name.
SITES_NO = {
    'McPhillips': 0,
    'Henderson': 1,
    'Pembina': 2,
    'Inkster': 3,
    'Nichol': 4,
    'Lagimodiere': 5,
    'Disraeli': 6,
    'Marion': 7,
}

In [9]:
def get_stops_distances(tc_site, distance=500,key=None):
    """Query ``u.distance_within`` for the bus stops around a traffic-count site.

    Parameters
    ----------
    tc_site : value matched against the "Street" column of ALL_TC_SITES.
    distance : cutoff forwarded to ``u.distance_within``.
    key : forwarded unchanged to ``u.distance_within``; callers in this
        notebook pass "index" or "distance".

    Returns
    -------
    Whatever ``u.distance_within`` returns for the requested ``key``.

    Raises
    ------
    IndexError
        If no row of ALL_TC_SITES has ``Street == tc_site``.
    """
    # Only the first matching site row is used as the origin.
    site_rows = ALL_TC_SITES.loc[ALL_TC_SITES["Street"] == tc_site, ["Lat", "Long"]]
    origin = site_rows.values[0]
    candidates = ALL_STOPS[["Lat", "Long"]].values
    return u.distance_within(origin, candidates, distance, key=key)

def get_stops_nearby(tc_site, distance=500):
    """Return the ALL_STOPS rows selected for a traffic-count site.

    The result of ``get_stops_distances(..., key="index")`` is used as
    positional (``iloc``) row selectors into ALL_STOPS.
    """
    positions = get_stops_distances(tc_site, distance, key="index")
    nearby = ALL_STOPS.iloc[positions]
    return nearby

def get_traffic_counts(tc_site,date,freq=None):
    """Return the traffic counts of one site for the day following ``date``.

    Rows of ALL_TC are kept when their "Timestamp" lies in the half-open
    window ``(date, date + 1 day]`` and their "Street" equals the site's
    street. A "Time Interval" column is added from ``u.fmt_timestamp``.

    Parameters
    ----------
    tc_site : value matched against the "Street" column of ALL_TC_SITES.
    date : start of the window; must support ``date + Timedelta``.
    freq : optional bucket width passed to ``pd.timedelta_range`` (e.g. "2h").
        When given, the directional and total counts are summed per bucket
        ``(lower, upper]`` and stored on the row whose timestamp equals the
        bucket's upper bound.

    Returns
    -------
    pandas.DataFrame sorted by "Timestamp".

    Raises
    ------
    ValueError
        If ``tc_site`` does not match exactly one row of ALL_TC_SITES
        (raised by ``Series.item()``).
    """
    TC_SITE = ALL_TC_SITES[ALL_TC_SITES["Street"] == tc_site]
    street = TC_SITE["Street"].item()

    in_window = (ALL_TC["Timestamp"] > date) & (ALL_TC["Timestamp"] <= date + 1 * DAY)
    # Work on an explicit copy so adding a column never touches ALL_TC and
    # does not rely on chained-assignment semantics.
    TC = ALL_TC[in_window & (ALL_TC["Street"] == street)].copy()
    TC["Time Interval"] = [u.fmt_timestamp(i) for i in TC["Timestamp"]]

    if freq is not None:
        # Bucket edges covering the whole day at the requested frequency.
        time_range = date + pd.timedelta_range(start="0:00:00", end="24:00:00", freq=freq)
        cols = ["Northbound", "Southbound", "Eastbound", "Westbound", "Total"]
        buckets = []
        for lower_lim, upper_lim in zip(time_range[:-1], time_range[1:]):
            # The row sitting exactly on the upper edge carries the bucket's
            # aggregate. NOTE(review): a bucket with no row at `upper_lim`
            # contributes nothing to the result, as in the original code.
            res = TC[TC["Timestamp"] == upper_lim].copy()
            in_bucket = (TC["Timestamp"] <= upper_lim) & (TC["Timestamp"] > lower_lim)
            res.loc[:, cols] = TC.loc[in_bucket, cols].sum(axis=0).values
            buckets.append(res)
        # Concatenate once; keep the column layout when there are no buckets
        # so the sort below cannot fail on a column-less frame.
        TC = pd.concat(buckets) if buckets else TC.iloc[0:0]
    return TC.sort_values("Timestamp")

def get_otp(start,end):
    """Return the ALL_OTP rows whose "Scheduled Time" lies in ``(start, end]``."""
    scheduled = ALL_OTP["Scheduled Time"]
    after_start = scheduled > start
    up_to_end = scheduled <= end
    return ALL_OTP[up_to_end & after_start]

def prepare_data(tc_site,date,distance=500,freq=None):
    """Build one row per (traffic-count timestamp, nearby stop) for a site and day.

    Parameters
    ----------
    tc_site : street name of the traffic-count site.
    date : start of the day to analyse (see ``get_traffic_counts``).
    distance : cutoff forwarded to the nearby-stop lookup.
    freq : optional aggregation frequency forwarded to ``get_traffic_counts``.

    Returns
    -------
    pandas.DataFrame with the columns "Stop Number", "Site", "Distance",
    "Same Street", "Directional", "Total", "Arrivals", "Average OTP",
    "Time interval" and "Date". An empty DataFrame is returned when there
    are no traffic-count rows.

    Notes
    -----
    NOTE(review): "Arrivals" and "Average OTP" are computed over the whole
    day ``(date, date + 1 day]`` and are therefore identical for every
    timestamp of a stop. Confirm whether per-interval statistics were
    intended.
    """
    AFF_STOPS = get_stops_nearby(tc_site, distance)
    distances = get_stops_distances(tc_site, distance, key="distance")
    TC = get_traffic_counts(tc_site, date, freq)
    OTP = get_otp(date, date + 1 * DAY)

    # The per-stop on-time statistics do not depend on the timestamp, so they
    # are computed once instead of once per traffic-count row.
    deviations = [
        OTP.loc[OTP["Stop Number"] == stop_no, "Deviation"].values
        for stop_no in AFF_STOPS["Stop Number"]
    ]
    arrivals = [len(dev) for dev in deviations]
    # A stop with no recorded arrivals gets NaN explicitly.
    average_otp = [dev.mean() if len(dev) else np.nan for dev in deviations]

    on_site_street = AFF_STOPS["Street"] == tc_site

    frames = []
    for timestamp in TC["Timestamp"]:
        TC_i = TC[TC["Timestamp"] == timestamp]
        df1 = AFF_STOPS.loc[:, ["Stop Number"]].copy()
        df1["Site"] = tc_site
        df1["Distance"] = distances
        df1["Same Street"] = (AFF_STOPS["Street"] == TC_i["Street"].item()).astype(int)
        # Directional & total traffic count: stops on other streets get 0,
        # stops on the site's street get the count for their own direction.
        df1.loc[~on_site_street, "Directional"] = 0
        for direction in ["Northbound", "Southbound", "Eastbound", "Westbound"]:
            df1.loc[on_site_street & (AFF_STOPS["Direction"] == direction), "Directional"] = TC_i[direction].item()
        df1["Total"] = TC_i["Total"].item()
        df1["Arrivals"] = arrivals
        df1["Average OTP"] = average_otp
        df1["Time interval"] = u.fmt_timestamp(timestamp)
        df1["Date"] = TC_i["Timestamp"].item().date()
        frames.append(df1)

    # Concatenate once at the end instead of growing a frame inside the loop.
    return pd.concat(frames) if frames else pd.DataFrame()

# Example run: one site, one day, stops within `distance` of the site,
# traffic counts aggregated into `freq`-wide buckets.
tc_site = "McPhillips"
date = pd.to_datetime("2021-08-23")
distance = 500
freq = "2h"

AFF_STOPS = get_stops_nearby(tc_site, distance=distance)
TC = get_traffic_counts(tc_site, date, freq=freq)
prepare_data(tc_site, date, distance=distance, freq=freq)

Unnamed: 0,Stop Number,Site,Distance,Same Street,Directional,Total,Arrivals,Average OTP,Time interval,Date
1483,30199,McPhillips,421.070384,0,0.0,501.0,33,-84.787879,2.0,2021-08-23
1628,30353,McPhillips,329.076830,1,281.0,501.0,65,-3.646154,2.0,2021-08-23
1637,30362,McPhillips,381.455906,0,0.0,501.0,93,55.172043,2.0,2021-08-23
1638,30363,McPhillips,263.378003,0,0.0,501.0,109,-50.266055,2.0,2021-08-23
1639,30364,McPhillips,271.147088,0,0.0,501.0,15,-55.133333,2.0,2021-08-23
...,...,...,...,...,...,...,...,...,...,...
2150,30918,McPhillips,337.529056,0,0.0,1680.0,14,-22.642857,0.0,2021-08-24
2151,30919,McPhillips,342.831977,0,0.0,1680.0,14,-19.571429,0.0,2021-08-24
2157,30925,McPhillips,468.780146,0,0.0,1680.0,9,-54.888889,0.0,2021-08-24
2158,30926,McPhillips,350.956296,0,0.0,1680.0,9,-54.888889,0.0,2021-08-24


# Remove Irrelevant Bus Stops

In [6]:
# Inspect the bus-stop table (one row per stop: number, name, coordinates,
# direction, street and cross street).
ALL_STOPS

Unnamed: 0,Stop Number,Stop Name,Lat,Long,Direction,Street,At
0,10001,Southbound Osborne at Mulvey,49.871261,-97.139518,Southbound,Osborne,Mulvey
1,10002,Southbound Osborne at Woodward,49.868819,-97.137553,Southbound,Osborne,Woodward
2,10003,Southbound Osborne at Brandon,49.867880,-97.136795,Southbound,Osborne,Brandon
3,10004,Southbound Osborne at Hethrington,49.866522,-97.135707,Southbound,Osborne,Hethrington
4,10005,Southbound Osborne at Morley,49.865164,-97.134604,Southbound,Osborne,Morley
...,...,...,...,...,...,...,...
5151,62018,Eastbound Parker at Beaumont,49.847806,-97.164596,Eastbound,Parker,Beaumont
5152,62021,Northbound Eaglewood at Longspur,49.780139,-97.196271,Northbound,Eaglewood,Longspur
5153,62022,Southbound Eaglewood at Longspur,49.780498,-97.196785,Southbound,Eaglewood,Longspur
5154,62023,Westbound Bison at Appleford,49.791002,-97.209359,Westbound,Bison,Appleford


In [8]:
# Inspect the traffic-count sites (site description, street, nearby
# reference and coordinates).
ALL_TC_SITES

Unnamed: 0,Site,Street,Near,Lat,Long
0,McPhillips And 190m South Of Leila,McPhillips,Leila,49.951733,-97.149032
1,Henderson And 55M S Of Frasers Grove,Henderson,Grove,49.934985,-97.096166
2,Pembina And 280 N Of Adamar,Pembina,Adamar,49.826952,-97.152312
3,Inkster And 130W Of Wyatt,Inkster,Wyatt,49.945949,-97.187758
4,Nichol And St.Marys,Nichol,St.Marys,49.850579,-97.112202
5,Lagimodiere And 80M N Of Burmac,Lagimodiere,Burmac,49.848695,-97.049665
6,Disraeli Bridge,Disraeli,Bridge,49.906744,-97.123028
7,Marion And 260M E Of Dupuy,Marion,Dupuy,49.8819,-97.089342


In [16]:
# Keep the stops returned by u.distance_within with d=5000 for at least one
# traffic-count site; every other stop is considered irrelevant.
stop_coords = ALL_STOPS[["Lat", "Long"]].values
all_stops = set(ALL_STOPS.index)
valid_stops = set()
for org in ALL_TC_SITES[["Lat", "Long"]].values:
    # distance_within works on the bare coordinate array, so with key="index"
    # it yields row positions. Translate them to index labels so they can be
    # compared with `all_stops` and used with `.loc` even if ALL_STOPS does
    # not carry a default 0..n-1 index.
    positions = u.distance_within(org, stop_coords, d=5000, key="index")
    valid_stop_i = set(ALL_STOPS.index[list(positions)])
    valid_stops |= valid_stop_i
invalid_stops = all_stops - valid_stops


In [46]:
# Spot-check one excluded stop: print its record, then list its distance to
# every traffic-count site (d=1e9 so that no site is filtered out).
INVALID_STOPS = ALL_STOPS.loc[pd.Index(invalid_stops)]  # looked up once, reused below
stop = INVALID_STOPS[["Lat", "Long"]].values[1]
print(INVALID_STOPS.values[1])
tc_stations = ALL_TC_SITES[["Lat", "Long"]].values
u.distance_within(stop, tc_stations, d=1e9, key="distance")


[61296 'Eastbound Waverley at Eaglewood East' 49.77774502516655
 -97.19327577886658 'Eastbound' 'Waverley' 'Eaglewood East']


[19626.608410526704,
 18840.221977039204,
 6218.283362164325,
 18728.458269007497,
 9982.501968677187,
 12992.155269663035,
 15219.78914282563,
 13788.754995512525]

In [35]:
# Sort the labels so the row order is deterministic instead of following
# the iteration order of the `valid_stops` set.
VALID_STOPS = ALL_STOPS.loc[sorted(valid_stops)]
VALID_STOPS

Unnamed: 0,Stop Number,Stop Name,Lat,Long,Direction,Street,At
0,10001,Southbound Osborne at Mulvey,49.871261,-97.139518,Southbound,Osborne,Mulvey
1,10002,Southbound Osborne at Woodward,49.868819,-97.137553,Southbound,Osborne,Woodward
2,10003,Southbound Osborne at Brandon,49.867880,-97.136795,Southbound,Osborne,Brandon
3,10004,Southbound Osborne at Hethrington,49.866522,-97.135707,Southbound,Osborne,Hethrington
4,10005,Southbound Osborne at Morley,49.865164,-97.134604,Southbound,Osborne,Morley
...,...,...,...,...,...,...,...
5147,62014,Northbound Southwest Transitway at Beaumont St...,49.848513,-97.166058,Northbound,Southwest Transitway,Beaumont Station (Route 29)
5148,62015,Southbound Southwest Transitway at Beaumont St...,49.848788,-97.165797,Southbound,Southwest Transitway,"Beaumont Station (Routes 641, 677)"
5149,62016,Westbound Markham at Markham Station,49.808174,-97.156658,Westbound,Markham,Markham Station
5150,62017,Northbound Seel Station at Seel Station,49.842344,-97.168990,Northbound,Seel Station,Seel Station


# Calculate Weights

In [25]:
def calculateWeight(stop:tuple,tc_stations:list) -> list:
    """Return inverse-distance weights of a stop with respect to the count stations.

    Parameters
    ----------
    stop : coordinates of the stop, passed as the origin to
        ``u.distance_within``.
    tc_stations : coordinates of the traffic-count stations.

    Returns
    -------
    list
        ``1 / distance`` for every distance returned by
        ``u.distance_within(..., d=1e9, key="distance")``.

    Notes
    -----
    NOTE(review): a stop located exactly on a station gives a distance of 0;
    with plain Python floats that raises ZeroDivisionError. Decide how such
    a stop should be weighted.
    """
    # d=1e9 is used as an effectively unbounded cutoff so that no station is
    # dropped -- assumes `d` is a maximum-distance filter; confirm in myutils.
    distances = u.distance_within(stop, tc_stations, d=1e9, key="distance")
    return [1 / dist for dist in distances]

In [33]:
# Example: weights of the first stop in ALL_STOPS against every count site.
stop = ALL_STOPS[["Lat", "Long"]].to_numpy()[0]
tc_stations = ALL_TC_SITES[["Lat", "Long"]].to_numpy()
weights = calculateWeight(stop, tc_stations)
weights

[0.00011130904496978019,
 0.00012911858170819166,
 0.00019931599321840472,
 0.00011105225736408663,
 0.00033071469373298004,
 0.00014450576690524196,
 0.00024252953102636343,
 0.0002638947790510056]

In [32]:
# Hard-coded sample counts -- presumably one per traffic-count site, in the
# same order as `weights`; TODO confirm and replace with real counts.
counts = [200,230,100,120,300,190,231,349]
# Traffic-count index of the stop: dot product of the weights and the counts.
tc_index = np.dot(weights,counts)
tc_index

0.3600110563801519