In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import myutils as u
import warnings
warnings.simplefilter(action='ignore', category=Warning)

Import 2020 Bus OTP

In [2]:
ALL_TC_SITES = u.import_data("SITES")
ALL_TC = u.import_data("TRAFFIC_COUNTS")
ALL_OTP = u.import_data("ON_TIME")
ALL_STOPS = u.import_data("STOPS")
ALL_CONSTR = u.import_data("LANE_CLOSURE")
ALL_ROADS = u.import_data("ROAD")

DAY = pd.to_timedelta("1 day")
HOUR = pd.to_timedelta("1 hour")
MIN = pd.to_timedelta("1 min")

In [3]:
def prepare_data_traffic(date,distance=2000,freq=None):
    def get_stops_distances(tc_site, distance=2000,key=None):
        TC_SITE = ALL_TC_SITES[ALL_TC_SITES["Street"] == tc_site]
        site_coords = TC_SITE[["Lat","Long"]].values[0]
        stop_coords = ALL_STOPS[["Lat","Long"]].values
        distances = u.distance_within(site_coords,stop_coords,distance,key=key)
        return distances 

    def get_stops_nearby(tc_site, distance=2000):
        stops_index = get_stops_distances(tc_site, distance, "index")
        return ALL_STOPS.iloc[stops_index]

    def get_traffic_counts(tc_site,date,freq=None):
        # Obtain the traffic count information
        TC_SITE = ALL_TC_SITES[ALL_TC_SITES["Street"] == tc_site]
        TC = ALL_TC[(ALL_TC["Timestamp"]>date) & (ALL_TC["Timestamp"]<= date+1*DAY) & (ALL_TC["Street"]==TC_SITE["Street"].item())]
        TC.loc[:,"Time Interval"] = [u.fmt_timestamp(i) for i in TC["Timestamp"]]

        if freq != None:
        # Select the frequency with which the data is aggregated
            time_range = date + pd.timedelta_range(start="0:00:00",end="24:00:00",freq=freq)
            AGGR_TC = pd.DataFrame()
            cols = ["Northbound","Southbound","Eastbound","Westbound","Total"]
            for i in range(len(time_range)-1):
                lower_lim = time_range[i]
                upper_lim = time_range[i+1]
                res = TC[TC["Timestamp"] == upper_lim]
                res.loc[:,cols] = TC.loc[(TC["Timestamp"] <= upper_lim) & (TC["Timestamp"] > lower_lim),cols].sum(axis=0).values
                AGGR_TC = pd.concat([AGGR_TC,res])
            TC = AGGR_TC
        return TC.sort_values("Timestamp")

    def get_otp(start,end):
        return ALL_OTP[(ALL_OTP["Scheduled Time"] <= end) & (ALL_OTP["Scheduled Time"] > start)]



    ######
    DF = pd.DataFrame()
    for tc_site in ['McPhillips','Henderson','Pembina','Inkster',"St Mary's",'Lagimodiere','Disraeli','Marion']:
        AFF_STOPS = get_stops_nearby(tc_site, distance)
        distances = get_stops_distances(tc_site, distance,key="distance")
        TC = get_traffic_counts(tc_site,date,freq)
        OTP = get_otp(date, date+1*DAY)
        df0 = pd.DataFrame()
        for timestamp in TC["Timestamp"]:
            TC_i = TC[TC["Timestamp"] == timestamp]
            df1 = AFF_STOPS.loc[:,["Stop Number","Street"]]
            df1.loc[:,"Site"] = tc_site
            df1.loc[:,"Distance"] = distances
            df1.loc[:,"Same Street"] = (AFF_STOPS["Street"] == TC_i["Street"].item()).replace({True:1,False:0})
            # Directional & Total traffic count
            df1.loc[(AFF_STOPS["Street"] != tc_site),"Directional"] = 0
            for direction in ["Northbound","Southbound","Eastbound","Westbound"]:
                df1.loc[(AFF_STOPS["Street"] == tc_site) & (AFF_STOPS["Direction"] == direction),"Directional"] = TC_i[direction].item()
            df1.loc[:,"Total"] = TC_i["Total"].item()
            for stop_no in AFF_STOPS["Stop Number"]:
                cond_time = (OTP["Scheduled Time"] > timestamp - pd.to_timedelta(freq)) & (OTP["Scheduled Time"] <= timestamp)
                df1.loc[(df1["Stop Number"]==stop_no),"Arrivals"] = len(OTP.loc[(OTP["Stop Number"]==stop_no) & cond_time,"Deviation"].values)
                df1.loc[(df1["Stop Number"]==stop_no),"Average OTP"] = OTP.loc[(OTP["Stop Number"]==stop_no) & cond_time,"Deviation"].values.mean()
            df1.loc[:,"Timestamp"] = TC_i["Timestamp"].item()
            df0 = pd.concat([df0,df1]).dropna()
        DF = pd.concat([DF,df0]).drop_duplicates()
    if len(DF) > 0:
        DF.loc[:,"Time of Day"] = [u.fmt_timestamp(timestamp) for timestamp in DF["Timestamp"]]
        DF.loc[:,"Day of Week"] = [int(timestamp.day_of_week) for timestamp in DF["Timestamp"]]
        DF.loc[:,"Day of Year"] = [int(timestamp.day_of_year) for timestamp in DF["Timestamp"]]
        DF.loc[:,"Time value"] = [int(timestamp.to_datetime64()) for timestamp in DF["Timestamp"]]
        DF.loc[:,"Number of Lanes"] = ALL_STOPS.set_index("Stop Number").loc[DF["Stop Number"],"Number of Lanes"].values
    return DF

In [55]:
# freq = "1h"
# distance = 2000

# DF = pd.DataFrame()
# for day in pd.date_range(start="2021-08-10",end="2021-09-01"):
#     date = pd.to_datetime(day.date())
#     df = prepare_data_traffic(date,distance,freq)
#     DF = pd.concat([DF,df]).dropna()
#     DF.to_csv("data/evaluations/traffic_aug_1h_1.csv")
# DF.to_csv("data/evaluations/traffic_aug_1h_1.csv")


In [4]:
def time_of_day(timestamp):
    return pd.to_timedelta(timestamp.strftime("%H:%M:%S"))

def get_construction_sites(timestamp):
    time = time_of_day(timestamp)
    constr = ALL_CONSTR[(ALL_CONSTR["Date Closed - From"] <= timestamp) & (ALL_CONSTR["Date Closed - To"] > timestamp)]
    constr["Time Closed - From"]  = [time_of_day(timestamp) for timestamp in pd.to_datetime(constr["Time Closed - From"])]
    constr["Time Closed - To"]    = [time_of_day(timestamp) for timestamp in pd.to_datetime(constr["Time Closed - To"])]
    constr["Time Closed - To"]    = constr["Time Closed - To"].replace({pd.Timedelta('0 days 00:00:00'):pd.Timedelta('1 days 00:00:00')})
    constr = constr[(constr["Time Closed - From"] <= time) & (constr["Time Closed - To"] > time)]
    return ALL_CONSTR.loc[constr.index]

def add_constr_info(df,d):


    for i in df.index:
        df_ = df.loc[i,["Stop Number","Timestamp","Street"]]
        stop_no,timestamp,street = df_.values[0]  
        stop_no,timestamp,street = df_.values
        stop_loc = ALL_STOPS.loc[ALL_STOPS["Stop Number"]==stop_no,["Lat","Long"]].values[0]

        constr = get_construction_sites(timestamp)
        constr_loc = constr.loc[:,["Lat","Long"]].values
        nearby_constr_indexes = u.distance_within(stop_loc,constr_loc,d=d,key="index")
        num_constr = len(nearby_constr_indexes)

        constr_streets = constr.iloc[nearby_constr_indexes]["Street"]
        num_same_street = len(constr_streets[constr_streets == street])

        df.loc[i,["Nearby Constr","Same Street Constr"]] = num_constr, num_same_street
    return df
add_constr_info(df,2000)


NameError: name 'df' is not defined

In [5]:
DF = pd.read_csv("data/evaluations/traffic_nov_1h.csv").drop(columns="Unnamed: 0")
DF

Unnamed: 0,Stop Number,Street,Site,Distance,Same Street,Directional,Total,Arrivals,Average OTP,Timestamp,Time of Day,Day of Week,Day of Year,Time value,Number of Lanes
0,30001,McPhillips,McPhillips,1991.190701,1,204.0,619.0,1.0,70.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,3.0
1,30199,Leila,McPhillips,421.070384,0,0.0,619.0,2.0,-0.500000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,2.0
2,30207,Partridge,McPhillips,1691.791193,0,0.0,619.0,1.0,35.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,2.0
3,30209,McGregor,McPhillips,1718.386291,0,0.0,619.0,1.0,-9.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,2.0
4,30211,McGregor,McPhillips,1727.524447,0,0.0,619.0,1.0,-16.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
472683,50260,Marion,Marion,880.704855,1,158.0,353.0,3.0,-110.666667,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,4.0
472684,50261,Marion,Marion,950.386016,1,195.0,353.0,3.0,-103.666667,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,4.0
472685,50295,Lagimodiere,Marion,1329.174369,0,0.0,353.0,1.0,-90.000000,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,2.0
472686,50621,Marion,Marion,745.765722,1,195.0,353.0,3.0,-115.333333,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,4.0


In [14]:
# df= DF.copy()
for j in df.index[i:]:
    df_ = df.loc[j,["Stop Number","Timestamp","Street"]]
    stop_no,timestamp,street = df_.values  
    print(timestamp)
    stop_loc = ALL_STOPS.loc[ALL_STOPS["Stop Number"]==stop_no,["Lat","Long"]].values[0]

    constr = get_construction_sites(pd.Timestamp(timestamp))
    constr_loc = constr.loc[:,["Lat","Long"]].values
    nearby_constr_indexes = u.distance_within(stop_loc,constr_loc,d=2000,key="index")
    num_constr = len(nearby_constr_indexes)

    constr_streets = constr.iloc[nearby_constr_indexes]["Street"]
    num_same_street = len(constr_streets[constr_streets == street])

    df.loc[j,["Nearby Constr","Same Street Constr"]] = num_constr, num_same_street

2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 18:00:00
2021-11-14 19:00:00
2021-11-14 19:00:00
2021-11-14 19:00:00
2021-11-14 19:00:00
2021-11-14 19:00:00


In [15]:
i

208882

In [16]:
df

Unnamed: 0,Stop Number,Street,Site,Distance,Same Street,Directional,Total,Arrivals,Average OTP,Timestamp,Time of Day,Day of Week,Day of Year,Time value,Number of Lanes,Nearby Constr,Same Street Constr
0,30001,McPhillips,McPhillips,1991.190701,1,204.0,619.0,1.0,70.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,3.0,0.0,0.0
1,30199,Leila,McPhillips,421.070384,0,0.0,619.0,2.0,-0.500000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,2.0,0.0,0.0
2,30207,Partridge,McPhillips,1691.791193,0,0.0,619.0,1.0,35.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,2.0,2.0,0.0
3,30209,McGregor,McPhillips,1718.386291,0,0.0,619.0,1.0,-9.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,2.0,2.0,0.0
4,30211,McGregor,McPhillips,1727.524447,0,0.0,619.0,1.0,-16.000000,2021-11-01 06:00:00,6.0,0,305,1635746400000000000,4.0,2.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
472683,50260,Marion,Marion,880.704855,1,158.0,353.0,3.0,-110.666667,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,4.0,4.0,0.0
472684,50261,Marion,Marion,950.386016,1,195.0,353.0,3.0,-103.666667,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,4.0,4.0,0.0
472685,50295,Lagimodiere,Marion,1329.174369,0,0.0,353.0,1.0,-90.000000,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,2.0,0.0,0.0
472686,50621,Marion,Marion,745.765722,1,195.0,353.0,3.0,-115.333333,2021-12-01 00:00:00,0.0,2,335,1638316800000000000,4.0,4.0,0.0


In [12]:
df.to_csv("data/evaluations/traffic_nov_1h_constr.csv")