In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from scipy.stats import norm
import statsmodels.formula.api as sm
import statsmodels.api as stats
from scipy.stats import binom_test
from scipy.stats import binom
from folium import plugins
from datetime import datetime, timedelta
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
import geopandas as gpd

In [2]:
# %conda install seaborn -c conda-forge

In [3]:
df_Entirety = pd.read_csv('../data/headway_data_clean.csv')

In [4]:
# Ensure you're handing it floats
df_Entirety['LATITUDE'] = df_Entirety['LATITUDE'].astype(float)
df_Entirety['LONGITUDE'] = df_Entirety['LONGITUDE'].astype(float)


In [5]:
# Filter the DF for rows, then columns, then remove NaNs
heat_df = df_Entirety[df_Entirety['TRIP_EDGE']<2] # Reducing data size so it runs faster
heat_df = df_Entirety.loc[df_Entirety['OVERLOAD_ID']==0] # Reducing data size so it runs faster
# heat_df = heat_df[['LATITUDE', 'LONGITUDE']]

In [6]:
df_NoEvent_SpecRt_DateRange0 = heat_df.loc[heat_df['SCHEDULED_TIME'] > '2023-07-31']
df_NoEvent_SpecRt_DateRange0

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,NextDay_Scheduled,NextDay_Actual_Arrival,NextDay_Actual_Departure
0,120230801,1,99457890,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,2,6.500000,0,0,0
1,120230801,1,99457891,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,9,0.000000,0,0,0
2,120230801,1,99457892,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,19,0.000000,0,0,0
3,120230801,1,99457893,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,,0,35,0.000000,0,0,0
4,120230801,1,99457894,2023-08-01,22,2200,1040,345105,0,FROM DOWNTOWN,...,0,1,0,0.0,0,36,12.866666,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350323,120230930,2,100406610,2023-09-30,7,702,2312,353448,0,TO DOWNTOWN,...,1,0,0,0.0,0,539,0.000000,0,0,0
350324,120230930,2,100406611,2023-09-30,7,702,2312,353448,0,TO DOWNTOWN,...,1,0,0,,0,551,0.000000,0,0,0
350325,120230930,2,100406612,2023-09-30,7,702,2312,353449,0,FROM DOWNTOWN,...,0,1,0,0.0,0,552,0.000000,0,0,0
350326,120230930,2,100406613,2023-09-30,7,702,2312,353449,0,FROM DOWNTOWN,...,1,0,0,0.0,0,563,0.000000,0,0,0


In [7]:
df_NoEvent_SpecRt_DateRange1 = df_NoEvent_SpecRt_DateRange0.loc[df_NoEvent_SpecRt_DateRange0['SCHEDULED_TIME'] < '2023-08-08']
df_NoEvent_SpecRt_DateRange1

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,NextDay_Scheduled,NextDay_Actual_Arrival,NextDay_Actual_Departure
0,120230801,1,99457890,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,2,6.500000,0,0,0
1,120230801,1,99457891,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,9,0.000000,0,0,0
2,120230801,1,99457892,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,19,0.000000,0,0,0
3,120230801,1,99457893,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,,0,35,0.000000,0,0,0
4,120230801,1,99457894,2023-08-01,22,2200,1040,345105,0,FROM DOWNTOWN,...,0,1,0,0.0,0,36,12.866666,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40159,120230807,1,99627649,2023-08-07,7,706,2779,353532,0,TO DOWNTOWN,...,1,0,0,0.0,0,59,0.000000,0,0,0
40160,120230807,1,99627650,2023-08-07,7,706,2779,353532,0,TO DOWNTOWN,...,1,0,0,,0,71,0.000000,0,0,0
40161,120230807,1,99627651,2023-08-07,7,706,2779,353533,0,FROM DOWNTOWN,...,1,0,0,0.0,0,72,0.000000,0,0,0
40162,120230807,1,99627652,2023-08-07,7,706,2779,353533,0,FROM DOWNTOWN,...,1,0,0,0.0,0,83,0.000000,0,0,0


In [8]:
df_NoEvent_SpecRt_DateTimeRange0 = df_NoEvent_SpecRt_DateRange1[df_NoEvent_SpecRt_DateRange1['SCHEDULED_TIME'] > '15:00:00']  
df_NoEvent_SpecRt_DateTimeRange0

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,NextDay_Scheduled,NextDay_Actual_Arrival,NextDay_Actual_Departure
0,120230801,1,99457890,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,2,6.500000,0,0,0
1,120230801,1,99457891,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,9,0.000000,0,0,0
2,120230801,1,99457892,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,0.0,0,19,0.000000,0,0,0
3,120230801,1,99457893,2023-08-01,22,2200,1040,345104,0,TO DOWNTOWN,...,0,1,0,,0,35,0.000000,0,0,0
4,120230801,1,99457894,2023-08-01,22,2200,1040,345105,0,FROM DOWNTOWN,...,0,1,0,0.0,0,36,12.866666,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40159,120230807,1,99627649,2023-08-07,7,706,2779,353532,0,TO DOWNTOWN,...,1,0,0,0.0,0,59,0.000000,0,0,0
40160,120230807,1,99627650,2023-08-07,7,706,2779,353532,0,TO DOWNTOWN,...,1,0,0,,0,71,0.000000,0,0,0
40161,120230807,1,99627651,2023-08-07,7,706,2779,353533,0,FROM DOWNTOWN,...,1,0,0,0.0,0,72,0.000000,0,0,0
40162,120230807,1,99627652,2023-08-07,7,706,2779,353533,0,FROM DOWNTOWN,...,1,0,0,0.0,0,83,0.000000,0,0,0


In [9]:
## ASK MICHAEL FOR HELP HERE:

df_NoEvent_SpecRt_DateTimeRange1 = df_NoEvent_SpecRt_DateTimeRange0[df_NoEvent_SpecRt_DateTimeRange0['SCHEDULED_TIME'] < '18:00:00'] 
df_NoEvent_SpecRt_DateTimeRange1

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,NextDay_Scheduled,NextDay_Actual_Arrival,NextDay_Actual_Departure


In [10]:
df_NoEvent_SpecRt_DateTimeRange0_SpecDirection = df_NoEvent_SpecRt_DateTimeRange0[df_NoEvent_SpecRt_DateTimeRange0['ROUTE_DIRECTION_NAME'] == 'FROM DOWNTOWN']
df_NoEvent_SpecRt_DateTimeRange0_SpecDirection

Unnamed: 0,CALENDAR_ID,SERVICE_ABBR,ADHERENCE_ID,DATE,ROUTE_ABBR,BLOCK_ABBR,OPERATOR,TRIP_ID,OVERLOAD_ID,ROUTE_DIRECTION_NAME,...,ADJUSTED_LATE_COUNT,ADJUSTED_ONTIME_COUNT,STOP_CANCELLED,PREV_SCHED_STOP_CANCELLED,IS_RELIEF,BLOCK_STOP_ORDER,DWELL_IN_MINS,NextDay_Scheduled,NextDay_Actual_Arrival,NextDay_Actual_Departure
4,120230801,1,99457894,2023-08-01,22,2200,1040,345105,0,FROM DOWNTOWN,...,0,1,0,0.0,0,36,12.866666,0,0,0
5,120230801,1,99457895,2023-08-01,22,2200,1040,345105,0,FROM DOWNTOWN,...,0,1,0,0.0,0,51,0.000000,0,0,0
6,120230801,1,99457896,2023-08-01,22,2200,1040,345105,0,FROM DOWNTOWN,...,0,1,0,,0,62,0.000000,0,0,0
11,120230801,1,99457901,2023-08-01,22,2200,1040,345107,0,FROM DOWNTOWN,...,0,1,0,0.0,0,97,10.800000,0,0,0
12,120230801,1,99457902,2023-08-01,22,2200,1040,345107,0,FROM DOWNTOWN,...,0,0,0,0.0,0,112,0.000000,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40156,120230807,1,99627646,2023-08-07,7,706,2779,353531,0,FROM DOWNTOWN,...,1,0,0,0.0,0,35,2.750000,0,0,0
40157,120230807,1,99627647,2023-08-07,7,706,2779,353531,0,FROM DOWNTOWN,...,1,0,0,,0,49,0.000000,0,0,0
40161,120230807,1,99627651,2023-08-07,7,706,2779,353533,0,FROM DOWNTOWN,...,1,0,0,0.0,0,72,0.000000,0,0,0
40162,120230807,1,99627652,2023-08-07,7,706,2779,353533,0,FROM DOWNTOWN,...,1,0,0,0.0,0,83,0.000000,0,0,0


In [11]:
df_NoEvent_SpecRt_DateTimeRange0_SpecDirection_EdgeSelect = df_NoEvent_SpecRt_DateTimeRange0_SpecDirection[df_NoEvent_SpecRt_DateTimeRange0_SpecDirection['TRIP_EDGE']<2]

In [12]:
 
heat_df = df_NoEvent_SpecRt_DateTimeRange0_SpecDirection[['LATITUDE', 'LONGITUDE']]

In [13]:
heat_df.head()

Unnamed: 0,LATITUDE,LONGITUDE
4,36.167091,-86.781923
5,36.18348,-86.81422
6,36.181248,-86.847705
11,36.167091,-86.781923
12,36.18348,-86.81422


**TASK: Value count of time_point_abbrev & route_number**

In [14]:
# Create weight column, using date
heat_df['Weight'] = df_Entirety['HDWY_DEV'].astype(float) 
# heat_df['Weight'] = heat_df['Weight']
heat_df = heat_df.dropna(axis=0, subset=['LATITUDE','LONGITUDE','Weight'])
heat_df['Weight'] = ((heat_df['Weight'])) / (heat_df['Weight'].min() - heat_df['Weight'].max()) # focus on BUNCHING


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  heat_df['Weight'] = df_Entirety['HDWY_DEV'].astype(float)


In [15]:
heat_df.head()

Unnamed: 0,LATITUDE,LONGITUDE,Weight
11,36.167091,-86.781923,0.007976
12,36.18348,-86.81422,0.014295
18,36.167091,-86.781923,-0.000859
19,36.18348,-86.81422,0.001411
25,36.167091,-86.781923,0.000614


In [16]:
# heat_df['Weight'] = np.random.random(size=len(heat_df))

In [17]:
heat_map_pts = heat_df.values 
heat_map_pts

array([[ 3.61670905e+01, -8.67819226e+01,  7.97595067e-03],
       [ 3.61834800e+01, -8.68142200e+01,  1.42953580e-02],
       [ 3.61670905e+01, -8.67819226e+01, -8.58947175e-04],
       ...,
       [ 3.61388810e+01, -8.68006220e+01,  5.09233720e-02],
       [ 3.61670905e+01, -8.67819226e+01, -3.49101160e-02],
       [ 3.61388810e+01, -8.68006220e+01, -3.38671084e-02]])

In [18]:

# List comprehension to make out list of lists
# heat_data = [[[row['LATITUDE'],row['LONGITUDE']] for index, row in heat_df[heat_df['Weight'] == i].iterrows()] for i in range(len(df_Entirety))]

In [19]:
# heat_data[1000]

In [20]:
# len(heat_data)

In [21]:
# time_ = 0
# N = len(df_Entirety)
#itensify_factor = 30
# for time_entry in df_Entirety:
#    time_ = time_+1
#    for row in time_entry:
#        weight = min(np.random.uniform()*(time_/(N))*itensify_factor, 1)
#        row.append(weight)

In [22]:
# time_index = [
#    (datetime.now() + k * timedelta(1)).strftime("%Y-%m-%d") for k in range(len(df_Entirety))
# ]


In [23]:
Headway_Dev_Heat_Map_Overall = folium.Map([36.174465, -86.767960], tiles='OpenStreetMap', zoom_start=11, control_scale=True) 
# Heat_Map_Over_Time
# Heat_Map_Over_Time

In [24]:
# Plot it on the map
hm = plugins.HeatMap(heat_map_pts)
hm.add_to(Headway_Dev_Heat_Map_Overall)
# Display the map
Headway_Dev_Heat_Map_Overall