<h1>Combine Time Periods into One Dataset</h1>
11/17/23<br>
This notebook appends snapshots of Divvy station status with similar file names (e.g. station_status_2023_11_16*.csv) and appends them into one csv<br>

### next steps
in api-request.py, should rename timestamp to time_reported and remove that code here

# Define Time Period to Combine

In [30]:
#update this to combine different files
filename_import = "station_status_2023_11_17*.csv"
filename_export = "station-status-hourly-2023-11-17.csv"

# import libraries

In [37]:
import pandas as pd
import requests
from datetime import datetime
from pathlib import Path

# import station info

In [38]:
url = f"https://gbfs.lyft.com/gbfs/2.3/chi/en/station_information.json?"

#request data
response = requests.get(url)
data = response.json()
response

<Response [200]>

In [39]:
df_station_info = pd.DataFrame(data['data']['stations'], columns=['station_id','name','lat','lon','capacity'])
df_station_info.head()

Unnamed: 0,station_id,name,lat,lon,capacity
0,a3a82c6c-a135-11e9-9cda-0a87ae2ba916,Southport Ave & Waveland Ave,41.94815,-87.66394,23
1,a3b48c07-a135-11e9-9cda-0a87ae2ba916,Leavitt St & Chicago Ave,41.895501,-87.682017,19
2,a3a48d5c-a135-11e9-9cda-0a87ae2ba916,Elston Ave & Cortland St,41.916433,-87.666746,23
3,a3ac7e9a-a135-11e9-9cda-0a87ae2ba916,Racine Ave & 35th St,41.830689,-87.656211,15
4,a3a62aad-a135-11e9-9cda-0a87ae2ba916,Loomis St & Jackson Blvd,41.877945,-87.662007,19


# import all data files matching specified pattern

In [56]:
df_station_status = pd.DataFrame()

In [57]:
for file_path in Path('../data').glob(filename_import):
    #read next file
    df = pd.read_csv(file_path)
    
    #timestamp for data retrieval. This should later be moved to the api-request.py script called in GitHub actions
    time_retrieved_str = file_path.name[15:32]

    date_object = datetime.strptime(time_retrieved_str, '%Y_%m_%d_%I%M%p')
    time_retrieved = date_object.strftime('%m/%d/%Y %I:%M %p')
    
    df["time_retrieved"]= time_retrieved
    df.rename(columns={'timestamp': 'time_reported'}, inplace=True)
    
    #append this file
    df_station_status = pd.concat([df_station_status,df], ignore_index=True)
df_station_status

Unnamed: 0,num_bikes_disabled,num_scooters_available,num_docks_available,num_docks_disabled,is_installed,is_renting,num_bikes_available,num_scooters_unavailable,num_ebikes_available,station_id,is_returning,time_reported,n_classic,n_electric,n_scooters,time_retrieved
0,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,4,0,0,11/17/2023 01:05 AM
1,0,0.0,4,0,1,1,7,0.0,1,da9e6779-5056-4d45-bae3-9dc325ed2807,1,11/17/2023 01:03 AM,6,1,0,11/17/2023 01:05 AM
2,0,1.0,7,0,1,1,6,1.0,0,a3a3f0c6-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,6,0,1,11/17/2023 01:05 AM
3,1,0.0,17,0,1,1,1,0.0,0,a3a795d7-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,1,0,0,11/17/2023 01:05 AM
4,0,0.0,1,0,1,1,10,0.0,1,a3b2be3a-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,9,1,0,11/17/2023 01:05 AM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16655,0,,1,0,1,1,0,,0,1715823822654071770,1,01/22/2023 09:45 PM,0,0,0,11/17/2023 12:09 AM
16656,0,0.0,7,0,1,1,2,0.0,2,1563698701206292480,1,01/22/2023 09:44 PM,0,0,0,11/17/2023 12:09 AM
16657,0,,2,0,1,1,0,,0,1674190591734328324,1,01/22/2023 09:46 PM,0,0,0,11/17/2023 12:09 AM
16658,0,,4,0,1,1,0,,0,1806749735787774444,1,05/04/2023 12:50 PM,0,0,0,11/17/2023 12:09 AM


# Prepare Data for Analysis

### calculate key metrics

In [58]:
#add calculated columns
df_station_status['is_no_classic']=(df_station_status['n_classic']==0)
df_station_status['is_no_docks']=(df_station_status['num_docks_available']==0)
df_station_status['is_problem_station']= df_station_status['is_no_classic'] | df_station_status['is_no_docks']

df_station_status.head()

Unnamed: 0,num_bikes_disabled,num_scooters_available,num_docks_available,num_docks_disabled,is_installed,is_renting,num_bikes_available,num_scooters_unavailable,num_ebikes_available,station_id,is_returning,time_reported,n_classic,n_electric,n_scooters,time_retrieved,is_no_classic,is_no_docks,is_problem_station
0,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,4,0,0,11/17/2023 01:05 AM,False,False,False
1,0,0.0,4,0,1,1,7,0.0,1,da9e6779-5056-4d45-bae3-9dc325ed2807,1,11/17/2023 01:03 AM,6,1,0,11/17/2023 01:05 AM,False,False,False
2,0,1.0,7,0,1,1,6,1.0,0,a3a3f0c6-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,6,0,1,11/17/2023 01:05 AM,False,False,False
3,1,0.0,17,0,1,1,1,0.0,0,a3a795d7-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,1,0,0,11/17/2023 01:05 AM,False,False,False
4,0,0.0,1,0,1,1,10,0.0,1,a3b2be3a-a135-11e9-9cda-0a87ae2ba916,1,11/17/2023 01:03 AM,9,1,0,11/17/2023 01:05 AM,False,False,False


# Merge Station Info

In [59]:
df_stations = pd.merge(df_station_status, df_station_info, on='station_id')
df_stations.head()

Unnamed: 0,num_bikes_disabled,num_scooters_available,num_docks_available,num_docks_disabled,is_installed,is_renting,num_bikes_available,num_scooters_unavailable,num_ebikes_available,station_id,...,n_electric,n_scooters,time_retrieved,is_no_classic,is_no_docks,is_problem_station,name,lat,lon,capacity
0,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,0,11/17/2023 01:05 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11
1,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,0,11/17/2023 02:07 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11
2,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,0,11/17/2023 03:06 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11
3,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,0,11/17/2023 04:06 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11
4,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,0,11/17/2023 05:05 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11


In [60]:
#identify and remove public racks
df_stations['is_public_rack']=df_stations['name'].str[:11]=='Public Rack'
df_stations_divvy = df_stations[df_stations['is_public_rack']==False]
df_stations_divvy

Unnamed: 0,num_bikes_disabled,num_scooters_available,num_docks_available,num_docks_disabled,is_installed,is_renting,num_bikes_available,num_scooters_unavailable,num_ebikes_available,station_id,...,n_scooters,time_retrieved,is_no_classic,is_no_docks,is_problem_station,name,lat,lon,capacity,is_public_rack
0,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,11/17/2023 01:05 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11,False
1,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,11/17/2023 02:07 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11,False
2,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,11/17/2023 03:06 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11,False
3,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,11/17/2023 04:06 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11,False
4,1,0.0,6,0,1,1,4,0.0,0,a3b2862e-a135-11e9-9cda-0a87ae2ba916,...,0,11/17/2023 05:05 AM,False,False,False,Bennett Ave & 79th St,41.751785,-87.578496,11,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16625,0,0.0,7,0,1,1,2,0.0,2,1563698701206292480,...,0,11/17/2023 06:11 AM,True,False,True,Lamon Ave & Belmont Ave,41.939011,-87.749283,9,False
16626,0,0.0,7,0,1,1,2,0.0,2,1563698701206292480,...,0,11/17/2023 07:09 AM,True,False,True,Lamon Ave & Belmont Ave,41.939011,-87.749283,9,False
16627,0,0.0,7,0,1,1,2,0.0,2,1563698701206292480,...,0,11/17/2023 08:06 AM,True,False,True,Lamon Ave & Belmont Ave,41.939011,-87.749283,9,False
16628,0,0.0,7,0,1,1,2,0.0,2,1563698701206292480,...,0,11/17/2023 09:06 AM,True,False,True,Lamon Ave & Belmont Ave,41.939011,-87.749283,9,False


# Export for analysis

In [61]:
df_stations_divvy.to_csv(f"../results/{filename_export}", index=False)