<h3>Next Steps</h3>
<ul>
    <li>Figure out why <code>num_ebikes_available</code> does not always match the count for <code>vehicle_type_id = 2</code> (sometimes greater, sometimes less).</li>
    <li>Why are there 1664 stations? (There should be fewer.)</li>
    <li>What do <code>is_renting</code> and <code>is_returning</code> mean?</li>
</ul>

<h3>Documentation</h3>
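Live station data comes from Divvy's GBFS feed. The discovery file that lists the available feeds is at
https://gbfs.divvybikes.com/gbfs/2.3/gbfs.json
(the cells below request individual feeds from gbfs.lyft.com).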

### Methods
I'm removing public racks and only looking at classic bikes.

### Findings
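837 stations have "Public Rack" in their name; 828 don't. (Note: these total 1665, not the 1664 stations mentioned under Next Steps; the counts may come from different pulls, so recheck.)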

# import libraries

In [1]:
import datetime
from pathlib import Path

import pandas as pd
import pytz
import requests

# review vehicle types

In [2]:
# GBFS 2.3 vehicle_types feed.
url = "https://gbfs.lyft.com/gbfs/2.3/chi/en/vehicle_types.json?"

# Fetch the feed. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() stops the notebook on an HTTP error instead
# of parsing an error page as if it were feed data.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
response

<Response [200]>

In [3]:
# Inspect the keys under the response's 'data' entry to locate the vehicle-type records.
# (Top-level keys can be checked with data.keys().)
data['data'].keys()

dict_keys(['vehicle_types'])

In [4]:
# Convert the response's `last_updated` epoch value to Central time and format it
# as a filename suffix such as '_2023_11_16_0926AM'.
# NOTE: `data` currently holds the vehicle_types response fetched above, so this
# is that feed's timestamp.
central_tz = pytz.timezone('America/Chicago')
# fromtimestamp(..., tz=...) builds a timezone-aware datetime directly; the naive
# datetime.utcfromtimestamp() is deprecated as of Python 3.12.
status_timestamp = datetime.datetime.fromtimestamp(data['last_updated'], tz=pytz.utc)
status_timestamp = status_timestamp.astimezone(central_tz).strftime('_%Y_%m_%d_%I%M%p')
status_timestamp

'_2023_11_16_0926AM'

In [5]:
# Show every vehicle type record in the feed.
data['data']['vehicle_types']

[{'vehicle_type_id': '1',
  'form_factor': 'bicycle',
  'propulsion_type': 'human'},
 {'max_range_meters': 54717.56,
  'vehicle_type_id': '2',
  'form_factor': 'bicycle',
  'propulsion_type': 'electric_assist'},
 {'max_range_meters': 70810.95999999999,
  'vehicle_type_id': '3',
  'form_factor': 'scooter',
  'propulsion_type': 'electric'}]

# import station info

In [6]:
# GBFS 2.3 station_information feed (station names, coordinates, capacity).
url = "https://gbfs.lyft.com/gbfs/2.3/chi/en/station_information.json?"

# Request the feed. The timeout keeps a stalled connection from hanging the
# kernel, and raise_for_status() stops the notebook on an HTTP error instead
# of parsing an error page as if it were feed data.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()
response

<Response [200]>

In [7]:
# Inspect the keys under the response's 'data' entry to locate the station records.
# (Top-level keys can be checked with data.keys().)
data['data'].keys()

dict_keys(['stations'])

In [8]:
# Look at the first station record to see which fields the feed provides.
data['data']['stations'][0]

{'lon': -87.617854,
 'name': 'McClurg Ct & Erie St',
 'station_id': 'a3a614ad-a135-11e9-9cda-0a87ae2ba916',
 'lat': 41.894503,
 'rental_uris': {'android': 'https://chi.lft.to/lastmile_qr_scan',
  'ios': 'https://chi.lft.to/lastmile_qr_scan'},
 'short_name': 'KA1503000041',
 'capacity': 31}

In [9]:
# Tabulate the station metadata, keeping only the listed fields
# (this leaves out `rental_uris`).
station_info_columns = ['station_id', 'name', 'lat', 'lon', 'capacity', 'short_name']
df_station_info = pd.DataFrame(data['data']['stations'], columns=station_info_columns)
df_station_info.head()

Unnamed: 0,station_id,name,lat,lon,capacity,short_name
0,a3a614ad-a135-11e9-9cda-0a87ae2ba916,McClurg Ct & Erie St,41.894503,-87.617854,31,KA1503000041
1,a3ab9119-a135-11e9-9cda-0a87ae2ba916,Ravenswood Ave & Lawrence Ave,41.968445,-87.674226,47,TA1309000066
2,a3b222fc-a135-11e9-9cda-0a87ae2ba916,Racine Ave & Garfield Blvd,41.794228,-87.655073,11,559
3,a3a79adb-a135-11e9-9cda-0a87ae2ba916,Green St & Madison St,41.881859,-87.649264,27,TA1307000120
4,a3a547b8-a135-11e9-9cda-0a87ae2ba916,63rd St Beach,41.780911,-87.576324,15,15491


# Import station status

In [10]:
# GBFS 2.3 station_status feed (live per-station bike and dock counts).
url = "https://gbfs.lyft.com/gbfs/2.3/chi/en/station_status.json?"
url

'https://gbfs.lyft.com/gbfs/2.3/chi/en/station_status.json?'

In [11]:
# Fetch the station_status feed; fail fast on an HTTP error or a stalled connection
# rather than parsing an error page as feed data.
response = requests.get(url, timeout=30)
response.raise_for_status()
data = response.json()

# Stamp the export with THIS feed's `last_updated` (converted to Central time) so the
# CSV filename reflects the station_status response rather than whichever feed
# happened to be fetched when status_timestamp was first computed.
status_timestamp = (
    datetime.datetime.fromtimestamp(data['last_updated'], tz=pytz.utc)
    .astimezone(pytz.timezone('America/Chicago'))
    .strftime('_%Y_%m_%d_%I%M%p')
)
response

<Response [200]>

In [12]:
# Inspect the keys under the response's 'data' entry to locate the station status records.
# (Top-level keys can be checked with data.keys().)
data['data'].keys()

dict_keys(['stations'])

In [13]:
# Keep the list of per-station status records, then look at the first one
# to see which fields are available.
data_station_status = data['data']['stations']
data_station_status[0]

{'num_bikes_available': 5,
 'num_scooters_unavailable': 0,
 'vehicle_types_available': [{'vehicle_type_id': '1', 'count': 4},
  {'vehicle_type_id': '2', 'count': 1}],
 'station_id': 'a3a547b8-a135-11e9-9cda-0a87ae2ba916',
 'is_renting': 1,
 'num_bikes_disabled': 0,
 'num_ebikes_available': 1,
 'is_returning': 1,
 'num_docks_disabled': 0,
 'vehicle_docks_available': [{'count': 10, 'vehicle_type_ids': ['1', '2']}],
 'num_scooters_available': 0,
 'num_docks_available': 10,
 'is_installed': 1,
 'last_reported': 1700148230}

In [14]:
# Load the status records into a DataFrame, keeping only the fields used below.
# `vehicle_types_available` is a nested list of {vehicle_type_id, count} dicts.
status_columns = [
    'station_id',
    'num_bikes_available',
    'vehicle_types_available',
    'num_bikes_disabled',
    'num_docks_available',
    'num_ebikes_available',
]
df_station_status = pd.DataFrame(data_station_status, columns=status_columns)

In [15]:
def count_vehicle_type(entries, vehicle_type_id):
    """Return the `count` of the first entry matching `vehicle_type_id`, else 0.

    Parameters
    ----------
    entries : list of dict
        One station's `vehicle_types_available` value; each dict carries
        `vehicle_type_id` and `count`. Anything that is not a list/tuple
        (e.g. the NaN pandas fills in when a record lacks the field) counts as 0
        instead of raising a TypeError.
    vehicle_type_id : str
        Feed vehicle type id: '1' human-powered bicycle, '2' electric-assist
        bicycle, '3' electric scooter.

    Returns
    -------
    The matching entry's `count`, or 0 when there is no match.
    """
    if not isinstance(entries, (list, tuple)):
        return 0
    return next(
        (entry['count'] for entry in entries if entry['vehicle_type_id'] == vehicle_type_id),
        0,
    )


# Add calculated columns. The chain builds a new frame rather than mutating in
# place; the column order matches the step-by-step original.
vehicle_types = df_station_status['vehicle_types_available']
df_station_status = (
    df_station_status
    .assign(
        n_classic=vehicle_types.apply(count_vehicle_type, args=('1',)),
        n_electric=vehicle_types.apply(count_vehicle_type, args=('2',)),
        # available + disabled bikes
        n_all_bikes=lambda d: d['num_bikes_available'] + d['num_bikes_disabled'],
    )
    # the nested list is no longer needed once the per-type counts exist
    .drop(columns='vehicle_types_available')
    .assign(
        is_no_classic=lambda d: d['n_classic'] == 0,
        is_no_docks=lambda d: d['num_docks_available'] == 0,
        # a station is flagged when it has no classic bikes OR no open docks
        is_problem_station=lambda d: d['is_no_classic'] | d['is_no_docks'],
        station_id_len=lambda d: d['station_id'].str.len(),
    )
)

df_station_status.head()

Unnamed: 0,station_id,num_bikes_available,num_bikes_disabled,num_docks_available,num_ebikes_available,n_classic,n_electric,n_all_bikes,is_no_classic,is_no_docks,is_problem_station,station_id_len
0,a3a547b8-a135-11e9-9cda-0a87ae2ba916,5,0,10,1,4,1,5,False,False,False,36
1,a3af9a83-a135-11e9-9cda-0a87ae2ba916,13,0,2,0,13,0,13,False,False,False,36
2,a3ac30c5-a135-11e9-9cda-0a87ae2ba916,6,1,8,0,6,0,7,False,False,False,36
3,a3addda0-a135-11e9-9cda-0a87ae2ba916,11,0,0,8,3,8,11,False,True,True,36
4,a3a3ace6-a135-11e9-9cda-0a87ae2ba916,1,0,13,0,1,0,1,False,False,False,36


# Merge Station Info

In [16]:
# Attach the station metadata columns to each status row.
# Inner join: only station_ids present in both frames are kept.
df_stations = df_station_status.merge(df_station_info, how='inner', on='station_id')
df_stations.head()

Unnamed: 0,station_id,num_bikes_available,num_bikes_disabled,num_docks_available,num_ebikes_available,n_classic,n_electric,n_all_bikes,is_no_classic,is_no_docks,is_problem_station,station_id_len,name,lat,lon,capacity,short_name
0,a3a547b8-a135-11e9-9cda-0a87ae2ba916,5,0,10,1,4,1,5,False,False,False,36,63rd St Beach,41.780911,-87.576324,15,15491
1,a3af9a83-a135-11e9-9cda-0a87ae2ba916,13,0,2,0,13,0,13,False,False,False,36,Drake Ave & Montrose Ave,41.961154,-87.716569,15,KA1504000097
2,a3ac30c5-a135-11e9-9cda-0a87ae2ba916,6,1,8,0,6,0,7,False,False,False,36,Cottage Grove Ave & 51st St,41.803038,-87.606615,15,TA1309000067
3,a3addda0-a135-11e9-9cda-0a87ae2ba916,11,0,0,8,3,8,11,False,True,True,36,Kedzie Ave & 24th St,41.848193,-87.705412,11,KA1504000086
4,a3a3ace6-a135-11e9-9cda-0a87ae2ba916,1,0,13,0,1,0,1,False,False,False,36,Racine Ave & 18th St,41.858166,-87.656495,15,13164


In [17]:
# Flag stations whose name begins with 'Public Rack', then keep only the others.
# na=False makes a missing name count as "not a public rack".
df_stations['is_public_rack'] = df_stations['name'].str.startswith('Public Rack', na=False)
df_stations_divvy = df_stations.loc[~df_stations['is_public_rack']]
df_stations_divvy

Unnamed: 0,station_id,num_bikes_available,num_bikes_disabled,num_docks_available,num_ebikes_available,n_classic,n_electric,n_all_bikes,is_no_classic,is_no_docks,is_problem_station,station_id_len,name,lat,lon,capacity,short_name,is_public_rack
0,a3a547b8-a135-11e9-9cda-0a87ae2ba916,5,0,10,1,4,1,5,False,False,False,36,63rd St Beach,41.780911,-87.576324,15,15491,False
1,a3af9a83-a135-11e9-9cda-0a87ae2ba916,13,0,2,0,13,0,13,False,False,False,36,Drake Ave & Montrose Ave,41.961154,-87.716569,15,KA1504000097,False
2,a3ac30c5-a135-11e9-9cda-0a87ae2ba916,6,1,8,0,6,0,7,False,False,False,36,Cottage Grove Ave & 51st St,41.803038,-87.606615,15,TA1309000067,False
3,a3addda0-a135-11e9-9cda-0a87ae2ba916,11,0,0,8,3,8,11,False,True,True,36,Kedzie Ave & 24th St,41.848193,-87.705412,11,KA1504000086,False
4,a3a3ace6-a135-11e9-9cda-0a87ae2ba916,1,0,13,0,1,0,1,False,False,False,36,Racine Ave & 18th St,41.858166,-87.656495,15,13164,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1643,1575949498093662914,1,1,7,1,0,0,2,True,False,True,19,Lavergne Ave & Division St,41.902513,-87.750869,9,,False
1644,1594046418168009436,6,1,2,6,0,0,7,True,False,True,19,Elston Ave & Henderson St,41.941499,-87.701051,9,,False
1650,1594046418168009438,1,0,7,1,0,0,1,True,False,True,19,Albany Ave & Belmont Ave,41.939638,-87.705263,9,,False
1652,1594046456822715150,5,1,3,5,0,0,6,True,False,True,19,Francisco Ave & Hollywood Ave,41.984746,-87.701593,9,,False


In [18]:
#clean name; this is moot since I've already stripped out the Public Rack stations
# df_stations['name_cleaned'] = df_stations['name'].str.replace('^Public Rack - ', '', regex=True)
# df_stations.drop(columns=['name'],inplace=True)

# Export for analysis

In [19]:
# Build the export path; status_timestamp already begins with '_' so it is
# appended straight after the base name.
filename = "../results/station_status{}.csv".format(status_timestamp)
filename

'../results/station_status_2023_11_16_0926AM.csv'

In [25]:
# Make sure the output folder exists before writing: to_csv() does not create
# missing directories and would otherwise fail with an OSError.
Path(filename).parent.mkdir(parents=True, exist_ok=True)
df_stations_divvy.to_csv(filename, index=False)

# Summarize Data

In [21]:
#total # of bikes
# df_stations_divvy['n_all_bikes'].sum()

In [22]:
#calculate percent broken (disabled) bikes
# NOTE: `n_broken_bikes` is not a column in these frames; the disabled count is
# `num_bikes_disabled`. Use the same frame for numerator and denominator so public
# racks are excluded from both.
# 100*df_stations_divvy['num_bikes_disabled'].sum()/df_stations_divvy['n_all_bikes'].sum()

# Quality Check

In [23]:
# n_electric (derived from vehicle_types_available) doesn't always match num_ebikes_available; it can be less or greater
# df_stations[df_stations['n_electric']!=df_stations['num_ebikes_available']]

In [24]:
#confirm totals
# NOTE(review): the original used nClassic/nElectric/nBikes, which are not columns here;
# presumably nBikes corresponds to num_bikes_available — confirm before enabling.
# df_stations['n_bikes_check'] = df_stations['n_classic'] + df_stations['n_electric']
# df_stations[df_stations['num_bikes_available']!=df_stations['n_bikes_check']]