# Read Data

In [5]:
import pandas as pd

In [6]:
df = pd.read_csv("../data/202401-divvy-tripdata.csv")

# Preliminary Review of Data Architecture

In [7]:
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual
0,C1D650626C8C899A,electric_bike,2024-01-12 15:30:27,2024-01-12 15:37:59,Wells St & Elm St,KA1504000135,Kingsbury St & Kinzie St,KA1503000043,41.903267,-87.634737,41.889177,-87.638506,member
1,EECD38BDB25BFCB0,electric_bike,2024-01-08 15:45:46,2024-01-08 15:52:59,Wells St & Elm St,KA1504000135,Kingsbury St & Kinzie St,KA1503000043,41.902937,-87.63444,41.889177,-87.638506,member
2,F4A9CE78061F17F7,electric_bike,2024-01-27 12:27:19,2024-01-27 12:35:19,Wells St & Elm St,KA1504000135,Kingsbury St & Kinzie St,KA1503000043,41.902951,-87.63447,41.889177,-87.638506,member
3,0A0D9E15EE50B171,classic_bike,2024-01-29 16:26:17,2024-01-29 16:56:06,Wells St & Randolph St,TA1305000030,Larrabee St & Webster Ave,13193,41.884295,-87.633963,41.921822,-87.64414,member
4,33FFC9805E3EFF9A,classic_bike,2024-01-31 05:43:23,2024-01-31 06:09:35,Lincoln Ave & Waveland Ave,13253,Kingsbury St & Kinzie St,KA1503000043,41.948797,-87.675278,41.889177,-87.638506,member


In [8]:
df.dtypes

ride_id                object
rideable_type          object
started_at             object
ended_at               object
start_station_name     object
start_station_id       object
end_station_name       object
end_station_id         object
start_lat             float64
start_lng             float64
end_lat               float64
end_lng               float64
member_casual          object
dtype: object

In [9]:
# verify end date
df['ended_at'].max()

'2024-02-02 00:01:21'

In [10]:
# convert times to datetime
df['started_at'] = pd.to_datetime(df['started_at'])
df['ended_at'] = pd.to_datetime(df['ended_at'])

In [11]:
#filter for just january, to confirm whether data is filtered by start time alone (i.e. trip started in January)
df_january = df[(df['started_at'].dt.month == 1) & (df['started_at'].dt.year == 2024)]
len(df_january)

144873

In [12]:
# check for nulls/ unreturned bikes
df['ended_at'].isnull().sum()

0

In [13]:
# parse date and time
df['start_date'] = df['started_at'].dt.strftime('%Y-%m-%d')
df['start_time'] = df['started_at'].dt.strftime('%H:%M')

In [14]:
df.head()

Unnamed: 0,ride_id,rideable_type,started_at,ended_at,start_station_name,start_station_id,end_station_name,end_station_id,start_lat,start_lng,end_lat,end_lng,member_casual,start_date,start_time
0,C1D650626C8C899A,electric_bike,2024-01-12 15:30:27,2024-01-12 15:37:59,Wells St & Elm St,KA1504000135,Kingsbury St & Kinzie St,KA1503000043,41.903267,-87.634737,41.889177,-87.638506,member,2024-01-12,15:30
1,EECD38BDB25BFCB0,electric_bike,2024-01-08 15:45:46,2024-01-08 15:52:59,Wells St & Elm St,KA1504000135,Kingsbury St & Kinzie St,KA1503000043,41.902937,-87.63444,41.889177,-87.638506,member,2024-01-08,15:45
2,F4A9CE78061F17F7,electric_bike,2024-01-27 12:27:19,2024-01-27 12:35:19,Wells St & Elm St,KA1504000135,Kingsbury St & Kinzie St,KA1503000043,41.902951,-87.63447,41.889177,-87.638506,member,2024-01-27,12:27
3,0A0D9E15EE50B171,classic_bike,2024-01-29 16:26:17,2024-01-29 16:56:06,Wells St & Randolph St,TA1305000030,Larrabee St & Webster Ave,13193,41.884295,-87.633963,41.921822,-87.64414,member,2024-01-29,16:26
4,33FFC9805E3EFF9A,classic_bike,2024-01-31 05:43:23,2024-01-31 06:09:35,Lincoln Ave & Waveland Ave,13253,Kingsbury St & Kinzie St,KA1503000043,41.948797,-87.675278,41.889177,-87.638506,member,2024-01-31,05:43


# Look for Daily Patterns

In [15]:
df_rides = df.groupby('start_date').size().reset_index(name='rides')

In [16]:
df_rides

Unnamed: 0,start_date,rides
0,2024-01-01,3658
1,2024-01-02,6533
2,2024-01-03,7468
3,2024-01-04,8120
4,2024-01-05,7383
5,2024-01-06,3542
6,2024-01-07,4440
7,2024-01-08,8052
8,2024-01-09,3261
9,2024-01-10,7870


# Review Weather

In [17]:
df_weather=pd.read_csv("../data/ncei_weather_jan_2024.csv")

In [18]:
df_weather.head()

Unnamed: 0,STATION,NAME,DATE,AWND,PGTM,PRCP,SNOW,SNWD,TAVG,TMAX,...,WDF5,WSF2,WSF5,WT01,WT02,WT04,WT05,WT06,WT08,WT09
0,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-01,10.07,,0.0,0.0,0.0,31,32,...,240,18.1,25.1,,,,,,,
1,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-02,12.08,,0.0,0.0,0.0,30,39,...,210,21.9,30.0,1.0,,,,,1.0,
2,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-03,7.16,,0.0,0.0,0.0,33,36,...,350,14.1,18.1,1.0,,,,1.0,,
3,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-04,4.92,,0.0,0.0,0.0,32,37,...,350,16.1,19.9,,,,,,,
4,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-05,6.71,,0.01,0.1,0.0,31,37,...,120,13.0,17.0,1.0,,,,,,


In [21]:
df_weather.sum()

STATION    USW00094846USW00094846USW00094846USW00094846US...
NAME       CHICAGO OHARE INTERNATIONAL AIRPORT, IL USCHIC...
DATE       2024-01-012024-01-022024-01-032024-01-042024-0...
AWND                                                  307.38
PGTM                                                   304.0
PRCP                                                     3.5
SNOW                                                    16.1
SNWD                                                    63.1
TAVG                                                     812
TMAX                                                     952
TMIN                                                     676
WDF2                                                    6660
WDF5                                                    6510
WSF2                                                   586.0
WSF5                                                   826.0
WT01                                                    22.0
WT02                    

In [43]:
df_combined = pd.merge(df_rides,df_weather,left_on="start_date",right_on="DATE")

In [44]:
df_combined

Unnamed: 0,start_date,rides,STATION,NAME,DATE,AWND,PGTM,PRCP,SNOW,SNWD,...,WDF5,WSF2,WSF5,WT01,WT02,WT04,WT05,WT06,WT08,WT09
0,2024-01-01,3658,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-01,10.07,,0.0,0.0,0.0,...,240,18.1,25.1,,,,,,,
1,2024-01-02,6533,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-02,12.08,,0.0,0.0,0.0,...,210,21.9,30.0,1.0,,,,,1.0,
2,2024-01-03,7468,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-03,7.16,,0.0,0.0,0.0,...,350,14.1,18.1,1.0,,,,1.0,,
3,2024-01-04,8120,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-04,4.92,,0.0,0.0,0.0,...,350,16.1,19.9,,,,,,,
4,2024-01-05,7383,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-05,6.71,,0.01,0.1,0.0,...,120,13.0,17.0,1.0,,,,,,
5,2024-01-06,3542,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-06,4.25,,0.21,2.6,2.0,...,110,10.1,14.1,1.0,,1.0,,,,
6,2024-01-07,4440,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-07,7.83,,0.02,0.8,3.1,...,280,14.1,19.0,1.0,,,,,1.0,
7,2024-01-08,8052,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-08,8.95,,0.08,0.4,1.2,...,100,18.1,23.9,1.0,,,,,,
8,2024-01-09,3261,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-09,13.2,,0.93,2.8,1.2,...,320,23.0,34.0,1.0,1.0,1.0,,,,
9,2024-01-10,7870,USW00094846,"CHICAGO OHARE INTERNATIONAL AIRPORT, IL US",2024-01-10,12.08,,0.0,0.1,2.0,...,310,23.0,33.1,1.0,,,,,,


In [45]:
df = df_combined[['start_date','SNOW','AWND','TAVG','rides']]

In [46]:
df.head()

Unnamed: 0,start_date,SNOW,AWND,TAVG,rides
0,2024-01-01,0.0,10.07,31,3658
1,2024-01-02,0.0,12.08,30,6533
2,2024-01-03,0.0,7.16,33,7468
3,2024-01-04,0.0,4.92,32,8120
4,2024-01-05,0.1,6.71,31,7383


In [47]:
df=df.rename(columns={'SNOW':'snowfall','AWND':'avg_wind_speed','TAVG':'avg_temp'})

In [48]:
# per https://www.chicagotribune.com/weather/ct-wea-asktom-0120-20180119-column.html
# Wind Chill = T - (V x 0.7)
df['wind_chill_simple']=df['avg_temp']-df['avg_wind_speed']*0.7
df['wind_chill']=35.74 + 0.6215*df['avg_temp']-35.75*(df['avg_wind_speed']**0.16)+.4275*df['avg_temp']*(df['avg_wind_speed']**0.16)

In [49]:
df

Unnamed: 0,start_date,snowfall,avg_wind_speed,avg_temp,rides,wind_chill_simple,wind_chill
0,2024-01-01,0.0,10.07,31,3658,23.951,22.451404
1,2024-01-02,0.0,12.08,30,6533,21.544,20.23112
2,2024-01-03,0.0,7.16,33,7468,27.988,26.594802
3,2024-01-04,0.0,4.92,32,8120,28.556,27.149524
4,2024-01-05,0.1,6.71,31,7383,26.303,24.498772
5,2024-01-06,2.6,4.25,33,3542,30.025,28.969168
6,2024-01-07,0.8,7.83,32,4440,26.519,24.951612
7,2024-01-08,0.4,8.95,33,8052,26.735,25.516914
8,2024-01-09,2.8,13.2,35,3261,25.76,26.080608
9,2024-01-10,0.1,12.08,33,7870,24.544,24.0063


In [29]:
df.to_csv("../results/streetsblog-divvy-winter.csv", index=False)