In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import copy
import math
import glob
import os
import seaborn as sns

In [2]:
path = r"/Users/Oh/Documents/CodeAcademyBerlin/bike_analysis/data/years_2010_to_2017"
all_files = glob.glob(os.path.join(path, "*.csv"))
all_files.sort() # Files are arranged already in correct alphabetical order in the folder, so we want to preserve the order.

df_from_each_file = (pd.read_csv(f) for f in all_files)
concatenated_df   = pd.concat(df_from_each_file, ignore_index=True)



<h3> Dataframe with daily values </h3>
First, create a dataframe for keeping track of daily values. <br>
Further below, will create a df for hourly values.

In [3]:
# Create a dataframe for the daily values

df_all_years = concatenated_df.copy(deep = True)        # Want to preserve the original "concatenated_df"

In [4]:
df_all_years.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,1012,2010-09-20 11:27:04,2010-09-20 11:43:56,31208,M St & New Jersey Ave SE,31108,4th & M St SW,W00742,Member
1,61,2010-09-20 11:41:22,2010-09-20 11:42:23,31209,1st & N St SE,31209,1st & N St SE,W00032,Member
2,2690,2010-09-20 12:05:37,2010-09-20 12:50:27,31600,5th & K St NW,31100,19th St & Pennsylvania Ave NW,W00993,Member
3,1406,2010-09-20 12:06:05,2010-09-20 12:29:32,31600,5th & K St NW,31602,Park Rd & Holmead Pl NW,W00344,Member
4,1413,2010-09-20 12:10:43,2010-09-20 12:34:17,31100,19th St & Pennsylvania Ave NW,31201,15th & P St NW,W00883,Member


In [5]:
df_all_years['Start date'] = pd.to_datetime(df_all_years['Start date'])

In [6]:
df_all_years['End date'] = pd.to_datetime(df_all_years['End date'])

In [7]:
# Remove the time. In this dataframe, we only want to keep track of daily values. 
df_all_years['Start date'] = df_all_years['Start date'].dt.date

In [8]:
df_all_years['Start date'] = pd.to_datetime(df_all_years['Start date'])

In [9]:
df_all_years.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,1012,2010-09-20,2010-09-20 11:43:56,31208,M St & New Jersey Ave SE,31108,4th & M St SW,W00742,Member
1,61,2010-09-20,2010-09-20 11:42:23,31209,1st & N St SE,31209,1st & N St SE,W00032,Member
2,2690,2010-09-20,2010-09-20 12:50:27,31600,5th & K St NW,31100,19th St & Pennsylvania Ave NW,W00993,Member
3,1406,2010-09-20,2010-09-20 12:29:32,31600,5th & K St NW,31602,Park Rd & Holmead Pl NW,W00344,Member
4,1413,2010-09-20,2010-09-20 12:34:17,31100,19th St & Pennsylvania Ave NW,31201,15th & P St NW,W00883,Member


In [10]:
df_all_years.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19117643 entries, 0 to 19117642
Data columns (total 9 columns):
 #   Column                Dtype         
---  ------                -----         
 0   Duration              int64         
 1   Start date            datetime64[ns]
 2   End date              datetime64[ns]
 3   Start station number  int64         
 4   Start station         object        
 5   End station number    int64         
 6   End station           object        
 7   Bike number           object        
 8   Member type           object        
dtypes: datetime64[ns](2), int64(3), object(4)
memory usage: 1.3+ GB


Select rows where Member type == 'Member' (i.e. 'registered') <br>
Then do groupby + agg to find total number of registered users per day

In [11]:
df_agg_registered = df_all_years [df_all_years['Member type'] == 'Member'].groupby('Start date', as_index = False).count().rename(columns={'Member type': 'Registered'})

In [12]:
df_agg_registered.head()

Unnamed: 0,Start date,Duration,End date,Start station number,Start station,End station number,End station,Bike number,Registered
0,2010-09-20,178,178,178,178,178,178,178,178
1,2010-09-21,215,215,215,215,215,215,215,215
2,2010-09-22,260,260,260,260,260,260,260,260
3,2010-09-23,249,249,249,249,249,249,249,249
4,2010-09-24,206,206,206,206,206,206,206,206


In [13]:
df_agg_registered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2656 entries, 0 to 2655
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   Start date            2656 non-null   datetime64[ns]
 1   Duration              2656 non-null   int64         
 2   End date              2656 non-null   int64         
 3   Start station number  2656 non-null   int64         
 4   Start station         2656 non-null   int64         
 5   End station number    2656 non-null   int64         
 6   End station           2656 non-null   int64         
 7   Bike number           2656 non-null   int64         
 8   Registered            2656 non-null   int64         
dtypes: datetime64[ns](1), int64(8)
memory usage: 186.9 KB


In [14]:
df_agg_registered = df_agg_registered[['Start date', 'Registered']]

In [15]:
# This contains number of registered users for each day.
df_agg_registered

Unnamed: 0,Start date,Registered
0,2010-09-20,178
1,2010-09-21,215
2,2010-09-22,260
3,2010-09-23,249
4,2010-09-24,206
...,...,...
2651,2017-12-27,2934
2652,2017-12-28,2618
2653,2017-12-29,2744
2654,2017-12-30,1633


Select rows where Member type == 'Casual' <br>
Then do groupby + agg to find total number of casual users per day

In [16]:
df_agg_casual = df_all_years [df_all_years['Member type'] == 'Casual'].groupby('Start date', as_index = False).count().rename(columns={'Member type': 'Casual'})

In [17]:
df_agg_casual = df_agg_casual [['Start date', 'Casual']]

In [18]:
df_agg_casual

Unnamed: 0,Start date,Casual
0,2010-09-20,34
1,2010-09-21,109
2,2010-09-22,117
3,2010-09-23,124
4,2010-09-24,156
...,...,...
2651,2017-12-27,387
2652,2017-12-28,248
2653,2017-12-29,344
2654,2017-12-30,243


In [19]:
df_all_agg_daily = pd.merge(df_agg_registered, df_agg_casual, how = 'outer', on = ['Start date'])

In [20]:
df_all_agg_daily = df_all_agg_daily.rename(columns = {'Start date': 'date', 
                                            'Registered': 'registered',
                                            'Casual': 'casual'})

In [21]:
df_all_agg_daily.head()

Unnamed: 0,date,registered,casual
0,2010-09-20,178,34
1,2010-09-21,215,109
2,2010-09-22,260,117
3,2010-09-23,249,124
4,2010-09-24,206,156


In [22]:
df_all_agg_daily.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2656 entries, 0 to 2655
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        2656 non-null   datetime64[ns]
 1   registered  2656 non-null   int64         
 2   casual      2656 non-null   int64         
dtypes: datetime64[ns](1), int64(2)
memory usage: 83.0 KB


In [23]:
df_all_agg_daily.isna().sum()

date          0
registered    0
casual        0
dtype: int64

In [24]:
df_all_agg_daily['year'] = df_all_agg_daily['date'].dt.year
df_all_agg_daily['month'] = df_all_agg_daily['date'].dt.month
df_all_agg_daily['day'] = df_all_agg_daily['date'].dt.day_name()

In [25]:
df_all_agg_daily['day'] = df_all_agg_daily['day'].apply(lambda x: x[0:3])

In [26]:
df_all_agg_daily.head()

Unnamed: 0,date,registered,casual,year,month,day
0,2010-09-20,178,34,2010,9,Mon
1,2010-09-21,215,109,2010,9,Tue
2,2010-09-22,260,117,2010,9,Wed
3,2010-09-23,249,124,2010,9,Thu
4,2010-09-24,206,156,2010,9,Fri


Great. So now we have a dataframe with the following columns: <br>
date, total registered users, total casual users, year, month, day

Now we want to create the same kind of data, but on an hourly basis.

In [27]:
df_all_years_hourly = concatenated_df.copy(deep = True)

In [28]:
df_all_years_hourly.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,1012,2010-09-20 11:27:04,2010-09-20 11:43:56,31208,M St & New Jersey Ave SE,31108,4th & M St SW,W00742,Member
1,61,2010-09-20 11:41:22,2010-09-20 11:42:23,31209,1st & N St SE,31209,1st & N St SE,W00032,Member
2,2690,2010-09-20 12:05:37,2010-09-20 12:50:27,31600,5th & K St NW,31100,19th St & Pennsylvania Ave NW,W00993,Member
3,1406,2010-09-20 12:06:05,2010-09-20 12:29:32,31600,5th & K St NW,31602,Park Rd & Holmead Pl NW,W00344,Member
4,1413,2010-09-20 12:10:43,2010-09-20 12:34:17,31100,19th St & Pennsylvania Ave NW,31201,15th & P St NW,W00883,Member


In [29]:
df_all_years_hourly['Start date'] = pd.to_datetime(df_all_years_hourly['Start date'])

In [30]:
df_all_years_hourly['End date'] = pd.to_datetime(df_all_years_hourly['End date'])

In [31]:
df_all_years_hourly['hour'] = df_all_years_hourly['Start date'].dt.hour

In [32]:
df_all_years_hourly.head(5)

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type,hour
0,1012,2010-09-20 11:27:04,2010-09-20 11:43:56,31208,M St & New Jersey Ave SE,31108,4th & M St SW,W00742,Member,11
1,61,2010-09-20 11:41:22,2010-09-20 11:42:23,31209,1st & N St SE,31209,1st & N St SE,W00032,Member,11
2,2690,2010-09-20 12:05:37,2010-09-20 12:50:27,31600,5th & K St NW,31100,19th St & Pennsylvania Ave NW,W00993,Member,12
3,1406,2010-09-20 12:06:05,2010-09-20 12:29:32,31600,5th & K St NW,31602,Park Rd & Holmead Pl NW,W00344,Member,12
4,1413,2010-09-20 12:10:43,2010-09-20 12:34:17,31100,19th St & Pennsylvania Ave NW,31201,15th & P St NW,W00883,Member,12


In [33]:
df_all_years_hourly['Start date'] = df_all_years_hourly['Start date'].dt.date

Select rows where Member type == 'Member' (i.e. 'registered') <br>
Then do groupby + agg to find total number of registered users per hour

In [34]:
df_agg_registered_hourly = df_all_years_hourly [df_all_years_hourly['Member type'] == 'Member'].groupby(['Start date', 'hour'], as_index = False).count().rename(columns={'Member type': 'Registered'})

In [35]:
df_agg_registered_hourly.head()

Unnamed: 0,Start date,hour,Duration,End date,Start station number,Start station,End station number,End station,Bike number,Registered
0,2010-09-20,11,2,2,2,2,2,2,2,2
1,2010-09-20,12,17,17,17,17,17,17,17,17
2,2010-09-20,13,11,11,11,11,11,11,11,11
3,2010-09-20,14,4,4,4,4,4,4,4,4
4,2010-09-20,15,10,10,10,10,10,10,10,10


In [36]:
df_agg_registered_hourly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 63156 entries, 0 to 63155
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Start date            63156 non-null  object
 1   hour                  63156 non-null  int64 
 2   Duration              63156 non-null  int64 
 3   End date              63156 non-null  int64 
 4   Start station number  63156 non-null  int64 
 5   Start station         63156 non-null  int64 
 6   End station number    63156 non-null  int64 
 7   End station           63156 non-null  int64 
 8   Bike number           63156 non-null  int64 
 9   Registered            63156 non-null  int64 
dtypes: int64(9), object(1)
memory usage: 4.8+ MB


In [37]:
df_agg_registered_hourly = df_agg_registered_hourly[['Start date', 'hour', 'Registered']]

In [38]:
# This contains number of registered users for each day.
df_agg_registered_hourly

Unnamed: 0,Start date,hour,Registered
0,2010-09-20,11,2
1,2010-09-20,12,17
2,2010-09-20,13,11
3,2010-09-20,14,4
4,2010-09-20,15,10
...,...,...,...
63151,2017-12-31,19,55
63152,2017-12-31,20,30
63153,2017-12-31,21,44
63154,2017-12-31,22,21


Select rows where Member type == 'Casual'  <br>
Then do groupby + agg to find total number of casual users per hour

In [39]:
df_agg_casual_hourly = df_all_years_hourly [df_all_years_hourly['Member type'] == 'Casual'].groupby(['Start date', 'hour'], as_index = False).count().rename(columns={'Member type': 'Casual'})

In [40]:
df_agg_casual_hourly.head()

Unnamed: 0,Start date,hour,Duration,End date,Start station number,Start station,End station number,End station,Bike number,Casual
0,2010-09-20,14,2,2,2,2,2,2,2,2
1,2010-09-20,15,2,2,2,2,2,2,2,2
2,2010-09-20,16,5,5,5,5,5,5,5,5
3,2010-09-20,17,2,2,2,2,2,2,2,2
4,2010-09-20,18,3,3,3,3,3,3,3,3


In [41]:
df_agg_casual_hourly.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57905 entries, 0 to 57904
Data columns (total 10 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Start date            57905 non-null  object
 1   hour                  57905 non-null  int64 
 2   Duration              57905 non-null  int64 
 3   End date              57905 non-null  int64 
 4   Start station number  57905 non-null  int64 
 5   Start station         57905 non-null  int64 
 6   End station number    57905 non-null  int64 
 7   End station           57905 non-null  int64 
 8   Bike number           57905 non-null  int64 
 9   Casual                57905 non-null  int64 
dtypes: int64(9), object(1)
memory usage: 4.4+ MB


In [42]:
df_agg_casual_hourly = df_agg_casual_hourly[['Start date', 'hour', 'Casual']]

In [43]:
# This contains number of casual users for each day.
df_agg_casual_hourly

Unnamed: 0,Start date,hour,Casual
0,2010-09-20,14,2
1,2010-09-20,15,2
2,2010-09-20,16,5
3,2010-09-20,17,2
4,2010-09-20,18,3
...,...,...,...
57900,2017-12-31,17,8
57901,2017-12-31,19,4
57902,2017-12-31,21,2
57903,2017-12-31,22,4


In [44]:
df_all_agg_hourly = pd.merge(df_agg_registered_hourly, df_agg_casual_hourly, how = 'outer', on = ['Start date', 'hour'])

In [45]:
df_all_agg_hourly.head()
#df_all_agg_hourly[df_all_agg_hourly['Start date'] == '2011-01-01'].head(50)

Unnamed: 0,Start date,hour,Registered,Casual
0,2010-09-20,11,2.0,
1,2010-09-20,12,17.0,
2,2010-09-20,13,11.0,
3,2010-09-20,14,4.0,2.0
4,2010-09-20,15,10.0,2.0


In [46]:
df_all_agg_hourly.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 63240 entries, 0 to 63239
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Start date  63240 non-null  object 
 1   hour        63240 non-null  int64  
 2   Registered  63156 non-null  float64
 3   Casual      57905 non-null  float64
dtypes: float64(2), int64(1), object(1)
memory usage: 2.4+ MB


In [47]:
df_all_agg_hourly['Start date'] = pd.to_datetime(df_all_agg_hourly['Start date'])

In [48]:
df_all_agg_hourly[df_all_agg_hourly['Start date'].dt.year == 2011].head(10)

Unnamed: 0,Start date,hour,Registered,Casual
2298,2011-01-01,0,13.0,3.0
2299,2011-01-01,1,30.0,8.0
2300,2011-01-01,2,26.0,5.0
2301,2011-01-01,3,9.0,3.0
2302,2011-01-01,4,1.0,
2303,2011-01-01,5,1.0,
2304,2011-01-01,7,2.0,1.0
2305,2011-01-01,8,7.0,1.0
2306,2011-01-01,9,6.0,8.0
2307,2011-01-01,10,22.0,12.0


In [49]:
# But we know that NaN are all zeros. So:

df_all_agg_hourly['Registered'] = df_all_agg_hourly['Registered'].fillna(0)
df_all_agg_hourly['Casual'] = df_all_agg_hourly['Casual'].fillna(0)

In [50]:
df_all_agg_hourly[df_all_agg_hourly['Start date'].dt.year == 2011].head(10)

Unnamed: 0,Start date,hour,Registered,Casual
2298,2011-01-01,0,13.0,3.0
2299,2011-01-01,1,30.0,8.0
2300,2011-01-01,2,26.0,5.0
2301,2011-01-01,3,9.0,3.0
2302,2011-01-01,4,1.0,0.0
2303,2011-01-01,5,1.0,0.0
2304,2011-01-01,7,2.0,1.0
2305,2011-01-01,8,7.0,1.0
2306,2011-01-01,9,6.0,8.0
2307,2011-01-01,10,22.0,12.0


In [51]:
df_all_agg_hourly['year'] = df_all_agg_hourly['Start date'].dt.year
df_all_agg_hourly['month'] = df_all_agg_hourly['Start date'].dt.month
df_all_agg_hourly['day'] = df_all_agg_hourly['Start date'].dt.day_name()
df_all_agg_hourly['day'] = df_all_agg_hourly['day'].apply(lambda x: x[0:3])

In [52]:
df_all_agg_hourly.head()

Unnamed: 0,Start date,hour,Registered,Casual,year,month,day
0,2010-09-20,11,2.0,0.0,2010,9,Mon
1,2010-09-20,12,17.0,0.0,2010,9,Mon
2,2010-09-20,13,11.0,0.0,2010,9,Mon
3,2010-09-20,14,4.0,2.0,2010,9,Mon
4,2010-09-20,15,10.0,2.0,2010,9,Mon


In [53]:
df_all_agg_hourly = df_all_agg_hourly.rename(columns = {'Start date': 'date',
                                    'Registered': 'registered',
                                    'Casual': 'casual'})

In [54]:
df_all_agg_daily.head()

Unnamed: 0,date,registered,casual,year,month,day
0,2010-09-20,178,34,2010,9,Mon
1,2010-09-21,215,109,2010,9,Tue
2,2010-09-22,260,117,2010,9,Wed
3,2010-09-23,249,124,2010,9,Thu
4,2010-09-24,206,156,2010,9,Fri


In [55]:
df_all_agg_hourly.head()

Unnamed: 0,date,hour,registered,casual,year,month,day
0,2010-09-20,11,2.0,0.0,2010,9,Mon
1,2010-09-20,12,17.0,0.0,2010,9,Mon
2,2010-09-20,13,11.0,0.0,2010,9,Mon
3,2010-09-20,14,4.0,2.0,2010,9,Mon
4,2010-09-20,15,10.0,2.0,2010,9,Mon


In [71]:
df_all_agg_daily ['year_month'] = df_all_agg_daily['date'].dt.to_period('M')
df_all_agg_hourly ['year_month'] = df_all_agg_hourly['date'].dt.to_period('M')

In [73]:
df_all_agg_daily.head()
df_all_agg_hourly.head()

Unnamed: 0,date,hour,registered,casual,year,month,day,year_month
0,2010-09-20,11,2.0,0.0,2010,9,Mon,2010-09
1,2010-09-20,12,17.0,0.0,2010,9,Mon,2010-09
2,2010-09-20,13,11.0,0.0,2010,9,Mon,2010-09
3,2010-09-20,14,4.0,2.0,2010,9,Mon,2010-09
4,2010-09-20,15,10.0,2.0,2010,9,Mon,2010-09


In [74]:
df_all_agg_daily.info()
df_all_agg_hourly.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2656 entries, 0 to 2655
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        2656 non-null   datetime64[ns]
 1   registered  2656 non-null   int64         
 2   casual      2656 non-null   int64         
 3   year        2656 non-null   int64         
 4   month       2656 non-null   int64         
 5   day         2656 non-null   object        
 6   year_month  2656 non-null   period[M]     
dtypes: datetime64[ns](1), int64(4), object(1), period[M](1)
memory usage: 166.0+ KB
<class 'pandas.core.frame.DataFrame'>
Int64Index: 63240 entries, 0 to 63239
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        63240 non-null  datetime64[ns]
 1   hour        63240 non-null  int64         
 2   registered  63240 non-null  float64       
 3   casual      63240 non-nu

In [75]:
# Export dataframes

df_all_agg_daily.to_csv('cleaned_data/df_all_agg_daily.csv', index = False)
df_all_agg_hourly.to_csv('cleaned_data/df_all_agg_hourly.csv', index = False)



In [76]:
df_all_years.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,1012,2010-09-20,2010-09-20 11:43:56,31208,M St & New Jersey Ave SE,31108,4th & M St SW,W00742,Member
1,61,2010-09-20,2010-09-20 11:42:23,31209,1st & N St SE,31209,1st & N St SE,W00032,Member
2,2690,2010-09-20,2010-09-20 12:50:27,31600,5th & K St NW,31100,19th St & Pennsylvania Ave NW,W00993,Member
3,1406,2010-09-20,2010-09-20 12:29:32,31600,5th & K St NW,31602,Park Rd & Holmead Pl NW,W00344,Member
4,1413,2010-09-20,2010-09-20 12:34:17,31100,19th St & Pennsylvania Ave NW,31201,15th & P St NW,W00883,Member


Want to export df_all_years (full data set) <br>
But the export / import is taking so long. <br>
Maybe work with just 3 years' data first. 

In [78]:
df_all_years.head()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
0,1012,2010-09-20,2010-09-20 11:43:56,31208,M St & New Jersey Ave SE,31108,4th & M St SW,W00742,Member
1,61,2010-09-20,2010-09-20 11:42:23,31209,1st & N St SE,31209,1st & N St SE,W00032,Member
2,2690,2010-09-20,2010-09-20 12:50:27,31600,5th & K St NW,31100,19th St & Pennsylvania Ave NW,W00993,Member
3,1406,2010-09-20,2010-09-20 12:29:32,31600,5th & K St NW,31602,Park Rd & Holmead Pl NW,W00344,Member
4,1413,2010-09-20,2010-09-20 12:34:17,31100,19th St & Pennsylvania Ave NW,31201,15th & P St NW,W00883,Member


In [79]:
df_all_years_shortened = df_all_years.copy(deep = True)

In [80]:
df_all_years_shortened = df_all_years_shortened [(df_all_years_shortened['Start date'].dt.year >= 2011) &(df_all_years_shortened['Start date'].dt.year <= 2013)]

In [82]:
df_all_years_shortened.head()
df_all_years_shortened.tail()

Unnamed: 0,Duration,Start date,End date,Start station number,Start station,End station number,End station,Bike number,Member type
5926811,1571,2013-12-31,2014-01-01 00:19:19,31244,4th & E St SW,31271,Constitution Ave & 2nd St NW/DOL,W20384,Member
5926812,1564,2013-12-31,2014-01-01 00:19:31,31244,4th & E St SW,31271,Constitution Ave & 2nd St NW/DOL,W00319,Member
5926813,1687,2013-12-31,2014-01-01 00:23:06,31015,Rosslyn Metro / Wilson Blvd & Ft Myer Dr,31015,Rosslyn Metro / Wilson Blvd & Ft Myer Dr,W01038,Casual
5926814,1583,2013-12-31,2014-01-01 00:23:13,31015,Rosslyn Metro / Wilson Blvd & Ft Myer Dr,31015,Rosslyn Metro / Wilson Blvd & Ft Myer Dr,W00020,Casual
5926815,474,2013-12-31,2014-01-01 00:06:13,31222,New York Ave & 15th St NW,31260,23rd & E St NW,W01407,Member


In [83]:
df_all_years_shortened.to_csv('cleaned_data/df_all_years.csv', index = False)

Run time: 1m 50 s