# Table of Contents
 <p>

In [1]:
import pandas as pd
from zipfile import ZipFile
from StringIO import StringIO
import urllib2

In [2]:
# Pull down Citibike trip data into memory.
# Each month is fetched as a zip archive from S3 and the CSV member inside it
# is read straight from the in-memory bytes, so nothing is written to disk.


def _parse_citibike_time(column):
    """Convert a column of Citibike timestamp strings to datetimes.

    Citibike's date format is inconsistent between months, so the explicit
    formats are tried in order (with seconds, then without) and pandas'
    own format inference is used as the last resort.

    Parameters
    ----------
    column : pandas.Series
        Raw ``starttime`` / ``stoptime`` values as read from the CSV.

    Returns
    -------
    pandas.Series
        The same values converted with ``pd.to_datetime``.

    Raises
    ------
    Whatever the final, format-free ``pd.to_datetime`` call raises if the
    values cannot be parsed at all.
    """
    for fmt in ('%m/%d/%Y %H:%M:%S', '%m/%d/%Y %H:%M'):
        try:
            return pd.to_datetime(column, format=fmt)
        except (ValueError, TypeError):
            # Format mismatch: fall through to the next candidate. Unlike a
            # bare ``except:``, this lets KeyboardInterrupt / SystemExit
            # propagate so a long download loop can still be interrupted.
            continue
    return pd.to_datetime(column)


ride_frames = []

# A list of months, valid for 01-12
months = ['01', '02', '03', '04', '05', '06']
for month in months:
    url = "https://s3.amazonaws.com/tripdata/2015%s-citibike-tripdata.zip" % month

    # Close the HTTP response explicitly; try/finally is used because the
    # urllib2 response object is not a context manager.
    response = urllib2.urlopen(url)
    try:
        payload = response.read()
    finally:
        response.close()

    # Release the archive and its member handle as soon as the CSV is parsed.
    with ZipFile(StringIO(payload)) as archive:
        member = archive.open("2015%s-citibike-tripdata.csv" % month)
        try:
            frame = pd.read_csv(member)
        finally:
            member.close()

    frame['starttime'] = _parse_citibike_time(frame.starttime)
    frame['stoptime'] = _parse_citibike_time(frame.stoptime)
    ride_frames.append(frame)

# Concatenate all the individual months into one frame with a fresh 0..n-1 index.
rides = pd.concat(ride_frames, ignore_index=True)

In [3]:
# Derive two calendar features from each ride's start timestamp:
#   day  -> day of the week as an integer (2015-01-01, a Thursday, yields 3)
#   date -> ordinal day within the year (January 1st yields 1)
start_accessor = rides['starttime'].dt
rides['day'] = start_accessor.dayofweek
rides['date'] = start_accessor.dayofyear

In [4]:
# Preview the first rows to sanity-check the parsed timestamps and the derived day/date columns.
rides.head()

Unnamed: 0,tripduration,starttime,stoptime,start station id,start station name,start station latitude,start station longitude,end station id,end station name,end station latitude,end station longitude,bikeid,usertype,birth year,gender,day,date
0,1346,2015-01-01 00:01:00,2015-01-01 00:24:00,455,1 Ave & E 44 St,40.75002,-73.969053,265,Stanton St & Chrystie St,40.722293,-73.991475,18660,Subscriber,1960.0,2,3,1
1,363,2015-01-01 00:02:00,2015-01-01 00:08:00,434,9 Ave & W 18 St,40.743174,-74.003664,482,W 15 St & 7 Ave,40.739355,-73.999318,16085,Subscriber,1963.0,1,3,1
2,346,2015-01-01 00:04:00,2015-01-01 00:10:00,491,E 24 St & Park Ave S,40.740964,-73.986022,505,6 Ave & W 33 St,40.749013,-73.988484,20845,Subscriber,1974.0,1,3,1
3,182,2015-01-01 00:04:00,2015-01-01 00:07:00,384,Fulton St & Waverly Ave,40.683178,-73.965964,399,Lafayette Ave & St James Pl,40.688515,-73.964763,19610,Subscriber,1969.0,1,3,1
4,969,2015-01-01 00:05:00,2015-01-01 00:21:00,474,5 Ave & E 29 St,40.745168,-73.986831,432,E 7 St & Avenue A,40.726218,-73.983799,20197,Subscriber,1977.0,1,3,1


In [4]:
# Normalise the column labels: trim surrounding whitespace and swap every
# space for an underscore (e.g. 'start station id' -> 'start_station_id').
rides.columns = [
    '_'.join(label.strip().split(' '))
    for label in rides.columns
]

In [7]:
# Write the combined rides table to a CSV file (path is relative to the working directory);
# index=False leaves the row index out of the file.
rides.to_csv('citibike_rides.csv', index=False)

In [8]:
# Preview the first rows again to confirm the column labels now use underscores.
rides.head()

Unnamed: 0,tripduration,starttime,stoptime,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bikeid,usertype,birth_year,gender,day,date
0,1346,2015-01-01 00:01:00,2015-01-01 00:24:00,455,1 Ave & E 44 St,40.75002,-73.969053,265,Stanton St & Chrystie St,40.722293,-73.991475,18660,Subscriber,1960.0,2,3,1
1,363,2015-01-01 00:02:00,2015-01-01 00:08:00,434,9 Ave & W 18 St,40.743174,-74.003664,482,W 15 St & 7 Ave,40.739355,-73.999318,16085,Subscriber,1963.0,1,3,1
2,346,2015-01-01 00:04:00,2015-01-01 00:10:00,491,E 24 St & Park Ave S,40.740964,-73.986022,505,6 Ave & W 33 St,40.749013,-73.988484,20845,Subscriber,1974.0,1,3,1
3,182,2015-01-01 00:04:00,2015-01-01 00:07:00,384,Fulton St & Waverly Ave,40.683178,-73.965964,399,Lafayette Ave & St James Pl,40.688515,-73.964763,19610,Subscriber,1969.0,1,3,1
4,969,2015-01-01 00:05:00,2015-01-01 00:21:00,474,5 Ave & E 29 St,40.745168,-73.986831,432,E 7 St & Avenue A,40.726218,-73.983799,20197,Subscriber,1977.0,1,3,1
