# Analysis of Lyft's Bay Wheels Bike Sharing Program

# Table of Contents
- Gathering

## Gathering

In [1]:
import os
import requests
import zipfile
import io
import glob
import pandas as pd

In [2]:
# Create the folder that will hold the downloaded and extracted files.
# exist_ok=True makes the cell safe to re-run and avoids the race between a
# separate os.path.exists() check and the directory creation.
folder_name = 'raw_files'
os.makedirs(folder_name, exist_ok=True)

In [3]:
#Create function to download and extract files from source
def extract(x):
    """Download the zip archive at URL ``x`` and unpack it into ``folder_name``.

    This is best-effort: a URL that does not yield a valid zip archive is
    reported and skipped, so a calling loop can continue with the next URL.

    Parameters
    ----------
    x : str
        URL of the zip archive to download.

    Returns
    -------
    None

    Raises
    ------
    requests.exceptions.RequestException
        If the request itself fails (e.g. connection error or timeout).
    """
    # A timeout keeps one stalled connection from hanging the notebook forever.
    response = requests.get(x, timeout=60)

    # An error status means no archive was served for this URL; skip it.
    if not response.ok:
        print('Skipped {} (HTTP {})'.format(x, response.status_code))
        return

    try:
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
            archive.extractall(folder_name)
    except zipfile.BadZipFile:
        # The body was not a zip archive: report it and continue the script.
        print('Skipped {} (not a zip archive)'.format(x))

In [4]:
# Build the period component of each download URL:
# a single '2017' token, followed by one 'YYYYMM' token for every month of
# 2018 and 2019.
month_year = ['2017']
month_year += [
    '{}{:02d}'.format(year, month)  # zero-pad the month to two digits
    for year in range(2018, 2020)
    for month in range(1, 13)
]

In [None]:
# Rebuild each period's download URL and fetch it.
# Periods that compare (as strings) below '201905' use the 'fordgobike' file
# name; every later period uses the 'baywheels' file name.
fordgobike_template = 'https://s3.amazonaws.com/baywheels-data/{}-fordgobike-tripdata.csv.zip'
baywheels_template = 'https://s3.amazonaws.com/baywheels-data/{}-baywheels-tripdata.csv.zip'

for period in month_year:
    template = fordgobike_template if period < '201905' else baywheels_template
    extract(template.format(period))

In [5]:
#Concat all csv into one dataframe
# Sort the paths: glob returns them in a filesystem-dependent order, which
# would make the row order (and the index assigned by ignore_index=True)
# differ between runs and machines.
csv_paths = sorted(glob.glob(os.path.join(folder_name, '*.csv')))
if not csv_paths:
    # pd.concat would raise a ValueError here anyway; say what is actually wrong.
    raise ValueError(
        'No CSV files found in {!r}; run the download cell first.'.format(folder_name)
    )

# Force these two columns to object dtype in every file that is read.
df = pd.concat(
    (
        pd.read_csv(
            csv_path,
            dtype={'bike_share_for_all_trip': object, 'rental_access_method': object},
        )
        for csv_path in csv_paths
    ),
    sort=False,
    ignore_index=True,
)

In [7]:
# Spot-check: display ten randomly chosen rows of the combined dataframe
df.sample(n=10)

Unnamed: 0,duration_sec,start_time,end_time,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,user_type,bike_share_for_all_trip,rental_access_method
3121384,2230,2019-04-14 09:59:35.8240,2019-04-14 10:36:46.3130,242.0,Milvia St at Derby St,37.860125,-122.269384,246.0,Berkeley Civic Center,37.86906,-122.270556,3545,Subscriber,Yes,
1920012,212,2018-10-31 15:05:13.6200,2018-10-31 15:08:46.5510,239.0,Bancroft Way at Telegraph Ave,37.868813,-122.258764,246.0,Berkeley Civic Center,37.86906,-122.270556,3599,Subscriber,Yes,
2644387,223,2019-02-19 20:52:28.7030,2019-02-19 20:56:12.2560,321.0,5th St at Folsom,37.780146,-122.403071,78.0,Folsom St at 9th St,37.773717,-122.411647,5349,Subscriber,No,
4417697,1975,2019-10-18 09:14:17.7570,2019-10-18 09:47:13.3260,43.0,San Francisco Public Library (Grove St at Hyde...,37.778768,-122.415929,377.0,Fell St at Stanyan St,37.771951,-122.453705,2270,Customer,No,
3110543,119,2019-04-16 09:03:18.3600,2019-04-16 09:05:18.0310,37.0,2nd St at Folsom St,37.785377,-122.396906,343.0,Bryant St at 2nd St,37.783172,-122.393572,1323,Subscriber,No,
1778534,376,2018-09-23 15:06:26.3280,2018-09-23 15:12:42.5190,324.0,Union Square (Powell St at Post St),37.7883,-122.408531,5.0,Powell St BART Station (Market St at 5th St),37.783899,-122.408445,4391,Subscriber,No,
2408677,516,2019-01-29 09:45:03.3110,2019-01-29 09:53:39.9040,3.0,Powell St BART Station (Market St at 4th St),37.786375,-122.404904,81.0,Berry St at 4th St,37.77588,-122.39317,1310,Subscriber,No,
242504,197,2017-10-15 09:03:13.0350,2017-10-15 09:06:30.6560,108.0,16th St Mission BART,37.76471,-122.419957,112.0,Harrison St at 17th St,37.763847,-122.413004,2553,Subscriber,,
4444973,337,2019-10-15 08:26:37.1570,2019-10-15 08:32:15.1350,30.0,San Francisco Caltrain (Townsend St at 4th St),37.776598,-122.395282,350.0,8th St at Brannan St,37.771431,-122.405787,9729,Subscriber,No,
4510129,2132,2019-10-06 18:28:32.1650,2019-10-06 19:04:04.2230,180.0,Telegraph Ave at 23rd St,37.812678,-122.268773,259.0,Addison St at Fourth St,37.866249,-122.299371,12632,Subscriber,No,
