# San Francisco Bay Area Bike Share Exploratory Data Analysis using Dask

In [6]:
import numpy as np
import pandas as pd
import dask.dataframe as dd
from dask.delayed import delayed
from dask import compute
from dask.distributed import Client, progress
import os
import datetime

## Import Data

In [41]:
# Directory holding the four bike-share CSV files. Kept in one place so that
# relocating the dataset means editing a single line instead of four.
DATA_DIR = '../011_dask/data'

# One Dask DataFrame per CSV file.
df_status = dd.read_csv(f'{DATA_DIR}/status.csv')
df_station = dd.read_csv(f'{DATA_DIR}/station.csv')
# zip_code is explicitly read as text ('object') instead of relying on
# dtype inference.
df_trip = dd.read_csv(f'{DATA_DIR}/trip.csv', dtype={'zip_code': 'object'})
df_weather = dd.read_csv(f'{DATA_DIR}/weather.csv')

## Data Preparation and Cleaning
- Look at the info about dataframes (rows, columns)
- Fix any missing or incorrect values
- Downsize the data?

In [45]:
# Print the column labels of every table, one table after another, in the
# order: status, station, trip, weather.
frames_by_label = {
    'Status': df_status,
    'Station': df_station,
    'Trip': df_trip,
    'Weather': df_weather,
}
for label, frame in frames_by_label.items():
    print(f'{label} columns: \n{frame.columns}')

Status columns: 
Index(['station_id', 'bikes_available', 'docks_available', 'time'], dtype='object')
Station columns: 
Index(['id', 'name', 'lat', 'long', 'dock_count', 'city', 'installation_date'], dtype='object')
Trip columns: 
Index(['id', 'duration', 'start_date', 'start_station_name',
       'start_station_id', 'end_date', 'end_station_name', 'end_station_id',
       'bike_id', 'subscription_type', 'zip_code'],
      dtype='object')
Weather columns: 
Index(['date', 'max_temperature_f', 'mean_temperature_f', 'min_temperature_f',
       'max_dew_point_f', 'mean_dew_point_f', 'min_dew_point_f',
       'max_humidity', 'mean_humidity', 'min_humidity',
       'max_sea_level_pressure_inches', 'mean_sea_level_pressure_inches',
       'min_sea_level_pressure_inches', 'max_visibility_miles',
       'mean_visibility_miles', 'min_visibility_miles', 'max_wind_Speed_mph',
       'mean_wind_speed_mph', 'max_gust_speed_mph', 'precipitation_inches',
       'cloud_cover', 'events', 'wind_dir_degrees', 'zip_code'],
      dtype='object')

### Station

In [58]:
# Preview the last rows of the station table.
df_station.tail()

Unnamed: 0,id,name,lat,long,dock_count,city,installation_date
65,77,Market at Sansome,37.789625,-122.400811,27,San Francisco,8/25/2013
66,80,Santa Clara County Civic Center,37.352601,-121.905733,15,San Jose,12/31/2013
67,82,Broadway St at Battery St,37.798541,-122.400862,15,San Francisco,1/22/2014
68,83,Mezes Park,37.491269,-122.236234,15,Redwood City,2/20/2014
69,84,Ryland Park,37.342725,-121.895617,15,San Jose,4/9/2014


In [62]:
# Row count of the status table.
# NOTE(review): the preceding cell previews df_station, yet this cell counts
# df_status — confirm that status (not station) is the intended table here.
len(df_status)

71984434

In [56]:
# Preview the last rows of the weather table.
df_weather.tail()

Unnamed: 0,date,max_temperature_f,mean_temperature_f,min_temperature_f,max_dew_point_f,mean_dew_point_f,min_dew_point_f,max_humidity,mean_humidity,min_humidity,...,mean_visibility_miles,min_visibility_miles,max_wind_Speed_mph,mean_wind_speed_mph,max_gust_speed_mph,precipitation_inches,cloud_cover,events,wind_dir_degrees,zip_code
3660,8/27/2015,92.0,78.0,63.0,57.0,51.0,40.0,78.0,48.0,18.0,...,10.0,10.0,23.0,6.0,29.0,0,3.0,,313.0,95113
3661,8/28/2015,95.0,80.0,64.0,64.0,56.0,52.0,93.0,60.0,26.0,...,10.0,10.0,25.0,7.0,30.0,0,3.0,,307.0,95113
3662,8/29/2015,80.0,72.0,64.0,65.0,62.0,54.0,93.0,70.0,47.0,...,10.0,10.0,21.0,9.0,26.0,0,4.0,,312.0,95113
3663,8/30/2015,78.0,70.0,62.0,60.0,57.0,53.0,84.0,64.0,43.0,...,10.0,10.0,22.0,10.0,29.0,0,3.0,,291.0,95113
3664,8/31/2015,85.0,72.0,59.0,59.0,55.0,51.0,84.0,58.0,32.0,...,10.0,10.0,20.0,6.0,24.0,0,1.0,,308.0,95113


In [63]:
# Row count of the weather table.
len(df_weather)

3665

In [57]:
# Preview the last rows of the trip table.
# NOTE(review): the index labels displayed by this cell (133707-133711) are
# far below the row count reported by len(df_trip) (669959), so they do not
# appear to be global row positions — presumably the index restarts per
# partition; verify before using the index as a row identifier.
df_trip.tail()

Unnamed: 0,id,duration,start_date,start_station_name,start_station_id,end_date,end_station_name,end_station_id,bike_id,subscription_type,zip_code
133707,432951,619,9/1/2014 4:21,Powell Street BART,39,9/1/2014 4:32,Townsend at 7th,65,335,Subscriber,94118
133708,432950,6712,9/1/2014 3:16,Harry Bridges Plaza (Ferry Building),50,9/1/2014 5:08,San Francisco Caltrain (Townsend at 4th),70,259,Customer,44100
133709,432949,538,9/1/2014 0:05,South Van Ness at Market,66,9/1/2014 0:14,5th at Howard,57,466,Customer,32
133710,432948,568,9/1/2014 0:05,South Van Ness at Market,66,9/1/2014 0:15,5th at Howard,57,461,Customer,32
133711,432947,569,9/1/2014 0:05,South Van Ness at Market,66,9/1/2014 0:15,5th at Howard,57,318,Customer,32


In [64]:
# Row count of the trip table.
len(df_trip)

669959

## Exploratory Data Analysis and Visualisation

## Ask and Answer Questions
- How does weather impact bike trips?
- How do bike trip patterns vary by time of day and the day of the week?

## Summary and Conclusion

## Notes