# Data Analysis with Pandas:  Seattle Bike Share Program Usage
### Paul Leonard on 11/29/2020


## Bringing in pandas library and the data

In [3]:
import pandas as pd

df_station = pd.read_csv('./station.csv')
df_trip = pd.read_csv('./trip.csv')
df_weather = pd.read_csv('./weather.csv')

## What is the average trip duration (in minutes) for a borrowed bicycle?

In [7]:
avg_trip_duration = df_trip["tripduration"].mean()/60
avg_trip_duration

19.639238067208627

## What's the most common age of a bicycle-sharer?

In [24]:
# df_trip["rider_age"] = pd.DatetimeIndex(df_trip["starttime"].apply(pd.to_datetime)).year
# Was working to take the year of each start time and subtract it from teh rider... but it became a lot of processing and took awhile... so going to do more of an estimate
# The data is from years 2014-2016, so going to make the assumption that the average and most common trip start date year is 2015
# now can do simple math from the birth year to the assumed travel year of 2015
# median_trip_start = df_trip["starttime"].median()
# could not convert string to floats... so going to assume 2015 (could possibly use regex to dig out years)
most_common_birth_year = df_trip["birthyear"].mode()
assumed_median_trip_start_year = 2015
most_common_age_of_rider = assumed_median_trip_start_year - most_common_birth_year
most_common_age_of_rider

0    28.0
dtype: float64

## Given all the weather data here, find the average precipitation per month, and the median precipitation.

In [57]:
#converts the date format to a standard one, then uses a panda method to pull off the month, and stores that month in a new series (column) called "month"
df_weather["month"] = pd.DatetimeIndex(df_weather["Date"].apply(pd.to_datetime)).month
# df_weather[["month","Precipitation_In"]].groupby(by="month").median()
df_monthly_precip = df_weather[["month","Precipitation_In"]].groupby(by="month")
# df_monthly_precip.median()
# df_monthly_precip.mean()
# df_monthly_precip.head(4)

d = {"month": [1,2,3,4,5,6,7,8,9,10,11,12]}
df_summary_precip_table = pd.DataFrame(data=d)
df_summary_precip_table["monthly_median"] = df_monthly_precip.median()
df_summary_precip_table["monthly_average"] = df_monthly_precip.mean()
df_summary_precip_table[["month","monthly_average","monthly_median"]].set_index("month")


Unnamed: 0_level_0,monthly_average,monthly_median
month,Unnamed: 1_level_1,Unnamed: 2_level_1
1,,
2,0.143548,0.02
3,0.168421,0.04
4,0.156935,0.025
5,0.051333,0.0
6,0.012419,0.0
7,0.0305,0.0
8,0.012097,0.0
9,0.018226,0.0
10,0.041,0.0


## What’s the average number of bikes at a given bike station?

In [58]:
avg_bike_per_station = df_station["current_dockcount"].mean()
avg_bike_per_station

16.517241379310345

## When a bike station is modified, is it more likely that it’ll lose bikes or gain bikes? How do you know?  (Bikes were reduced by an average of 3.8 bikes when modifications occured.)

In [91]:
# df_station[df_station["modification_date"] == "11/9/2015"][["station_id","modification_date"]]
# df_station[df_station["modification_date"] != "11/9/2015"][["station_id","modification_date"]]
# df_station[df_station["modification_date"] != "NaN"][["station_id","modification_date"]]
# df_station[pd.isnull(df_station["modification_date"])][["station_id","modification_date"]]
df_station["bike_delta_for_mod"] = df_station["current_dockcount"] - df_station["install_dockcount"]
df_station[pd.notnull(df_station["modification_date"])][["station_id","modification_date"]]
avg_bike_delta_for_mod = df_station[pd.notnull(df_station["modification_date"])]["bike_delta_for_mod"].mean()
avg_bike_delta_for_mod

-3.764705882352941

df_trip.head(5)

In [13]:
df_weather.head(5)

Unnamed: 0,Date,Max_Temperature_F,Mean_Temperature_F,Min_TemperatureF,Max_Dew_Point_F,MeanDew_Point_F,Min_Dewpoint_F,Max_Humidity,Mean_Humidity,Min_Humidity,...,Mean_Sea_Level_Pressure_In,Min_Sea_Level_Pressure_In,Max_Visibility_Miles,Mean_Visibility_Miles,Min_Visibility_Miles,Max_Wind_Speed_MPH,Mean_Wind_Speed_MPH,Max_Gust_Speed_MPH,Precipitation_In,Events
0,10/13/2014,71,62.0,54,55,51,46,87,68,46,...,29.79,29.65,10,10,4,13,4,21,0.0,Rain
1,10/14/2014,63,59.0,55,52,51,50,88,78,63,...,29.75,29.54,10,9,3,10,5,17,0.11,Rain
2,10/15/2014,62,58.0,54,53,50,46,87,77,67,...,29.71,29.51,10,9,3,18,7,25,0.45,Rain
3,10/16/2014,71,61.0,52,49,46,42,83,61,36,...,29.95,29.81,10,10,10,9,4,-,0.0,Rain
4,10/17/2014,64,60.0,57,55,51,41,87,72,46,...,29.78,29.73,10,10,6,8,3,-,0.14,Rain


In [14]:
df_station.head(5)

Unnamed: 0,station_id,name,lat,long,install_date,install_dockcount,modification_date,current_dockcount,decommission_date
0,BT-01,3rd Ave & Broad St,47.618418,-122.350964,10/13/2014,18,,18,
1,BT-03,2nd Ave & Vine St,47.615829,-122.348564,10/13/2014,16,,16,
2,BT-04,6th Ave & Blanchard St,47.616094,-122.341102,10/13/2014,16,,16,
3,BT-05,2nd Ave & Blanchard St,47.61311,-122.344208,10/13/2014,14,,14,
4,CBD-03,7th Ave & Union St,47.610731,-122.332447,10/13/2014,20,,20,


In [92]:
df_trip.head(5)

Unnamed: 0,trip_id,starttime,stoptime,bikeid,tripduration,from_station_name,to_station_name,from_station_id,to_station_id,usertype,gender,birthyear,rider_age
0,431,10/13/2014 10:31,10/13/2014 10:48,SEA00298,985.935,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1960.0,2014
1,432,10/13/2014 10:32,10/13/2014 10:48,SEA00195,926.375,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1970.0,2014
2,433,10/13/2014 10:33,10/13/2014 10:48,SEA00486,883.831,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1988.0,2014
3,434,10/13/2014 10:34,10/13/2014 10:48,SEA00333,865.937,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Female,1977.0,2014
4,435,10/13/2014 10:34,10/13/2014 10:49,SEA00202,923.923,2nd Ave & Spring St,Occidental Park / Occidental Ave S & S Washing...,CBD-06,PS-04,Member,Male,1971.0,2014
