In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime 
import seaborn as sb
import scipy.stats as stat
%matplotlib inline
import random

In [3]:
# Load the reduced bikeshare trip data, parsing the four date columns as datetimes.
bikeshare = pd.read_csv(
    'bikeshare_reduced.csv',
    parse_dates=['Start date', 'End date', 'start_date_short', 'end_date_short'],
)
# Remove the 'Unnamed: 0' column (presumably a row index saved by an earlier
# export -- TODO confirm). `axis` is passed by keyword because the positional
# form was deprecated and later removed from pandas; reassigning instead of
# using inplace=True avoids mutating the freshly loaded frame in place.
bikeshare = bikeshare.drop('Unnamed: 0', axis=1)

## Registered Riders vs Casual Riders

In [20]:
# Ride durations (`time_diff`) split by membership type.
casual = bikeshare.loc[bikeshare['Member Type'] == 'Casual', 'time_diff']
registered = bikeshare.loc[bikeshare['Member Type'] == 'Registered', 'time_diff']

# Gap between the two group means (casual minus registered).
casual_mean = np.mean(casual)
registered_mean = np.mean(registered)
difference = casual_mean - registered_mean

print(
    'The difference the ride times for casual riders and the registered riders is '
    + str(round(difference, 2))
    + ' minutes.'
)

The difference the ride times for casual riders and the registered riders is 13.73 minutes.


The difference between the means is 13.73 minutes. Knowing that, we'll need to do a hypothesis test to determine whether this difference was due to random chance or whether the two groups' mean ride times are actually different.
<br>
Null Hypothesis: The mean ride times of Registered Riders and Casual Riders are equal.
<br>
Alternative Hypothesis: The mean ride times of Registered Riders and Casual Riders are different.
<br>
Significance: 0.05

In [8]:
# Two-sample t-test on ride durations, casual vs registered. equal_var=False
# selects Welch's variant, which does not assume equal group variances.
time_test = stat.ttest_ind(casual, registered, equal_var=False)
p_val = time_test[1]  # index 1 of the result is the p-value
# Was `printp_val`, which raises NameError; print the p-value as intended.
print(p_val)

0.0

With this result (p-value < 0.00001), we can reject the null hypothesis that the mean ride times of registered riders and casual riders are equal. We can argue that the means are different and that the type of rider affects the ride time.

## Seasonal Ride Times

In [13]:
# Ride durations grouped by the `season` code (1 through 4).
spring = bikeshare.loc[bikeshare['season'] == 1, 'time_diff']
summer = bikeshare.loc[bikeshare['season'] == 2, 'time_diff']
fall = bikeshare.loc[bikeshare['season'] == 3, 'time_diff']
winter = bikeshare.loc[bikeshare['season'] == 4, 'time_diff']

# One-way ANOVA across the four seasonal groups; display its p-value.
season = stat.f_oneway(spring, summer, fall, winter)
p_val_season = season.pvalue
p_val_season

0.0

In [14]:
# Pairwise Welch t-tests (equal_var=False) between every pair of seasons.
# NOTE(review): the six p-values below are reported without any
# multiple-comparison correction.
spring_summer = stat.ttest_ind(spring, summer, equal_var=False)
spring_fall = stat.ttest_ind(spring, fall, equal_var=False)
spring_winter = stat.ttest_ind(spring, winter, equal_var=False)
summer_fall = stat.ttest_ind(summer, fall, equal_var=False)
summer_winter = stat.ttest_ind(summer, winter, equal_var=False)
fall_winter = stat.ttest_ind(fall, winter, equal_var=False)

# Keep each p-value under its own name.
spring_summer_p = spring_summer.pvalue
spring_fall_p = spring_fall.pvalue
spring_winter_p = spring_winter.pvalue
summer_fall_p = summer_fall.pvalue
summer_winter_p = summer_winter.pvalue
fall_winter_p = fall_winter.pvalue

for pairwise_p in (spring_summer_p, spring_fall_p, spring_winter_p,
                   summer_fall_p, summer_winter_p, fall_winter_p):
    print(pairwise_p)

0.0
0.0
2.78861876229e-272
2.01962674215e-48
0.0
0.0


## Seasonal Ride Times: Casual vs Registered

In [17]:
# Ride durations for every (member type, season code) combination.
casual_spring = bikeshare.loc[
    (bikeshare['Member Type'] == 'Casual') & (bikeshare['season'] == 1), 'time_diff']
casual_summer = bikeshare.loc[
    (bikeshare['Member Type'] == 'Casual') & (bikeshare['season'] == 2), 'time_diff']
casual_fall = bikeshare.loc[
    (bikeshare['Member Type'] == 'Casual') & (bikeshare['season'] == 3), 'time_diff']
casual_winter = bikeshare.loc[
    (bikeshare['Member Type'] == 'Casual') & (bikeshare['season'] == 4), 'time_diff']
registered_spring = bikeshare.loc[
    (bikeshare['Member Type'] == 'Registered') & (bikeshare['season'] == 1), 'time_diff']
registered_summer = bikeshare.loc[
    (bikeshare['Member Type'] == 'Registered') & (bikeshare['season'] == 2), 'time_diff']
registered_fall = bikeshare.loc[
    (bikeshare['Member Type'] == 'Registered') & (bikeshare['season'] == 3), 'time_diff']
registered_winter = bikeshare.loc[
    (bikeshare['Member Type'] == 'Registered') & (bikeshare['season'] == 4), 'time_diff']

# Within each season, Welch t-test of casual vs registered ride durations.
reg_cas_spring = stat.ttest_ind(casual_spring, registered_spring, equal_var=False)
reg_cas_summer = stat.ttest_ind(casual_summer, registered_summer, equal_var=False)
reg_cas_fall = stat.ttest_ind(casual_fall, registered_fall, equal_var=False)
reg_cas_winter = stat.ttest_ind(casual_winter, registered_winter, equal_var=False)

reg_cas_spring_p = reg_cas_spring.pvalue
reg_cas_summer_p = reg_cas_summer.pvalue
reg_cas_fall_p = reg_cas_fall.pvalue
reg_cas_winter_p = reg_cas_winter.pvalue

for seasonal_p in (reg_cas_spring_p, reg_cas_summer_p, reg_cas_fall_p, reg_cas_winter_p):
    print(seasonal_p)

0.0
0.0
0.0
0.0


## Weather Category

In [24]:
# Ride durations grouped by the `weathersit` code (1, 2, 3).
sunny = bikeshare.loc[bikeshare['weathersit'] == 1, 'time_diff']
less_sunny = bikeshare.loc[bikeshare['weathersit'] == 2, 'time_diff']
lousy = bikeshare.loc[bikeshare['weathersit'] == 3, 'time_diff']

In [25]:
# One-way ANOVA across the three weather groups; display its p-value.
weather_anova = stat.f_oneway(sunny, less_sunny, lousy)
weather_anova_p = weather_anova.pvalue
weather_anova_p

0.0

In [26]:
# Pairwise Welch t-tests (equal_var=False) between the weather groups.
# NOTE(review): the three p-values are reported without any
# multiple-comparison correction.
sunny_less = stat.ttest_ind(sunny, less_sunny, equal_var=False)
sunny_lousy = stat.ttest_ind(sunny, lousy, equal_var=False)
less_lousy = stat.ttest_ind(less_sunny, lousy, equal_var=False)

sunny_less_p = sunny_less.pvalue
sunny_lousy_p = sunny_lousy.pvalue
less_lousy_p = less_lousy.pvalue

for weather_p in (sunny_less_p, sunny_lousy_p, less_lousy_p):
    print(weather_p)

2.80975478368e-185
0.0
0.0


## Weather Category: Casual vs Registered

In [31]:
# Ride durations for every (weathersit code, member type) combination.
sunny_casual = bikeshare.loc[
    (bikeshare['weathersit'] == 1) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
sunny_reg = bikeshare.loc[
    (bikeshare['weathersit'] == 1) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']

less_casual = bikeshare.loc[
    (bikeshare['weathersit'] == 2) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
less_reg = bikeshare.loc[
    (bikeshare['weathersit'] == 2) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']

lousy_casual = bikeshare.loc[
    (bikeshare['weathersit'] == 3) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
lousy_reg = bikeshare.loc[
    (bikeshare['weathersit'] == 3) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']

In [32]:
# Within each weather group, Welch t-test of casual vs registered ride durations.
sunny_diff = stat.ttest_ind(sunny_casual, sunny_reg, equal_var=False)
less_diff = stat.ttest_ind(less_casual, less_reg, equal_var=False)
lousy_diff = stat.ttest_ind(lousy_casual, lousy_reg, equal_var=False)

sunny_diff_p = sunny_diff.pvalue
less_diff_p = less_diff.pvalue
lousy_diff_p = lousy_diff.pvalue

for weather_member_p in (sunny_diff_p, less_diff_p, lousy_diff_p):
    print(weather_member_p)

0.0
0.0
8.05155049416e-105


## Holiday

In [35]:
# Ride durations split on the `holiday` flag (0 vs 1).
no_holiday = bikeshare.loc[bikeshare['holiday'] == 0, 'time_diff']
holiday = bikeshare.loc[bikeshare['holiday'] == 1, 'time_diff']



In [36]:
# Welch t-test (equal_var=False) of non-holiday vs holiday ride durations.
holiday_t = stat.ttest_ind(no_holiday, holiday, equal_var=False)
holiday_t_p = holiday_t.pvalue
print(holiday_t_p)

5.94089337013e-98


## Holiday: Casual vs Registered

In [38]:
# Ride durations for every (holiday flag, member type) combination.
no_hol_cas = bikeshare.loc[
    (bikeshare['holiday'] == 0) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
no_hol_reg = bikeshare.loc[
    (bikeshare['holiday'] == 0) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']
hol_cas = bikeshare.loc[
    (bikeshare['holiday'] == 1) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
hol_reg = bikeshare.loc[
    (bikeshare['holiday'] == 1) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']

In [39]:
# Casual vs registered Welch t-tests, run separately for non-holidays and holidays.
no_hol_t = stat.ttest_ind(no_hol_cas, no_hol_reg, equal_var=False)
hol_t = stat.ttest_ind(hol_cas, hol_reg, equal_var=False)

no_hol_t_p = no_hol_t.pvalue
hol_t_p = hol_t.pvalue

for holiday_member_p in (no_hol_t_p, hol_t_p):
    print(holiday_member_p)

0.0
0.0


## Workday

In [42]:
# Ride durations split on the `workingday` flag (1 vs 0).
workday = bikeshare.loc[bikeshare['workingday'] == 1, 'time_diff']
no_workday = bikeshare.loc[bikeshare['workingday'] == 0, 'time_diff']


In [44]:
# Welch t-test (equal_var=False) of working-day vs non-working-day ride durations.
workday_t = stat.ttest_ind(workday, no_workday, equal_var=False)
workday_t_p = workday_t.pvalue

print(workday_t_p)

0.0


## Workday: Registered vs Casual

In [47]:
# Ride durations for every (workingday flag, member type) combination.
no_work_cas = bikeshare.loc[
    (bikeshare['workingday'] == 0) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
no_work_reg = bikeshare.loc[
    (bikeshare['workingday'] == 0) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']
work_cas = bikeshare.loc[
    (bikeshare['workingday'] == 1) & (bikeshare['Member Type'] == 'Casual'), 'time_diff']
work_reg = bikeshare.loc[
    (bikeshare['workingday'] == 1) & (bikeshare['Member Type'] == 'Registered'), 'time_diff']

In [48]:
# Casual vs registered Welch t-tests, run separately for non-working and working days.
no_working = stat.ttest_ind(no_work_cas, no_work_reg, equal_var=False)
working_t = stat.ttest_ind(work_cas, work_reg, equal_var=False)

no_working_p = no_working.pvalue
working_t_p = working_t.pvalue

for workday_member_p in (no_working_p, working_t_p):
    print(workday_member_p)

0.0
0.0


## Ride Times vs. Temperature

In [52]:
# 2x2 correlation-coefficient matrix of `temp` vs `time_diff` over all rides;
# the off-diagonal entry is the coefficient between the two columns.
time_temp = np.corrcoef(x=bikeshare['temp'], y=bikeshare['time_diff'])
time_temp[0, 1]

0.095882386933345623

In [55]:
# Full-row subsets per member type, reused by the per-group correlation cells.
casual_corr = bikeshare.loc[bikeshare['Member Type'] == 'Casual']
registered_corr = bikeshare.loc[bikeshare['Member Type'] == 'Registered']

In [56]:
# Correlation of `temp` with `time_diff`, casual rides only.
time_temp_cas = np.corrcoef(x=casual_corr['temp'], y=casual_corr['time_diff'])
time_temp_cas[0, 1]

0.0076346430014268855

In [57]:
# Correlation of `temp` with `time_diff`, registered rides only.
time_temp_reg = np.corrcoef(x=registered_corr['temp'], y=registered_corr['time_diff'])
time_temp_reg[0, 1]

0.083729130500907911

## Ride Times vs. Humidity

In [53]:
# Correlation of `hum` with `time_diff` over all rides.
time_hum = np.corrcoef(x=bikeshare['hum'], y=bikeshare['time_diff'])
time_hum[0, 1]

0.008031838150257169

In [58]:
# Correlation of `hum` with `time_diff`, casual rides only.
time_hum_cas = np.corrcoef(x=casual_corr['hum'], y=casual_corr['time_diff'])
time_hum_cas[0, 1]

-0.0041247100896568334

In [59]:
# Correlation of `hum` with `time_diff`, registered rides only.
time_hum_reg = np.corrcoef(x=registered_corr['hum'], y=registered_corr['time_diff'])
time_hum_reg[0, 1]

0.0018546164180973683

## Ride Times vs Wind Speed

In [54]:
# Correlation of `windspeed` with `time_diff` over all rides.
time_wind = np.corrcoef(x=bikeshare['windspeed'], y=bikeshare['time_diff'])
time_wind[0, 1]

-0.031826321312461395

In [60]:
# Correlation of `windspeed` with `time_diff`, casual rides only.
time_wind_cas = np.corrcoef(x=casual_corr['windspeed'], y=casual_corr['time_diff'])
time_wind_cas[0, 1]

-0.011978496978161309

In [61]:
# Correlation of `windspeed` with `time_diff`, registered rides only.
time_wind_reg = np.corrcoef(x=registered_corr['windspeed'], y=registered_corr['time_diff'])
time_wind_reg[0, 1]

-0.01943016317449368