In [2]:
# Dependencies
import pandas as pd
import plotly.express as px
import os
from scipy.stats import linregress
from sklearn import datasets
import statsmodels.api as sm
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Load the daily bike-count CSV and parse its 'Day' column into datetimes.
bikecounts_file = os.path.join('../', 'Resources', 'bikecounts.csv')
bikecounts_df = pd.read_csv(bikecounts_file).assign(
    Day=lambda frame: pd.to_datetime(frame['Day'])
)
# Preview the first rows to confirm the file was read as expected.
bikecounts_df.head()

Unnamed: 0.1,Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,0,2016-04-01,2016-04-01,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,1,2016-04-02,2016-04-02,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,2,2016-04-03,2016-04-03,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,3,2016-04-04,2016-04-04,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335
4,4,2016-04-05,2016-04-05,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471


In [4]:
# pd.read_csv already returns a DataFrame, so re-wrapping the frame in
# pd.DataFrame() did nothing useful. Check the type instead, so that a broken
# load step fails here with a clear message rather than further downstream.
assert isinstance(bikecounts_df, pd.DataFrame), "bikecounts_df must be a pandas DataFrame"

In [5]:
# Keep only the weather and bridge-count columns.
# "Unnamed: 0" (it looks like a row index that was saved into the CSV) is
# dropped by its real name -- the previous throwaway rename to "a" was not needed.
# errors="ignore" makes this cell safe to re-run: on a second execution the
# columns are already gone, which previously raised a KeyError.
bikecounts_df = bikecounts_df.drop(
    columns=["Unnamed: 0", "Date", "Day"], errors="ignore"
)

In [23]:
# Preview the first five rows of the trimmed frame.
bikecounts_df.head(n=5)

Unnamed: 0,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,78.1,66.0,0.01,1704.0,3126,4115.0,2552.0,11497
1,55.0,48.9,0.15,827.0,1646,2565.0,1884.0,6922
2,39.9,34.0,0.09,526.0,1232,1695.0,1306.0,4759
3,44.1,33.1,0.47,521.0,1067,1440.0,1307.0,4335
4,42.1,26.1,0.0,1416.0,2617,3081.0,2357.0,9471


In [7]:
# Sum of the 'Brooklyn Bridge' column over every row in the dataset.
bikecounts_df.loc[:, 'Brooklyn Bridge'].sum()

68089.0

In [8]:
# Print each column's name, non-null count and dtype to check for missing values.
bikecounts_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   High Temp (°F)       30 non-null     float64
 1   Low Temp (°F)        30 non-null     float64
 2   Precipitation        30 non-null     float64
 3   Brooklyn Bridge      30 non-null     float64
 4   Manhattan Bridge     30 non-null     int64  
 5   Williamsburg Bridge  30 non-null     float64
 6   Queensboro Bridge    30 non-null     float64
 7   Total                30 non-null     int64  
dtypes: float64(6), int64(2)
memory usage: 2.0 KB


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) for every numeric column.
bikecounts_df.describe(percentiles=[0.25, 0.5, 0.75])

Unnamed: 0,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
count,30.0,30.0,30.0,30.0,30.0,30.0,30.0,30.0
mean,60.58,46.413333,0.052333,2269.633333,4049.533333,4862.466667,3352.866667,14534.5
std,11.347289,9.662502,0.103647,995.633241,1729.740976,1840.652747,1115.381263,5650.877227
min,39.9,26.1,0.0,504.0,997.0,1440.0,1306.0,4335.0
25%,55.5,44.1,0.0,1511.25,2744.25,3425.5,2480.75,10071.25
50%,62.1,46.9,0.0,2379.5,4165.0,5194.0,3477.0,15292.5
75%,68.0,50.0,0.08,3104.0,5249.75,6021.25,4172.75,18281.25
max,81.0,66.0,0.47,3871.0,6951.0,7834.0,5032.0,23318.0


In [21]:
# Select your independent X terms, and your dependent y term. 
# Fit an ordinary least squares regression of Brooklyn Bridge counts on the
# weather variables. add_constant supplies the intercept term (reported as
# `const` in the model summary).
predictor_columns = ["High Temp (°F)", "Low Temp (°F)", "Precipitation"]
y = bikecounts_df['Brooklyn Bridge']
X = sm.add_constant(bikecounts_df[predictor_columns])
model = sm.OLS(endog=y, exog=X)
results = model.fit()

In [22]:
# Show the fitted model's summary table as plain text.
ols_summary = results.summary()
print(ols_summary)

                            OLS Regression Results                            
Dep. Variable:        Brooklyn Bridge   R-squared:                       0.712
Model:                            OLS   Adj. R-squared:                  0.679
Method:                 Least Squares   F-statistic:                     21.46
Date:                Fri, 26 Mar 2021   Prob (F-statistic):           3.31e-07
Time:                        20:13:13   Log-Likelihood:                -230.47
No. Observations:                  30   AIC:                             468.9
Df Residuals:                      26   BIC:                             474.5
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                     coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------
const           -718.7735    626.377     -1.

# Explanation

The Null Hypothesis: 
1. The number of bikes crossing the Brooklyn Bridge is not dependent on Precipitation and Temperature (all regression slope coefficients are zero).

The Alternative Hypothesis:
1. The number of bikes crossing the Brooklyn Bridge is dependent on Precipitation and/or Temperature (at least one slope coefficient is non-zero).

# Analysis of the OLS Regression Results (p-values)
1. High Temp (°F): p-value = 0.000 (i.e. below 0.0005) indicates a highly statistically significant
   relationship between high temperature and bikes crossing the Brooklyn Bridge.

2. Low Temp (°F): p-value = 0.104 is above the significance level of 0.05, so low temperature is not a
   statistically significant predictor of the number of bikes crossing the Brooklyn Bridge once the
   other variables are accounted for.

3. Precipitation: p-value = 0.006 indicates a highly statistically significant relationship
   between precipitation and bikes crossing the Brooklyn Bridge.
   
A p-value less than 0.05 is conventionally considered statistically significant. It indicates strong evidence against the null hypothesis: if the null hypothesis were true, there would be less than a 5% probability of observing results at least this extreme purely by chance.

The overall model is also significant (Prob (F-statistic) = 3.31e-07, far below 0.05). We therefore reject the null hypothesis in favor of the alternative hypothesis.
