In [1]:
import matplotlib.pyplot as plt
import matplotlib as mpl

#使用中文字体
mpl.rcParams['font.family']=['Microsoft Yahei','sans-serif']
mpl.rcParams['axes.unicode_minus']=False

#使用svg格式，避免图形模糊
%matplotlib inline
%config InlineBackend.figure_format="svg"

In [43]:
import pandas as pd

# Read the bicycle-count CSV from the working directory, decoding it as GBK,
# and preview the first five rows.
df = pd.read_csv(
    "nyc-east-river-bicycle-counts.csv",
    encoding="GBK",
)
df.head(5)

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,4/1,Saturday,46.0,37.0,0.0,606,1446,1915,1430,5397
1,4/2,Sunday,62.1,41.0,0.0,2021,3943,4207,2862,13033
2,4/3,Monday,63.0,50.0,0.03,2470,4988,5178,3689,16325
3,4/4,Tuesday,51.1,46.0,1.18,723,1913,2279,1666,6581
4,4/5,Wednesday,63.0,46.0,0.0,2807,5276,5711,4197,17991


In [44]:
# "T" stands for "Trace of Precipitation" (a trace amount was observed).
# Map it to 0 so the whole Precipitation column can be cast to float.
df["Precipitation"] = (
    df["Precipitation"]
    .replace({"T": 0})
    .astype("float")
)
df

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total
0,4/1,Saturday,46.0,37.0,0.00,606,1446,1915,1430,5397
1,4/2,Sunday,62.1,41.0,0.00,2021,3943,4207,2862,13033
2,4/3,Monday,63.0,50.0,0.03,2470,4988,5178,3689,16325
3,4/4,Tuesday,51.1,46.0,1.18,723,1913,2279,1666,6581
4,4/5,Wednesday,63.0,46.0,0.00,2807,5276,5711,4197,17991
...,...,...,...,...,...,...,...,...,...,...
209,10/27,Friday,62.1,48.0,0.00,3150,5610,6450,5181,20391
210,10/28,Saturday,68.0,55.9,0.00,2245,4520,5104,4069,15938
211,10/29,Sunday,64.9,61.0,3.03,183,661,1026,965,2835
212,10/30,Monday,55.0,46.0,0.25,1428,2966,3547,2924,10865


In [45]:
# Extract the month: the part of Date before the first "/" (e.g. "4/1" -> "4").
# The value is kept as a string, so C(Month) in the models below treats "10"
# as the baseline level (the summaries list contrasts for 4 through 9 only).
df["Month"] = df["Date"].str.split("/").str[0]
df

Unnamed: 0,Date,Day,High Temp (°F),Low Temp (°F),Precipitation,Brooklyn Bridge,Manhattan Bridge,Williamsburg Bridge,Queensboro Bridge,Total,Month
0,4/1,Saturday,46.0,37.0,0.00,606,1446,1915,1430,5397,4
1,4/2,Sunday,62.1,41.0,0.00,2021,3943,4207,2862,13033,4
2,4/3,Monday,63.0,50.0,0.03,2470,4988,5178,3689,16325,4
3,4/4,Tuesday,51.1,46.0,1.18,723,1913,2279,1666,6581,4
4,4/5,Wednesday,63.0,46.0,0.00,2807,5276,5711,4197,17991,4
...,...,...,...,...,...,...,...,...,...,...,...
209,10/27,Friday,62.1,48.0,0.00,3150,5610,6450,5181,20391,10
210,10/28,Saturday,68.0,55.9,0.00,2245,4520,5104,4069,15938,10
211,10/29,Sunday,64.9,61.0,3.03,183,661,1026,965,2835,10
212,10/30,Monday,55.0,46.0,0.25,1428,2966,3547,2924,10865,10


In [46]:
# Add short aliases for the temperature columns so they can be used as plain
# identifiers in the regression formulas.
df["HT"], df["LT"] = df["High Temp (°F)"], df["Low Temp (°F)"]

# The response variable is the Brooklyn Bridge count only, cast to int.
df["Count"] = df["Brooklyn Bridge"].astype("int")

# Keep just the columns referenced by the regression formulas.
df1 = df[[
    "Month",
    "Day",
    "HT",
    "LT",
    "Precipitation",
    "Count",
]]
df1

Unnamed: 0,Month,Day,HT,LT,Precipitation,Count
0,4,Saturday,46.0,37.0,0.00,606
1,4,Sunday,62.1,41.0,0.00,2021
2,4,Monday,63.0,50.0,0.03,2470
3,4,Tuesday,51.1,46.0,1.18,723
4,4,Wednesday,63.0,46.0,0.00,2807
...,...,...,...,...,...,...
209,10,Friday,62.1,48.0,0.00,3150
210,10,Saturday,68.0,55.9,0.00,2245
211,10,Sunday,64.9,61.0,3.03,183
212,10,Monday,55.0,46.0,0.25,1428


# Poisson回归

In [47]:
from statsmodels.formula.api import poisson

# Poisson regression of Count on month and weekday (both as categoricals)
# plus high/low temperature and precipitation.
# NOTE(review): the summary uses nonrobust standard errors and the
# z-statistics are very large; presumably the counts are overdispersed
# relative to a Poisson model -- TODO confirm before relying on the p-values.
model = poisson(
    formula="Count ~ C(Month) + C(Day) + HT + LT + Precipitation",
    data=df1,
)
res = model.fit()
res.summary()

Optimization terminated successfully.
         Current function value: 67.161490
         Iterations 6


0,1,2,3
Dep. Variable:,Count,No. Observations:,214.0
Model:,Poisson,Df Residuals:,198.0
Method:,MLE,Df Model:,15.0
Date:,"Wed, 21 May 2025",Pseudo R-squ.:,0.6012
Time:,12:18:51,Log-Likelihood:,-14373.0
converged:,True,LL-Null:,-36043.0
Covariance Type:,nonrobust,LLR p-value:,0.0

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,7.2399,0.014,530.809,0.000,7.213,7.267
C(Month)[T.4],-0.1046,0.005,-19.344,0.000,-0.115,-0.094
C(Month)[T.5],-0.0118,0.005,-2.321,0.020,-0.022,-0.002
C(Month)[T.6],0.0412,0.005,7.770,0.000,0.031,0.052
C(Month)[T.7],0.0247,0.006,4.308,0.000,0.013,0.036
C(Month)[T.8],0.1130,0.005,21.420,0.000,0.103,0.123
C(Month)[T.9],0.0460,0.005,8.991,0.000,0.036,0.056
C(Day)[T.Monday],0.0199,0.005,3.949,0.000,0.010,0.030
C(Day)[T.Saturday],-0.0527,0.005,-10.183,0.000,-0.063,-0.043


In [48]:
# For comparison: ordinary least squares (linear regression) with the same
# formula and data as the Poisson model. Note that `model` and `res` are
# rebound here, replacing the Poisson objects.
from statsmodels.formula.api import ols

model = ols(
    formula="Count ~ C(Month) + C(Day) + HT + LT + Precipitation",
    data=df1,
)
res = model.fit()
res.summary()

0,1,2,3
Dep. Variable:,Count,R-squared:,0.594
Model:,OLS,Adj. R-squared:,0.563
Method:,Least Squares,F-statistic:,19.3
Date:,"Wed, 21 May 2025",Prob (F-statistic):,4.1000000000000005e-31
Time:,18:20:28,Log-Likelihood:,-1651.4
No. Observations:,214,AIC:,3335.0
Df Residuals:,198,BIC:,3389.0
Df Model:,15,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,937.9385,395.999,2.369,0.019,157.022,1718.855
C(Month)[T.4],-301.6923,151.709,-1.989,0.048,-600.866,-2.519
C(Month)[T.5],-24.0349,144.259,-0.167,0.868,-308.517,260.447
C(Month)[T.6],31.5648,153.848,0.205,0.838,-271.825,334.955
C(Month)[T.7],-0.6615,164.943,-0.004,0.997,-325.933,324.610
C(Month)[T.8],254.2121,156.234,1.627,0.105,-53.885,562.309
C(Month)[T.9],81.2142,150.099,0.541,0.589,-214.783,377.212
C(Day)[T.Monday],-13.1587,145.142,-0.091,0.928,-299.382,273.064
C(Day)[T.Saturday],-201.2536,144.881,-1.389,0.166,-486.962,84.455

0,1,2,3
Omnibus:,12.263,Durbin-Watson:,1.927
Prob(Omnibus):,0.002,Jarque-Bera (JB):,22.353
Skew:,0.271,Prob(JB):,1.4e-05
Kurtosis:,4.488,Cond. No.,1050.0
