In [185]:
import pandas as pd
import numpy as np


path = '/Users/jonzimmerman/Desktop/Data Projects/Amtrak/data/'
amtrak_df = pd.read_csv(path + 'amtrak_prepped_df.csv')
amtrak_df.shape

(33432, 14)

In [186]:
amtrak_df.head(2)

Unnamed: 0,Year,Month,Rides,address,state,station_name,abbrev,lat,lon,num_routes,active_routes,active_rides,parent_route,business_line
0,2016,1,1486.44,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor
1,2016,2,2229.66,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor


In [187]:
#--- Combine Year and Month
amtrak_df['year_month'] = pd.to_datetime(amtrak_df[['Year', 'Month']].assign(day=1))
amtrak_df['year_month'] = amtrak_df['year_month'].dt.strftime('%Y-%m-%d')

### Step 1: Check for any missing ride #s 

In [188]:
missing = pd.DataFrame(amtrak_df.groupby('station_name')['Rides'].apply(lambda x: x.isna().sum())).reset_index()
missing[missing['Rides']>0]

Unnamed: 0,station_name,Rides
11,"Arcadia, MO",12
23,"Bellingham, WA",24
108,"Fairfield-Vacaville, CA",24
198,"Lodi, CA",12
208,"Marks, MS",24
230,"Mount Vernon, WA",12
273,"Plattsburhg, NY",24
295,"Roanoke, VA",24
342,"State Street-New Haven, CT",24
343,"Stockton-Downtown, CA",12


### List out info that will be used in imputation process for each station

In [189]:
missing_list = missing[missing['Rides']>0]['station_name'].unique().tolist()
more_info_about_missing_rides = amtrak_df[amtrak_df['station_name'].isin(missing_list)]
missing_inputs = more_info_about_missing_rides[['station_name','abbrev','lat','lon']].drop_duplicates()
missing_inputs

Unnamed: 0,station_name,abbrev,lat,lon
924,"Arcadia, MO",ACD,37.649536,-90.671728
1932,"Bellingham, WA",BEL,48.720417,-122.511263
9156,"Fairfield-Vacaville, CA",FFV,38.285447,-121.967975
16632,"Lodi, CA",LOD,38.133181,-121.271741
17472,"Marks, MS",MKS,34.258176,-90.272366
19320,"Mount Vernon, WA",MVW,48.417792,-122.334634
22764,"Plattsburhg, NY",PLB,44.696703,-73.44643
24276,"Roanoke, VA",RNK,37.273114,-79.943461
28056,"State Street-New Haven, CT",STS,41.295149,-72.937242
28140,"Stockton-Downtown, CA",SKT,37.955839,-121.278763


## Which years are missing?

In [253]:
stn_code = "SKT"
missing_years = pd.DataFrame(more_info_about_missing_rides[more_info_about_missing_rides['abbrev']==stn_code].groupby('Year')['Rides'].sum()).reset_index()
years_list = missing_years[missing_years['Rides']==0]['Year'].unique().tolist()
years_list

[2021]

### Impute rides based on factor of closest station

In [254]:
# Given latitude and longitude
given_lat = missing_inputs[missing_inputs['abbrev']==stn_code]['lat'].values[0]
given_lon = missing_inputs[missing_inputs['abbrev']==stn_code]['lon'].values[0]

# Function to calculate the Haversine distance
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Radius of the Earth in kilometers
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

# Filter out same stations
amtrak_df_test = amtrak_df[amtrak_df['abbrev']!=stn_code]

#Remove stations in missing list so we dont use them
amtrak_df_test = amtrak_df_test[~amtrak_df_test['abbrev'].isin(missing_inputs['abbrev'].unique().tolist())]

# Apply the function to calculate distance for each row
amtrak_df_test['Distance'] = amtrak_df_test.apply(lambda row: haversine(given_lat, given_lon, row['lat'], row['lon']), axis=1)

# Find the row with the minimum distance
closest_row = amtrak_df_test.loc[amtrak_df_test['Distance'].idxmin()]


if closest_row['station_name'] in missing_list:
    print(f"{closest_row['station_name']} ({closest_row['abbrev']}) is in the stations with missing rides list.")
else:
    print('Closest station is: ', closest_row['station_name'])
    print('Station Code: ', closest_row['abbrev'])

Closest station is:  Modesto, CA
Station Code:  MOD


In [255]:
# Find factor difference between means of 2 stations - impute missing year adjusted by factor

imp_stn_code = closest_row['abbrev']
org_stn_code = stn_code

denominator = amtrak_df[amtrak_df['abbrev']==imp_stn_code]['Rides'].mean()
numerator = amtrak_df[amtrak_df['abbrev']==org_stn_code]['Rides'].mean()

factor = numerator/denominator
print(f'Avg Rides per month for {imp_stn_code}: ', denominator)
print(f'Avg Rides per month for {org_stn_code}: ', numerator)
print('Factor to adjust imputations: ', factor)

Avg Rides per month for MOD:  7854.761904761905
Avg Rides per month for SKT:  1709.9305555555554
Factor to adjust imputations:  0.21769349297767


In [256]:
imputed_values = amtrak_df[amtrak_df['abbrev']==imp_stn_code][['Year','Month','Rides']]
imputed_values = imputed_values[imputed_values['Year'].isin(years_list)]
imputed_values['Rides'] = imputed_values['Rides']*factor
imputed_values['abbrev'] = org_stn_code
imputed_values

Unnamed: 0,Year,Month,Rides,abbrev
19044,2021,1,449.31937,SKT
19045,2021,2,673.979054,SKT
19046,2021,3,786.308897,SKT
19047,2021,4,1010.968581,SKT
19048,2021,5,1235.628266,SKT
19049,2021,6,1235.628266,SKT
19050,2021,7,1347.958109,SKT
19051,2021,8,1123.298424,SKT
19052,2021,9,786.308897,SKT
19053,2021,10,673.979054,SKT


In [257]:
# Merge the dataframes on the common columns
amtrak_df = pd.merge(amtrak_df, imputed_values, on=['Year', 'Month', 'abbrev'], how='left', suffixes=('', '_imputed'))

# Replace missing values in 'value' column with values from 'value_impute'
amtrak_df['Rides'] = amtrak_df['Rides'].combine_first(amtrak_df['Rides_imputed'])

# Drop the 'value_impute' column if no longer needed
amtrak_df = amtrak_df.drop(columns=['Rides_imputed'])

In [258]:
# Check missing again
missing = pd.DataFrame(amtrak_df.groupby('station_name')['Rides'].apply(lambda x: x.isna().sum())).reset_index()
missing[missing['Rides']>0]

Unnamed: 0,station_name,Rides


### Save imputed version of data

In [259]:
amtrak_df.to_csv('amtrak_df_v2.csv',index=False)

### Reread into notebook

In [362]:
path = '/Users/jonzimmerman/Desktop/Data Projects/Amtrak/data/'
amtrak_df = pd.read_csv(path + 'amtrak_df_v2.csv')
amtrak_df.head(2)

Unnamed: 0,Year,Month,Rides,address,state,station_name,abbrev,lat,lon,num_routes,active_routes,active_rides,parent_route,business_line,year_month
0,2016,1,1486.44,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-01-01
1,2016,2,2229.66,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-02-01


In [363]:
amtrak_df['parent_route'] = np.where(amtrak_df['parent_route'].isna(), "Other", amtrak_df['parent_route'])
amtrak_df['business_line'] = np.where(amtrak_df['business_line'].isna(), "Other", amtrak_df['business_line'])

In [364]:
amtrak_df['business_line'].value_counts()

business_line
Long Distance         19656
State Supported        7392
Other                  4284
Northeast Corridor     2100
Name: count, dtype: int64

### Create case weights

In [365]:
# Get the most recent time in the dataframe for reference
from datetime import datetime

max_month = amtrak_df['year_month'].max()
date_format = '%Y-%m-%d'
date_object = datetime.strptime(max_month, date_format)
date_object

#Calculate the difference in months between each date in the 'Time' column and the most recent date
months_diff = (date_object.year - amtrak_df['Year']) * 12 + date_object.month - amtrak_df['Month']

# More recent months get higher weights, e.g., weight decreases with the number of months passed
amtrak_df['weight'] = 10 - months_diff
amtrak_df['weight'] = amtrak_df['weight'].clip(lower=1)  # Ensure weights do not go below 1
amtrak_df.head(2)

Unnamed: 0,Year,Month,Rides,address,state,station_name,abbrev,lat,lon,num_routes,active_routes,active_rides,parent_route,business_line,year_month,weight
0,2016,1,1486.44,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-01-01,1
1,2016,2,2229.66,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-02-01,1


In [367]:
amtrak_df['year_month'] = pd.to_datetime(amtrak_df['year_month'], format='%Y-%m-%d')
amtrak_df.dtypes

Year                      int64
Month                     int64
Rides                   float64
address                  object
state                    object
station_name             object
abbrev                   object
lat                     float64
lon                     float64
num_routes                int64
active_routes            object
active_rides             object
parent_route             object
business_line            object
year_month       datetime64[ns]
weight                    int64
dtype: object

### Step 2: Split into training and calibration datasets

In [368]:
from sklearn.model_selection import train_test_split

calibration_size = 0.2  
train_calibration_set, test_df = train_test_split(amtrak_df, test_size=0.2, random_state=42)

train_df, calib_df = train_test_split(train_calibration_set, test_size=calibration_size, random_state=42)

# Show the results
print("TRN set dimensions: ", train_df.shape)
print("CLB set dimensions: ", calib_df.shape)
print("TST set dimensions: ", test_df.shape)

TRN set dimensions:  (21396, 16)
CLB set dimensions:  (5349, 16)
TST set dimensions:  (6687, 16)


### Fit training data to model

In [375]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Convert dates to numeric format (number of days since the start of the dataset)
train_df['date_num'] = (train_df['year_month'] - train_df['year_month'].min()).dt.days

# Define the formula for the Poisson regression
formula = 'Rides ~ date_num'

# Group the training data by the grouping variable
grouped = train_df.groupby('station_name')

# Prepare a dictionary to store models and future predictions
models = {}
future_predictions = {}

# Forecasting horizon
forecast_horizon = 12

# Fit Poisson regression models for each group and make future predictions
for group_name, group_data in grouped:
    # Fit the model
    model = smf.poisson(formula=formula, data=group_data, freq_weights=group_data['weight']).fit()
    models[group_name] = model
    print(f"Summary for group {group_name}:")
    print(model.summary())

    # Create future data points for prediction
    last_date_num = group_data['date_num'].max()
    
    future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
    future_date_num = (future_dates - train_df['year_month'].min()).days
    
    future_data = pd.DataFrame({
        'date_num': future_date_num
    })

    # Make predictions for the future data points
    future_predictions[group_name] = model.predict(future_data)
    print(f"Future predictions for group {group_name}:")
    print(future_predictions[group_name])

# future_predictions dictionary contains the forecasts for each group for the next 12 months

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 208.848041
         Iterations 7
Summary for group Aberdeen, MD:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   57
Model:                        Poisson   Df Residuals:                       55
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                 0.09638
Time:                        13:36:17   Log-Likelihood:                -11904.
converged:                       True   LL-Null:                       -13174.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      8.0341      0.005   1588.829      0.000       8.024       8

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Future predictions for group Benson, AZ:
0     113.623460
1     113.060836
2     112.441180
3     111.844746
4     111.231755
5     110.641737
6     110.035339
7     109.432265
8     108.851792
9     108.255204
10    107.680975
11    107.090804
dtype: float64
Optimization terminated successfully.
         Current function value: 944.997406
         Iterations 8
Summary for group Berkeley, CA:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   62
Model:                        Poisson   Df Residuals:                       60
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.4140
Time:                        13:36:17   Log-Likelihood:                -58590.
converged:                       True   LL-Null:                       -99987.
Covariance Type:            nonrobust   LLR p-value

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   54
Model:                        Poisson   Df Residuals:                       52
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:               0.0002024
Time:                        13:36:17   Log-Likelihood:                -50383.
converged:                       True   LL-Null:                       -50393.
Covariance Type:            nonrobust   LLR p-value:                 6.279e-06
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.4969      0.002   4241.054      0.000       9.493       9.501
date_num     6.89e-06   1.53e-06      4.517      0.000     3.9e-06    9.88e-06
Future predictions for group Charlotte, NC:
0     13

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Future predictions for group Denver, CO:
0     7021.955400
1     6969.172055
2     6916.785477
3     6869.807117
4     6818.167456
5     6768.563210
6     6717.684589
7     6668.811386
8     6618.682590
9     6568.930607
10    6521.139635
11    6472.120873
dtype: float64
Optimization terminated successfully.
         Current function value: 21.092035
         Iterations 7
Summary for group Detroit Lakes, MN:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   58
Model:                        Poisson   Df Residuals:                       56
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.2152
Time:                        13:36:17   Log-Likelihood:                -1223.3
converged:                       True   LL-Null:                       -1558.8
Covariance Type:            nonrobu

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 496.516436
         Iterations 7
Summary for group Exeter, NH:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   56
Model:                        Poisson   Df Residuals:                       54
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.2154
Time:                        13:36:18   Log-Likelihood:                -27805.
converged:                       True   LL-Null:                       -35436.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      8.9483      0.003   2915.400      0.000       8.942       8.9

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Future predictions for group Glenwood Springs, CO:
0     2236.502713
1     2218.515005
2     2202.392423
3     2184.679056
4     2167.672746
5     2150.238622
6     2133.500408
7     2116.341125
8     2099.319850
9     2082.978005
10    2066.225063
11    2050.140840
dtype: float64
Optimization terminated successfully.
         Current function value: 790.492425
         Iterations 7
Summary for group Goleta, CA:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   51
Model:                        Poisson   Df Residuals:                       49
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.1112
Time:                        13:36:18   Log-Likelihood:                -40315.
converged:                       True   LL-Null:                       -45357.
Covariance Type:            non

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 975.341403
         Iterations 7
Summary for group Hudson, NY:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   55
Model:                        Poisson   Df Residuals:                       53
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                 0.02847
Time:                        13:36:18   Log-Likelihood:                -53644.
converged:                       True   LL-Null:                       -55216.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.8031      0.002   4444.021      0.000       9.799       9.8

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Future predictions for group Jefferson City, MO:
0     1705.626397
1     1690.271127
2     1676.520705
3     1661.427466
4     1646.950493
5     1632.123466
6     1617.901836
7     1603.336325
8     1588.901944
9     1575.056928
10    1560.877138
11    1547.276318
dtype: float64
Optimization terminated successfully.
         Current function value: 42.273862
         Iterations 7
Summary for group Jesup, GA:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   53
Model:                        Poisson   Df Residuals:                       51
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.1950
Time:                        13:36:18   Log-Likelihood:                -2240.5
converged:                       True   LL-Null:                       -2783.2
Covariance Type:            nonrobu

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 81.584209
         Iterations 7
Summary for group Little Rock, AR:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   53
Model:                        Poisson   Df Residuals:                       51
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.2561
Time:                        13:36:19   Log-Likelihood:                -4324.0
converged:                       True   LL-Null:                       -5812.3
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.4853      0.007   1149.675      0.000       7.473      

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   52
Model:                        Poisson   Df Residuals:                       50
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.4719
Time:                        13:36:19   Log-Likelihood:                -39529.
converged:                       True   LL-Null:                       -74849.
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      9.7366      0.002   4232.009      0.000       9.732       9.741
date_num      -0.0005    1.9e-06   -259.637      0.000      -0.000      -0.000
Future predictions for group Milwaukee-Airport, WI:


  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Future predictions for group Oakland, CA:
0     12978.658108
1     12842.926463
2     12694.307292
3     12552.120001
4     12406.866061
5     12267.898362
6     12125.933453
7     11985.611372
8     11851.362091
9     11714.217367
10    11583.007936
11    11448.968623
dtype: float64
Optimization terminated successfully.
         Current function value: 528.001671
         Iterations 8
Summary for group Oakland-Coliseum, CA:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   53
Model:                        Poisson   Df Residuals:                       51
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.4427
Time:                        13:36:19   Log-Likelihood:                -27984.
converged:                       True   LL-Null:                       -50217.
Covariance Type:  

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 92.685561
         Iterations 7
Summary for group Port Huron, MI:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   51
Model:                        Poisson   Df Residuals:                       49
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.4131
Time:                        13:36:19   Log-Likelihood:                -4727.0
converged:                       True   LL-Null:                       -8054.7
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.5667      0.007   1109.846      0.000       7.553       

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   54
Model:                        Poisson   Df Residuals:                       52
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.4026
Time:                        13:36:19   Log-Likelihood:            -2.9024e+05
converged:                       True   LL-Null:                   -4.8585e+05
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     11.6135      0.001   1.22e+04      0.000      11.612      11.615
date_num      -0.0005   7.51e-07   -614.375      0.000      -0.000      -0.000
Future predictions for group Sacramento, CA:
0     3

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Future predictions for group Spokane, WA:
0     2023.790998
1     2001.662711
2     1979.776377
3     1960.213838
4     1938.780709
5     1918.262132
6     1897.287706
7     1877.208259
8     1856.682720
9     1836.381608
10    1816.946745
11    1797.080109
dtype: float64
Optimization terminated successfully.
         Current function value: 809.796543
         Iterations 7
Summary for group Springfield, IL:
                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   53
Model:                        Poisson   Df Residuals:                       51
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.2386
Time:                        13:36:20   Log-Likelihood:                -42919.
converged:                       True   LL-Null:                       -56365.
Covariance Type:            nonrobu

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   53
Model:                        Poisson   Df Residuals:                       51
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                  0.2170
Time:                        13:36:20   Log-Likelihood:                -7828.9
converged:                       True   LL-Null:                       -9999.0
Covariance Type:            nonrobust   LLR p-value:                     0.000
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.9693      0.005   1506.702      0.000       7.959       7.980
date_num      -0.0003   3.91e-06    -65.586      0.000      -0.000      -0.000
Future predictions for group Turlock-Denair, CA:
0  

                          Poisson Regression Results                          
Dep. Variable:                  Rides   No. Observations:                   52
Model:                        Poisson   Df Residuals:                       50
Method:                           MLE   Df Model:                            1
Date:                Sun, 21 Jul 2024   Pseudo R-squ.:                 0.05306
Time:                        13:36:20   Log-Likelihood:                -4116.9
converged:                       True   LL-Null:                       -4347.5
Covariance Type:            nonrobust   LLR p-value:                2.475e-102
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      7.4676      0.007   1041.873      0.000       7.454       7.482
date_num      -0.0001   5.05e-06    -21.472      0.000      -0.000   -9.85e-05
Future predictions for group Winter Haven, FL:
0    

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset