In [185]:
import pandas as pd
import numpy as np


path = '/Users/jonzimmerman/Desktop/Data Projects/Amtrak/data/'
amtrak_df = pd.read_csv(path + 'amtrak_prepped_df.csv')
amtrak_df.shape

(33432, 14)

In [186]:
amtrak_df.head(2)

Unnamed: 0,Year,Month,Rides,address,state,station_name,abbrev,lat,lon,num_routes,active_routes,active_rides,parent_route,business_line
0,2016,1,1486.44,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor
1,2016,2,2229.66,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor


In [187]:
#--- Combine Year and Month
amtrak_df['year_month'] = pd.to_datetime(amtrak_df[['Year', 'Month']].assign(day=1))
amtrak_df['year_month'] = amtrak_df['year_month'].dt.strftime('%Y-%m-%d')

### Step 1: Check for any missing ride #s 

In [188]:
missing = pd.DataFrame(amtrak_df.groupby('station_name')['Rides'].apply(lambda x: x.isna().sum())).reset_index()
missing[missing['Rides']>0]

Unnamed: 0,station_name,Rides
11,"Arcadia, MO",12
23,"Bellingham, WA",24
108,"Fairfield-Vacaville, CA",24
198,"Lodi, CA",12
208,"Marks, MS",24
230,"Mount Vernon, WA",12
273,"Plattsburhg, NY",24
295,"Roanoke, VA",24
342,"State Street-New Haven, CT",24
343,"Stockton-Downtown, CA",12


### List out info that will be used in imputation process for each station

In [189]:
missing_list = missing[missing['Rides']>0]['station_name'].unique().tolist()
more_info_about_missing_rides = amtrak_df[amtrak_df['station_name'].isin(missing_list)]
missing_inputs = more_info_about_missing_rides[['station_name','abbrev','lat','lon']].drop_duplicates()
missing_inputs

Unnamed: 0,station_name,abbrev,lat,lon
924,"Arcadia, MO",ACD,37.649536,-90.671728
1932,"Bellingham, WA",BEL,48.720417,-122.511263
9156,"Fairfield-Vacaville, CA",FFV,38.285447,-121.967975
16632,"Lodi, CA",LOD,38.133181,-121.271741
17472,"Marks, MS",MKS,34.258176,-90.272366
19320,"Mount Vernon, WA",MVW,48.417792,-122.334634
22764,"Plattsburhg, NY",PLB,44.696703,-73.44643
24276,"Roanoke, VA",RNK,37.273114,-79.943461
28056,"State Street-New Haven, CT",STS,41.295149,-72.937242
28140,"Stockton-Downtown, CA",SKT,37.955839,-121.278763


## Which years are missing?

In [253]:
stn_code = "SKT"
missing_years = pd.DataFrame(more_info_about_missing_rides[more_info_about_missing_rides['abbrev']==stn_code].groupby('Year')['Rides'].sum()).reset_index()
years_list = missing_years[missing_years['Rides']==0]['Year'].unique().tolist()
years_list

[2021]

### Impute rides based on factor of closest station

In [254]:
# Given latitude and longitude
given_lat = missing_inputs[missing_inputs['abbrev']==stn_code]['lat'].values[0]
given_lon = missing_inputs[missing_inputs['abbrev']==stn_code]['lon'].values[0]

# Function to calculate the Haversine distance
def haversine(lat1, lon1, lat2, lon2):
    R = 6371  # Radius of the Earth in kilometers
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0)**2
    c = 2 * np.arcsin(np.sqrt(a))
    return R * c

# Filter out same stations
amtrak_df_test = amtrak_df[amtrak_df['abbrev']!=stn_code]

#Remove stations in missing list so we dont use them
amtrak_df_test = amtrak_df_test[~amtrak_df_test['abbrev'].isin(missing_inputs['abbrev'].unique().tolist())]

# Apply the function to calculate distance for each row
amtrak_df_test['Distance'] = amtrak_df_test.apply(lambda row: haversine(given_lat, given_lon, row['lat'], row['lon']), axis=1)

# Find the row with the minimum distance
closest_row = amtrak_df_test.loc[amtrak_df_test['Distance'].idxmin()]


if closest_row['station_name'] in missing_list:
    print(f"{closest_row['station_name']} ({closest_row['abbrev']}) is in the stations with missing rides list.")
else:
    print('Closest station is: ', closest_row['station_name'])
    print('Station Code: ', closest_row['abbrev'])

Closest station is:  Modesto, CA
Station Code:  MOD


In [255]:
# Find factor difference between means of 2 stations - impute missing year adjusted by factor

imp_stn_code = closest_row['abbrev']
org_stn_code = stn_code

denominator = amtrak_df[amtrak_df['abbrev']==imp_stn_code]['Rides'].mean()
numerator = amtrak_df[amtrak_df['abbrev']==org_stn_code]['Rides'].mean()

factor = numerator/denominator
print(f'Avg Rides per month for {imp_stn_code}: ', denominator)
print(f'Avg Rides per month for {org_stn_code}: ', numerator)
print('Factor to adjust imputations: ', factor)

Avg Rides per month for MOD:  7854.761904761905
Avg Rides per month for SKT:  1709.9305555555554
Factor to adjust imputations:  0.21769349297767


In [256]:
imputed_values = amtrak_df[amtrak_df['abbrev']==imp_stn_code][['Year','Month','Rides']]
imputed_values = imputed_values[imputed_values['Year'].isin(years_list)]
imputed_values['Rides'] = imputed_values['Rides']*factor
imputed_values['abbrev'] = org_stn_code
imputed_values

Unnamed: 0,Year,Month,Rides,abbrev
19044,2021,1,449.31937,SKT
19045,2021,2,673.979054,SKT
19046,2021,3,786.308897,SKT
19047,2021,4,1010.968581,SKT
19048,2021,5,1235.628266,SKT
19049,2021,6,1235.628266,SKT
19050,2021,7,1347.958109,SKT
19051,2021,8,1123.298424,SKT
19052,2021,9,786.308897,SKT
19053,2021,10,673.979054,SKT


In [257]:
# Merge the dataframes on the common columns
amtrak_df = pd.merge(amtrak_df, imputed_values, on=['Year', 'Month', 'abbrev'], how='left', suffixes=('', '_imputed'))

# Replace missing values in 'value' column with values from 'value_impute'
amtrak_df['Rides'] = amtrak_df['Rides'].combine_first(amtrak_df['Rides_imputed'])

# Drop the 'value_impute' column if no longer needed
amtrak_df = amtrak_df.drop(columns=['Rides_imputed'])

In [258]:
# Check missing again
missing = pd.DataFrame(amtrak_df.groupby('station_name')['Rides'].apply(lambda x: x.isna().sum())).reset_index()
missing[missing['Rides']>0]

Unnamed: 0,station_name,Rides


### Save imputed version of data

In [259]:
amtrak_df.to_csv('amtrak_df_v2.csv',index=False)

### Reread into notebook

In [362]:
path = '/Users/jonzimmerman/Desktop/Data Projects/Amtrak/data/'
amtrak_df = pd.read_csv(path + 'amtrak_df_v2.csv')
amtrak_df.head(2)

Unnamed: 0,Year,Month,Rides,address,state,station_name,abbrev,lat,lon,num_routes,active_routes,active_rides,parent_route,business_line,year_month
0,2016,1,1486.44,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-01-01
1,2016,2,2229.66,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-02-01


In [363]:
amtrak_df['parent_route'] = np.where(amtrak_df['parent_route'].isna(), "Other", amtrak_df['parent_route'])
amtrak_df['business_line'] = np.where(amtrak_df['business_line'].isna(), "Other", amtrak_df['business_line'])

In [364]:
amtrak_df['business_line'].value_counts()

business_line
Long Distance         19656
State Supported        7392
Other                  4284
Northeast Corridor     2100
Name: count, dtype: int64

### Create case weights

In [365]:
# Get the most recent time in the dataframe for reference
from datetime import datetime

max_month = amtrak_df['year_month'].max()
date_format = '%Y-%m-%d'
date_object = datetime.strptime(max_month, date_format)
date_object

#Calculate the difference in months between each date in the 'Time' column and the most recent date
months_diff = (date_object.year - amtrak_df['Year']) * 12 + date_object.month - amtrak_df['Month']

# More recent months get higher weights, e.g., weight decreases with the number of months passed
amtrak_df['weight'] = 10 - months_diff
amtrak_df['weight'] = amtrak_df['weight'].clip(lower=1)  # Ensure weights do not go below 1
amtrak_df.head(2)

Unnamed: 0,Year,Month,Rides,address,state,station_name,abbrev,lat,lon,num_routes,active_routes,active_rides,parent_route,business_line,year_month,weight
0,2016,1,1486.44,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-01-01,1
1,2016,2,2229.66,"18 E Bel Air Ave Aberdeen, MD 21001-3701",MD,"Aberdeen, MD",ABE,39.509437,-76.163827,1,['northeast_regional_route'],[21767995.0],Northeast Regional,Northeast Corridor,2016-02-01,1


In [367]:
amtrak_df['year_month'] = pd.to_datetime(amtrak_df['year_month'], format='%Y-%m-%d')
amtrak_df.dtypes

Year                      int64
Month                     int64
Rides                   float64
address                  object
state                    object
station_name             object
abbrev                   object
lat                     float64
lon                     float64
num_routes                int64
active_routes            object
active_rides             object
parent_route             object
business_line            object
year_month       datetime64[ns]
weight                    int64
dtype: object

### Step 2: Split into training and calibration datasets

In [368]:
from sklearn.model_selection import train_test_split

calibration_size = 0.2  
train_calibration_set, test_df = train_test_split(amtrak_df, test_size=0.2, random_state=42)

train_df, calib_df = train_test_split(train_calibration_set, test_size=calibration_size, random_state=42)

# Show the results
print("TRN set dimensions: ", train_df.shape)
print("CLB set dimensions: ", calib_df.shape)
print("TST set dimensions: ", test_df.shape)

TRN set dimensions:  (21396, 16)
CLB set dimensions:  (5349, 16)
TST set dimensions:  (6687, 16)


### Fit training data to model

In [379]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Convert dates to numeric format (number of days since the start of the dataset)
train_df['date_num'] = (train_df['year_month'] - train_df['year_month'].min()).dt.days

# Define the formula for the Poisson regression
formula = 'Rides ~ date_num'

# Group the training data by the grouping variable
grouped = train_df.groupby('station_name')

# Prepare a dictionary to store models and future predictions
models = {}
future_predictions = {}

# Forecasting horizon
forecast_horizon = 12

# Fit Poisson regression models for each group and make future predictions
for group_name, group_data in grouped:
    # Fit the model
    model = smf.poisson(formula=formula, data=group_data, freq_weights=group_data['weight']).fit()
    models[group_name] = model

    # Create future data points for prediction
    last_date_num = group_data['date_num'].max()
    
    future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
    future_date_num = (future_dates - train_df['year_month'].min()).days
    
    future_data = pd.DataFrame({
        'date_num': future_date_num
    })

    # Make predictions for the future data points
    future_predictions[group_name] = model.predict(future_data)

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 208.848041
         Iterations 7
Optimization terminated successfully.
         Current function value: 3764.451790
         Iterations 7
Optimization terminated successfully.
         Current function value: 141.016935
         Iterations 7
Optimization terminated successfully.
         Current function value: 288.200378
         Iterations 7
Optimization terminated successfully.
         Current function value: 4.752507
         Iterations 7
Optimization terminated successfully.
         Current function value: 25.922272
         Iterations 7
Optimization terminated successfully.
         Current function value: 290.176808
         Iterations 7
Optimization terminated successfully.
         Current function value: 75.225581
         Iterations 7
Optimization terminated successfully.
         Current function value: 48.210797
         Iterations 7
Optimization terminated successfully.
         Current function valu

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 933.009702
         Iterations 7
Optimization terminated successfully.
         Current function value: 770.934103
         Iterations 7
Optimization terminated successfully.
         Current function value: 352.671163
         Iterations 7
Optimization terminated successfully.
         Current function value: 30.495006
         Iterations 8
Optimization terminated successfully.
         Current function value: 13642.094857
         Iterations 7
Optimization terminated successfully.
         Current function value: 56.098799
         Iterations 7
Optimization terminated successfully.
         Current function value: 40.628061
         Iterations 7
Optimization terminated successfully.
         Current function value: 175.711627
         Iterations 7
Optimization terminated successfully.
         Current function value: 11.472901
         Iterations 7
Optimization terminated successfully.
         Current function va

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 833.732021
         Iterations 7
Optimization terminated successfully.
         Current function value: 478.427424
         Iterations 7
Optimization terminated successfully.
         Current function value: 63.480833
         Iterations 7
Optimization terminated successfully.
         Current function value: 168.660907
         Iterations 7
Optimization terminated successfully.
         Current function value: 186.060241
         Iterations 7
Optimization terminated successfully.
         Current function value: 135.224064
         Iterations 7
Optimization terminated successfully.
         Current function value: 159.521704
         Iterations 7
Optimization terminated successfully.
         Current function value: 71.082130
         Iterations 7
Optimization terminated successfully.
         Current function value: 157.968443
         Iterations 7
Optimization terminated successfully.
         Current function va

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 20.818835
         Iterations 7
Optimization terminated successfully.
         Current function value: 128.015298
         Iterations 8
Optimization terminated successfully.
         Current function value: 1469.846460
         Iterations 7
Optimization terminated successfully.
         Current function value: 122.418439
         Iterations 7
Optimization terminated successfully.
         Current function value: 336.612621
         Iterations 7
Optimization terminated successfully.
         Current function value: 284.214781
         Iterations 7
Optimization terminated successfully.
         Current function value: 136.431538
         Iterations 7
Optimization terminated successfully.
         Current function value: 42.273862
         Iterations 7
Optimization terminated successfully.
         Current function value: 75.172654
         Iterations 7
Optimization terminated successfully.
         Current function va

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 482.395561
         Iterations 7
Optimization terminated successfully.
         Current function value: 96.326339
         Iterations 7
Optimization terminated successfully.
         Current function value: 43.221257
         Iterations 7
Optimization terminated successfully.
         Current function value: 2071.144189
         Iterations 7
Optimization terminated successfully.
         Current function value: 242.502237
         Iterations 7
Optimization terminated successfully.
         Current function value: 362.372045
         Iterations 7
Optimization terminated successfully.
         Current function value: 2586.852067
         Iterations 7
Optimization terminated successfully.
         Current function value: 760.170538
         Iterations 8
Optimization terminated successfully.
         Current function value: 127.524340
         Iterations 7
Optimization terminated successfully.
         Current function 

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 146.141553
         Iterations 7
Optimization terminated successfully.
         Current function value: 25194.181324
         Iterations 7
Optimization terminated successfully.
         Current function value: 609.578078
         Iterations 7
Optimization terminated successfully.
         Current function value: 39.488823
         Iterations 7
Optimization terminated successfully.
         Current function value: 98.294172
         Iterations 7
Optimization terminated successfully.
         Current function value: 8.425968
         Iterations 7
Optimization terminated successfully.
         Current function value: 65.234868
         Iterations 7
Optimization terminated successfully.
         Current function value: 66.767600
         Iterations 7
Optimization terminated successfully.
         Current function value: 19.167317
         Iterations 7
Optimization terminated successfully.
         Current function value

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset

Optimization terminated successfully.
         Current function value: 17.497026
         Iterations 7
Optimization terminated successfully.
         Current function value: 82.477862
         Iterations 7
Optimization terminated successfully.
         Current function value: 205.751029
         Iterations 7
Optimization terminated successfully.
         Current function value: 5374.786358
         Iterations 8
Optimization terminated successfully.
         Current function value: 237.458450
         Iterations 7
Optimization terminated successfully.
         Current function value: 106.062467
         Iterations 7
Optimization terminated successfully.
         Current function value: 114.395392
         Iterations 7
Optimization terminated successfully.
         Current function value: 142.003761
         Iterations 7
Optimization terminated successfully.
         Current function value: 249.073870
         Iterations 7
Optimization terminated successfully.
         Current function v

  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset(months=1), periods=forecast_horizon, freq='M')
  future_dates = pd.date_range(start=group_data['year_month'].max() + pd.DateOffset