# Machine Learning Models
Poisson and Negative Binomial Regression

In [45]:
# imports
import pandas as pd
import statsmodels.api as sm

In [46]:
# variables
# Input pickles; all paths are relative to the notebook's working directory.
train_data_input_filepath = '../pickles/reservations_training.pickle'
# NOTE(review): this path is not read by any cell visible in this notebook — confirm whether it is still needed.
test_data_input_filepath = '../pickles/reservations_testing.pickle'

# Output pickles: the final "save models" cell writes one results DataFrame per grid type to these paths.
large_grid_models_output_filepath = '../pickles/large_grid_ml_models.pickle'
small_grid_models_output_filepath = '../pickles/small_grid_ml_models.pickle'
voronoi_grid_models_output_filepath = '../pickles/voronoi_grid_ml_models.pickle'

In [47]:
# Load the training reservations, parse `startTime` into datetimes, use it as
# the index (required by the later `.resample(...)` calls) and drop the columns
# that are not used in this notebook.
# NOTE: pd.read_pickle executes arbitrary code on load — only read trusted files.
train_reservations = (
    pd.read_pickle(train_data_input_filepath)
    .assign(startTime=lambda frame: pd.to_datetime(frame['startTime']))
    .set_index('startTime')
    .drop(columns=[
        'endTime',
        'temperature',
        'precipitation',
        'startLat',
        'startLon',
        'endLat',
        'endLon',
    ])
)
train_reservations.head()

Unnamed: 0_level_0,voronoi_grid_id,small_grid_id,large_grid_id,community_small_grid_id,community_voronoi_grid_id
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2023-02-27 14:05:52,263,4717,195,7,0
2021-05-28 13:30:25,863,6600,266,0,4
2022-08-27 17:47:21,468,5484,212,0,7
2021-07-22 16:31:23,287,3963,163,6,9
2018-09-15 16:14:54,335,3269,126,12,4


# Large Grid

In [50]:
# Count reservations per large-grid cell in 3-hour bins: one row per bin, one
# column per `large_grid_id`, missing (bin, cell) combinations filled with 0.
# '3h' is the current spelling of the hourly alias ('3H' is deprecated in
# recent pandas releases and emits a FutureWarning).
# NOTE(review): the displayed head jumps from hour 21 straight to hour 6, so it
# looks like bins without any reservation at all are absent rather than
# zero-filled — confirm this is intended for a count model.
large_grid_train_reservations = (
    train_reservations
    .resample('3h')
    .large_grid_id.value_counts()
    .unstack()
    .fillna(0)
    # Calendar features taken from the bin's timestamp (the index).
    .assign(
        hour=lambda counts: counts.index.hour,
        day_of_week=lambda counts: counts.index.dayofweek,
        month=lambda counts: counts.index.month,
    )
    # The timestamp itself is no longer needed once the features exist.
    .reset_index(drop=True)
)
large_grid_train_reservations.head()

large_grid_id,12,21,53,69,70,75,76,90,103,107,...,299,300,301,312,313,318,333,hour,day_of_week,month
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12,5,10
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,5,10
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18,5,10
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21,5,10
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,6,10


In [63]:
# Keep only the bins whose month is NOT January, February, November or December.
# NOTE(review): this cell's execution count (63) is higher than the training
# cell's (53); re-run with "Restart & Run All" so the filter is guaranteed to
# be applied before the models are fitted.
large_grid_train_reservations = large_grid_train_reservations[
    ~large_grid_train_reservations['month'].isin([1, 2, 11, 12])
]

In [53]:
# Fit one Poisson GLM and one Negative Binomial GLM per large-grid cell.
# Each list gets exactly one entry per cell (the fitted result, or None when
# fitting raised), so both lists stay aligned with `large_grid_cells`.
large_grid_cells = train_reservations.large_grid_id.unique()

large_grid_poisson_models = []
large_grid_negative_binomial_models = []

# The design matrix depends only on the calendar features, so it is identical
# for every cell: build it once instead of once per loop iteration.
# NOTE(review): drop_first=True removes one level per feature and no constant
# column is added, so the models are fitted without an intercept — confirm this
# is intended; any prediction code has to build the same columns.
x = large_grid_train_reservations[['hour', 'day_of_week', 'month']]
x = pd.get_dummies(x, columns=['hour', 'day_of_week', 'month'], drop_first=True)

for (index, cell) in enumerate(large_grid_cells):
    print(f'Training progress: {index}/{len(large_grid_cells)}')

    # Target: reservation counts of this cell per time bin.
    y = large_grid_train_reservations[cell]

    try:
        print(f'Fitting poisson model for cell {cell}')
        poisson_model = sm.GLM(y, x, family=sm.families.Poisson()).fit(method='lbfgs')
        large_grid_poisson_models.append(poisson_model)
    except Exception as e:
        # Best effort: report the failure and keep a placeholder for this cell.
        print(f'Error training Poisson model for cell {cell}. Error: {e}')
        large_grid_poisson_models.append(None)

    try:
        print(f'Fitting negative binomial model for cell {cell}')
        negative_binomial_model = sm.GLM(y, x, family=sm.families.NegativeBinomial(alpha=1.0)).fit(method='lbfgs')
        large_grid_negative_binomial_models.append(negative_binomial_model)
    except Exception as e:
        # Best effort: report the failure and keep a placeholder for this cell.
        print(f'Error training Negative Binomial model for cell {cell}. Error: {e}')
        large_grid_negative_binomial_models.append(None)

large_grid_results = pd.DataFrame({'cell': large_grid_cells, 'poisson_model': large_grid_poisson_models, 'negative_binomial_model': large_grid_negative_binomial_models })
large_grid_results.head()

Training progress: 0/92
Fitting poisson model for cell 195
Fitting negative binomial model for cell 195
Training progress: 1/92
Fitting poisson model for cell 266
Fitting negative binomial model for cell 266
Training progress: 2/92
Fitting poisson model for cell 212
Fitting negative binomial model for cell 212
Training progress: 3/92
Fitting poisson model for cell 163
Fitting negative binomial model for cell 163
Training progress: 4/92
Fitting poisson model for cell 126
Fitting negative binomial model for cell 126
Training progress: 5/92
Fitting poisson model for cell 111
Fitting negative binomial model for cell 111
Training progress: 6/92
Fitting poisson model for cell 127
Fitting negative binomial model for cell 127
Training progress: 7/92
Fitting poisson model for cell 109
Fitting negative binomial model for cell 109
Training progress: 8/92
Fitting poisson model for cell 161
Fitting negative binomial model for cell 161
Training progress: 9/92
Fitting poisson model for cell 162
Fitti

Unnamed: 0,cell,poisson_model,negative_binomial_model
0,195,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
1,266,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
2,212,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
3,163,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
4,126,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...


In [54]:
# Sanity check: number of entries collected in the Negative Binomial model list.
len(large_grid_negative_binomial_models)

92

# Small Grid

In [55]:
# Map every community id to the list of distinct small-grid cell ids that occur
# together with it in the training data.
small_grid_community_dict = (
    train_reservations
    .groupby('community_small_grid_id')['small_grid_id']
    .apply(lambda cell_ids: list(set(cell_ids)))
    .to_dict()
)

# Count reservations per small-grid cell in 3-hour bins: one row per bin, one
# column per `small_grid_id`, missing (bin, cell) combinations filled with 0.
# '3h' is the current spelling of the hourly alias ('3H' is deprecated in
# recent pandas releases and emits a FutureWarning).
# NOTE(review): the displayed head jumps from 21:00 straight to 06:00 the next
# day, so it looks like bins without any reservation at all are absent rather
# than zero-filled — confirm this is intended for a count model.
small_grid_train_reservations = (
    train_reservations
    .resample('3h')
    .small_grid_id.value_counts()
    .unstack()
    .fillna(0)
    # Calendar features taken from the bin's timestamp (the index).
    .assign(
        hour=lambda counts: counts.index.hour,
        day_of_week=lambda counts: counts.index.dayofweek,
        month=lambda counts: counts.index.month,
    )
)
small_grid_train_reservations.head()

small_grid_id,61,615,616,700,701,785,786,788,789,1626,...,7627,7628,7712,7769,7770,7854,8127,hour,day_of_week,month
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-10-14 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12,5,10
2017-10-14 15:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,5,10
2017-10-14 18:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18,5,10
2017-10-14 21:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21,5,10
2017-10-15 06:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,6,10


In [56]:
# Keep only the bins whose month is NOT January, February, November or December.
small_grid_train_reservations = small_grid_train_reservations[
    ~small_grid_train_reservations['month'].isin([1, 2, 11, 12])
]

In [57]:
# Fit one Poisson GLM and one Negative Binomial GLM per small-grid community.
# All cells of a community are pooled into one long table, with the cell id as
# an additional categorical feature.
small_grid_poisson_models = []
small_grid_negative_binomial_models = []

for (community, cells) in small_grid_community_dict.items():
    print(f'Training model for community {community}')
    # Wide -> long: one row per (time bin, cell) with that cell's reservation count.
    df_melted = small_grid_train_reservations.melt(id_vars=['hour', 'day_of_week', 'month'], value_vars=cells, var_name='grid_cell', value_name='reservations')
    df_melted['grid_cell'] = df_melted['grid_cell'].astype('int32')

    # NOTE(review): drop_first=True removes one level per feature and no constant
    # column is added, so the models are fitted without an intercept — confirm.
    x = df_melted[['hour', 'day_of_week', 'month', 'grid_cell']]
    x = pd.get_dummies(x, columns=['hour', 'day_of_week', 'month', 'grid_cell'], drop_first=True)

    y = df_melted['reservations']

    try:
        poisson_model = sm.GLM(y, x, family=sm.families.Poisson()).fit(method='lbfgs')
        small_grid_poisson_models.append(poisson_model)
    except Exception as e:
        print(f'Error training Poisson model for community {community} with error: {e}')
        # Keep a placeholder so the list stays aligned with the community keys;
        # otherwise the DataFrame below is built from columns of unequal length.
        small_grid_poisson_models.append(None)

    try:
        negative_binomial_model = sm.GLM(y, x, family=sm.families.NegativeBinomial(alpha=1.0)).fit(method='lbfgs')
        small_grid_negative_binomial_models.append(negative_binomial_model)
    except Exception as e:
        print(f'Error training Negative Binomial model for community {community} with error: {e}')
        # Same placeholder logic as for the Poisson model above.
        small_grid_negative_binomial_models.append(None)

small_grid_results = pd.DataFrame({ 'community': small_grid_community_dict.keys(), 'poisson_model': small_grid_poisson_models, 'negative_binomial_model': small_grid_negative_binomial_models })
small_grid_results.head()

Training model for community 0
Training model for community 1
Training model for community 2
Training model for community 3
Training model for community 4
Training model for community 5
Training model for community 6
Training model for community 7
Training model for community 8
Training model for community 9
Training model for community 10
Training model for community 11
Training model for community 12
Training model for community 13
Training model for community 14
Training model for community 15
Training model for community 16
Training model for community 17
Training model for community 18
Training model for community 19


Unnamed: 0,community,poisson_model,negative_binomial_model
0,0,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
1,1,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
2,2,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
3,3,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
4,4,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...


# Voronoi Grid

In [58]:
# Map every community id to the list of distinct Voronoi cell ids that occur
# together with it in the training data.
voronoi_grid_community_dict = (
    train_reservations
    .groupby('community_voronoi_grid_id')['voronoi_grid_id']
    .apply(lambda cell_ids: list(set(cell_ids)))
    .to_dict()
)

# Count reservations per Voronoi cell in 3-hour bins: one row per bin, one
# column per `voronoi_grid_id`, missing (bin, cell) combinations filled with 0.
# '3h' is the current spelling of the hourly alias ('3H' is deprecated in
# recent pandas releases and emits a FutureWarning).
# NOTE(review): the displayed head jumps from 21:00 straight to 06:00 the next
# day, so it looks like bins without any reservation at all are absent rather
# than zero-filled — confirm this is intended for a count model.
voronoi_grid_train_reservations = (
    train_reservations
    .resample('3h')
    .voronoi_grid_id.value_counts()
    .unstack()
    .fillna(0)
    # Calendar features taken from the bin's timestamp (the index).
    .assign(
        hour=lambda counts: counts.index.hour,
        day_of_week=lambda counts: counts.index.dayofweek,
        month=lambda counts: counts.index.month,
    )
)
voronoi_grid_train_reservations.head()

voronoi_grid_id,0,1,2,3,4,5,6,7,8,9,...,1142,1143,1144,1145,1146,1147,1148,hour,day_of_week,month
startTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-10-14 12:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12,5,10
2017-10-14 15:00:00,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15,5,10
2017-10-14 18:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,18,5,10
2017-10-14 21:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,21,5,10
2017-10-15 06:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6,6,10


In [59]:
# Keep only the bins whose month is NOT January, February, November or December.
voronoi_grid_train_reservations = voronoi_grid_train_reservations[
    ~voronoi_grid_train_reservations['month'].isin([1, 2, 11, 12])
]

In [61]:
# Fit one Poisson GLM and one Negative Binomial GLM per Voronoi community.
# Each list receives exactly one entry per community: the fitted result, or
# None when fitting raised.
voronoi_grid_poisson_models = []
voronoi_grid_negative_binomial_models = []

feature_columns = ['hour', 'day_of_week', 'month', 'grid_cell']

for (community, cells) in voronoi_grid_community_dict.items():
    print(f'Training model for community {community}')

    # Wide -> long: one row per (time bin, cell) with that cell's reservation count.
    long_counts = voronoi_grid_train_reservations.melt(
        id_vars=['hour', 'day_of_week', 'month'],
        value_vars=cells,
        var_name='grid_cell',
        value_name='reservations',
    )

    # One-hot encode every feature, dropping the first level of each.
    design = pd.get_dummies(long_counts[feature_columns], columns=feature_columns, drop_first=True)
    target = long_counts['reservations']

    try:
        poisson_fit = sm.GLM(target, design, family=sm.families.Poisson()).fit(method='lbfgs')
    except Exception as e:
        print(f'Error training Poisson model for community {community} with error: {e}')
        poisson_fit = None
    voronoi_grid_poisson_models.append(poisson_fit)

    try:
        negative_binomial_fit = sm.GLM(target, design, family=sm.families.NegativeBinomial(alpha=1.0)).fit(method='lbfgs')
    except Exception as e:
        print(f'Error training Negative Binomial model for community {community} with error: {e}')
        negative_binomial_fit = None
    voronoi_grid_negative_binomial_models.append(negative_binomial_fit)

voronoi_grid_results = pd.DataFrame({
    'community': voronoi_grid_community_dict.keys(),
    'poisson_model': voronoi_grid_poisson_models,
    'negative_binomial_model': voronoi_grid_negative_binomial_models,
})
voronoi_grid_results.head()

Training model for community 0
Training model for community 1
Training model for community 2
Training model for community 3
Training model for community 4
Training model for community 5
Training model for community 6
Training model for community 7
Training model for community 8
Training model for community 9
Training model for community 10
Training model for community 11
Training model for community 12
Training model for community 13
Training model for community 14
Error training Poisson model for community 14 with error: The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.
Error training Negative Binomial model for community 14 with error: The first guess on the deviance function returned a nan.  This could be a boundary  problem and should be reported.
Training model for community 15
Training model for community 16


  endog_mu = self._clean(endog / mu)
  endog_mu = self._clean(endog / mu)


Training model for community 17
Training model for community 18
Training model for community 19


Unnamed: 0,community,poisson_model,negative_binomial_model
0,0,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
1,1,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
2,2,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
3,3,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...
4,4,<statsmodels.genmod.generalized_linear_model.G...,<statsmodels.genmod.generalized_linear_model.G...


In [62]:
# save models
# Write each results DataFrame (id column plus the fitted Poisson and
# Negative Binomial models) to the output path configured at the top of the notebook.
large_grid_results.to_pickle(large_grid_models_output_filepath)
small_grid_results.to_pickle(small_grid_models_output_filepath)
voronoi_grid_results.to_pickle(voronoi_grid_models_output_filepath)