# Predicting Fire Brigade Call Outs Relating to Specific Types of Fires.

In [7]:
import pandas as pd
from patsy import dmatrices
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
from sklearn.metrics import r2_score
def mean_absolute_percentage_error(y_true, y_pred): 
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [8]:
df = pd.read_csv('FireBrigadeCallOuts_TypeOfFire.csv', header=0, infer_datetime_format=True, parse_dates=[0], index_col=[0])
df["EVENT"] = df["EVENT"].fillna("No Event")
df['MONTH'] = df['MONTH'].astype(object)
df['DAY_OF_WEEK'] = df['DAY_OF_WEEK'].astype(object)
df['DAY'] = df['DAY'].astype(object)
df['HOUR'] = df['HOUR'].astype(object)
df = df.loc[(df['DESCRIPTION'] == "Fire ALARM") | (df['DESCRIPTION'] == "Fire CAR")]
df = df.set_index('DESCRIPTION')

## Baseline - Poisson With Time Features

In [9]:
mask = np.random.rand(len(df)) < 0.7
df_train = df[mask]
df_test = df[~mask]
print('Train Set ='+str(len(df_train)))
print('Test Set ='+str(len(df_test)))
expr = """COUNT ~ MONTH + HOUR + STATION_AREA + DAY + DAY_OF_WEEK"""
y_train, X_train = dmatrices(expr, df_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, df_test, return_type='dataframe')
while X_train.shape[1] != X_test.shape[1]:
    mask = np.random.rand(len(df)) < 0.7
    df_train = df[mask]
    df_test = df[~mask]
    y_train, X_train = dmatrices(expr, df_train, return_type='dataframe')
    y_test, X_test = dmatrices(expr, df_test, return_type='dataframe')
print(X_train.shape)
print(X_test.shape)

Train Set =7226
Test Set =3024
(7226, 84)
(3024, 84)


In [10]:
poisson_training_results1 = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results1.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  COUNT   No. Observations:                 7226
Model:                            GLM   Df Residuals:                     7142
Model Family:                 Poisson   Df Model:                           83
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:            -1.8029e+06
Date:                Mon, 11 May 2020   Deviance:                   3.5302e+06
Time:                        16:21:18   Pearson chi2:                 3.31e+06
No. Iterations:                     4                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
Intercept   

In [11]:
poisson_predictions1 = poisson_training_results1.get_prediction(X_test)
predictions_summary_frame1 = poisson_predictions1.summary_frame()

In [12]:
poisson1mean = predictions_summary_frame1["mean"]

## Negative Binomial Model #1 - With Time Features

In [13]:
expr = """COUNT ~ MONTH + HOUR + STATION_AREA + DAY_OF_WEEK"""
y_train, X_train = dmatrices(expr, df_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, df_test, return_type='dataframe')

In [14]:
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.mu)
df_train['LAMBDA'] = poisson_training_results.mu
df_train['AUX_OLS_DEP'] = df_train.apply(lambda x: ((x['COUNT'] - x['LAMBDA'])**2 - x['COUNT']) / x['LAMBDA'], axis=1)

[4438.58025425 4551.63998851 6013.14227664 ... 5076.40259035 5070.95290692
 4975.38448418]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [15]:
ols_expr = """AUX_OLS_DEP ~ LAMBDA - 1"""
aux_olsr_results = smf.ols(ols_expr, df_train).fit()
print(aux_olsr_results.params)
aux_olsr_results.tvalues

LAMBDA    0.071936
dtype: float64


LAMBDA    71.104563
dtype: float64

In [16]:
nb2_training_results0 = sm.GLM(y_train, X_train,family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0])).fit()
print(nb2_training_results0.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  COUNT   No. Observations:                 7226
Model:                            GLM   Df Residuals:                     7172
Model Family:        NegativeBinomial   Df Model:                           53
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -64377.
Date:                Mon, 11 May 2020   Deviance:                       9854.6
Time:                        16:21:21   Pearson chi2:                 8.59e+03
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
Intercept   

In [17]:
nb2_predictions0 = nb2_training_results0.get_prediction(X_test)
nb2_predictions_summary_frame0 = nb2_predictions0.summary_frame()

In [18]:
nb0mean = nb2_predictions_summary_frame0["mean"]

## Negative Binomial Model #2 - With Weather Features

In [19]:
expr = """COUNT ~ MONTH + HOUR + PRECIPITATION + STATION_AREA + WIND_SPEED + TEMPERATURE + DAY_OF_WEEK"""
y_train, X_train = dmatrices(expr, df_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, df_test, return_type='dataframe')

In [20]:
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.mu)
df_train['LAMBDA'] = poisson_training_results.mu
df_train['AUX_OLS_DEP'] = df_train.apply(lambda x: ((x['COUNT'] - x['LAMBDA'])**2 - x['COUNT']) / x['LAMBDA'], axis=1)

[4431.32195834 4552.35739    5987.47352367 ... 5152.35778851 5150.3493986
 5049.55343256]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [21]:
ols_expr = """AUX_OLS_DEP ~ LAMBDA - 1"""
aux_olsr_results = smf.ols(ols_expr, df_train).fit()

In [22]:
nb2_training_results1 = sm.GLM(y_train, X_train,family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0])).fit()
print(nb2_training_results1.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  COUNT   No. Observations:                 7226
Model:                            GLM   Df Residuals:                     7169
Model Family:        NegativeBinomial   Df Model:                           56
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -64368.
Date:                Mon, 11 May 2020   Deviance:                       9855.7
Time:                        16:21:24   Pearson chi2:                 8.60e+03
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
Intercept   

In [23]:
nb2_predictions1 = nb2_training_results1.get_prediction(X_test)
nb2_predictions_summary_frame1 = nb2_predictions1.summary_frame()

In [24]:
nb1mean = nb2_predictions_summary_frame1["mean"]

## Negative Binomial Model #3 - With Event Features

In [25]:
expr = """COUNT ~ MONTH + HOUR + STATION_AREA + DAY_OF_WEEK + EVENT"""
y_train, X_train = dmatrices(expr, df_train, return_type='dataframe')
y_test, X_test = dmatrices(expr, df_test, return_type='dataframe')

In [26]:
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
print(poisson_training_results.mu)
df_train['LAMBDA'] = poisson_training_results.mu
df_train['AUX_OLS_DEP'] = df_train.apply(lambda x: ((x['COUNT'] - x['LAMBDA'])**2 - x['COUNT']) / x['LAMBDA'], axis=1)

[4439.02950991 4550.95197528 6014.98103691 ... 5077.57618338 5073.53334836
 4979.08754786]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after removing the cwd from sys.path.


In [27]:
ols_expr = """AUX_OLS_DEP ~ LAMBDA - 1"""
aux_olsr_results = smf.ols(ols_expr, df_train).fit()

In [28]:
nb2_training_results2 = sm.GLM(y_train, X_train,family=sm.families.NegativeBinomial(alpha=aux_olsr_results.params[0])).fit()
print(nb2_training_results2.summary())

                 Generalized Linear Model Regression Results                  
Dep. Variable:                  COUNT   No. Observations:                 7226
Model:                            GLM   Df Residuals:                     7168
Model Family:        NegativeBinomial   Df Model:                           57
Link Function:                    log   Scale:                          1.0000
Method:                          IRLS   Log-Likelihood:                -64376.
Date:                Mon, 11 May 2020   Deviance:                       9854.4
Time:                        16:21:27   Pearson chi2:                 8.59e+03
No. Iterations:                     6                                         
Covariance Type:            nonrobust                                         
                                     coef    std err          z      P>|z|      [0.025      0.975]
--------------------------------------------------------------------------------------------------
Intercept   

In [29]:
nb2_predictions2 = nb2_training_results2.get_prediction(X_test)
nb2_predictions_summary_frame2 = nb2_predictions2.summary_frame()

In [30]:
nb2mean = nb2_predictions_summary_frame2["mean"]

## Evaluation

In [31]:
X_test["ACTUAL"] = y_test["COUNT"]
actual = X_test["ACTUAL"]
X_test["POISSON1_PREDICTIONS"] = poisson1mean
X_test["NB0_PREDICTIONS"] = nb0mean
X_test["NB1_PREDICTIONS"] = nb1mean
X_test["NB2_PREDICTIONS"] = nb2mean

In [32]:
XT1 = X_test.loc[['Fire ALARM']]
XT2 = X_test.loc[['Fire CAR']]

xt1actual = XT1["ACTUAL"]
xt1pois1 = XT1["POISSON1_PREDICTIONS"]
xt1nb0 = XT1["NB0_PREDICTIONS"]
xt1nb1 = XT1["NB1_PREDICTIONS"]
xt1nb2 = XT1["NB2_PREDICTIONS"]

xt2actual = XT2["ACTUAL"]
xt2pois1 = XT2["POISSON1_PREDICTIONS"]
xt2nb0 = XT2["NB0_PREDICTIONS"]
xt2nb1 = XT2["NB1_PREDICTIONS"]
xt2nb2 = XT2["NB2_PREDICTIONS"]

In [33]:
poisson1MAE = mean_absolute_percentage_error(xt1actual, xt1pois1)
nb0MAE = mean_absolute_percentage_error(xt1actual, xt1nb0)
nb1MAE = mean_absolute_percentage_error(xt1actual, xt1nb1)
nb2MAE = mean_absolute_percentage_error(xt1actual, xt1nb2)

print("Mean Absolute Percentage Error per Model for Call Outs Relating to Alarms are as follows:")
print("Poisson Model with Time Features              = {:.4f}".format(poisson1MAE))
print("Negative Binomial Model with Time Features    = {:.4f}".format(nb0MAE))
print("Negative Binomial Model with Weather Features = {:.4f}".format(nb1MAE))
print("Negative Binomial Model with Event Features   = {:.4f}".format(nb2MAE))

Mean Absolute Percentage Error per Model for Call Outs Relating to Alarms are as follows:
Poisson Model with Time Features              = 14.7037
Negative Binomial Model with Time Features    = 14.7047
Negative Binomial Model with Weather Features = 14.7366
Negative Binomial Model with Event Features   = 14.7095


In [34]:
poisson1MAE = mean_absolute_percentage_error(xt2actual, xt2pois1)
nb0MAE = mean_absolute_percentage_error(xt2actual, xt2nb0)
nb1MAE = mean_absolute_percentage_error(xt2actual, xt2nb1)
nb2MAE = mean_absolute_percentage_error(xt2actual, xt2nb2)

print("Mean Absolute Percentage Error per Model for Call Outs Relating to Cars are as follows:")
print("Poisson Model with Time Features              = {:.4f}".format(poisson1MAE))
print("Negative Binomial Model with Time Features    = {:.4f}".format(nb0MAE))
print("Negative Binomial Model with Weather Features = {:.4f}".format(nb1MAE))
print("Negative Binomial Model with Event Features   = {:.4f}".format(nb2MAE))

Mean Absolute Percentage Error per Model for Call Outs Relating to Cars are as follows:
Poisson Model with Time Features              = 66.4007
Negative Binomial Model with Time Features    = 65.8884
Negative Binomial Model with Weather Features = 65.9146
Negative Binomial Model with Event Features   = 65.8481
