## Regression models for forecasting counts
## The Negative Binomial (NB) regression model is another commonly used model for count-based data.
The model cells in this notebook fit Poisson regression models to the fire-count data.

In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt 
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
import seaborn as sns
import numpy as np
import logging
import statsmodels.api as sm

In [2]:
from  load_data_functions import get_fire_data
from load_data_functions import get_climate_data
from data_models import MapCoordinates
from data_models import FireDataGrid
from data_models import RowRange
from data_models import ColumnRange
from data_models import GridCell

from load_data_functions import get_all_fire_data
from load_data_functions import get_all_climate_data
from load_data_functions import get_mapped_fire_data


from fire_occurrence_functions import set_fire_count
from climate_mapping_functions import set_climate_data

In [3]:
# The three bulk loaders below are disabled (commented out) in this version of
# the notebook; only the grid object is built in this cell.
#all_fire_data = get_all_fire_data()
#all_climate_data = get_all_climate_data()
#model_fire_data = get_mapped_fire_data()
# Build the grid container. FireDataGrid is imported from the project-local
# data_models module; per the captured cell output, construction prints the
# fire-data dimensions followed by per-row progress lines.
fire_data_grid = FireDataGrid()


fire data Row 9657 and Columns 3219
finished row 0 
finished row 110 
finished row 221 
finished row 332 
finished row 443 
finished row 554 
finished row 665 
finished row 776 
finished row 887 
finished row 998 
finished row 1109 
finished row 1220 
finished row 1331 
finished row 1442 
finished row 1553 
finished row 1664 
finished row 1775 
finished row 1886 
finished row 1997 
finished row 2108 
finished row 2219 
finished row 2330 
finished row 2441 
finished row 2552 
finished row 2663 
finished row 2774 
finished row 2885 
finished row 2996 
finished row 3107 
finished row 3218 
finished row 3329 
finished row 3440 
finished row 3551 
finished row 3662 
finished row 3773 
finished row 3884 
finished row 3995 
finished row 4106 
finished row 4217 
finished row 4328 
finished row 4439 
finished row 4550 
finished row 4661 
finished row 4772 
finished row 4883 
finished row 4994 
finished row 5105 
finished row 5216 
finished row 5327 
finished row 5438 
finished row 5549 
finishe

In [4]:
# Replace the grid with the value returned by set_fire_count, which receives
# the current grid together with the grid's row and column ranges.
fire_data_grid.grid = set_fire_count(
    fire_data_grid.grid,
    fire_data_grid.row_ranges,
    fire_data_grid.col_ranges,
)

fire 0 row completed  - -121.7423:55.534 at 2248
fire 100 row completed  - -123.823:55.6139 at 2017
fire 200 row completed  - -121.107:56.2446 at 2319
fire 300 row completed  - -123.2417:53.5183 at 2082
fire 400 row completed  - -103.5294:52.9941 at 4270
fire 500 row completed  - -102.4841:51.2677 at 4386
fire 600 row completed  - -103.7297:51.8092 at 4248
fire 700 row completed  - -118.8879:55.1004 at 2565
fire 800 row completed  - -127.1433:58.7763 at 1649
fire 900 row completed  - -103.629:53.1563 at 4259
fire 1000 row completed  - -108.1918:53.1133 at 3752
fire 1100 row completed  - -109.1957:58.3678 at 3641
fire 1200 row completed  - -107.2034:56.8356 at 3862
fire 1300 row completed  - -138.0236:64.1289 at 441
fire 1400 row completed  - -139.8343:63.1138 at 240
fire 1500 row completed  - -139.8866:63.1286 at 234
fire 1600 row completed  - -138.1829:64.1427 at 423
fire 1700 row completed  - -136.9243:62.5803 at 563
fire 1800 row completed  - -137.9681:66.1333 at 447
fire 1900 row c

In [5]:
# NOTE(review): the climate-mapping step is disabled. `all_climate_data` is not
# defined in this notebook either (its loader call in the earlier cell is also
# commented out), so re-enabling this line requires re-enabling that loader too.
#fire_data_grid.grid = set_climate_data(fire_data_grid.grid, fire_data_grid.row_ranges, fire_data_grid.col_ranges, all_climate_data)

In [6]:
# prepare_model_data comes from the project-local model_data_preparation module.
from model_data_preparation import prepare_model_data

# Build the modelling table from the populated grid; per the captured output
# the call prints "grid N row completed" progress lines.
# Presumably the result is a pandas DataFrame (going by the variable name) — TODO confirm.
# NOTE(review): the model-fitting cells further down read
# 'poisson_regression_model_data_4.csv' instead of using this variable; how
# that CSV is produced is not visible in this notebook.
model_data_dataframe = prepare_model_data(fire_data_grid.grid)

grid 0 row completed
grid 100 row completed
grid 200 row completed
grid 300 row completed
grid 400 row completed
grid 500 row completed
grid 600 row completed
grid 700 row completed
grid 800 row completed
grid 900 row completed
grid 1000 row completed
grid 1100 row completed
grid 1200 row completed
grid 1300 row completed
grid 1400 row completed
grid 1500 row completed
grid 1600 row completed
grid 1700 row completed
grid 1800 row completed
grid 1900 row completed
grid 2000 row completed
grid 2100 row completed
grid 2200 row completed
grid 2300 row completed
grid 2400 row completed
grid 2500 row completed
grid 2600 row completed
grid 2700 row completed
grid 2800 row completed
grid 2900 row completed
grid 3000 row completed
grid 3100 row completed
grid 3200 row completed
grid 3300 row completed
grid 3400 row completed
grid 3500 row completed
grid 3600 row completed
grid 3700 row completed
grid 3800 row completed
grid 3900 row completed
grid 4000 row completed
grid 4100 row completed
grid

In [9]:

# Load the modelling table; `number_of_fire` is the count response and the
# columns listed below are the predictors.
model_data = pd.read_csv('poisson_regression_model_data_4.csv')

predictor_columns = [
    'daynight', 'month',
    '10m_u_component_of_wind', '10m_v_component_of_wind', '2m_temperature',
    'soil_temperature_level_1', 'soil_temperature_level_2',
    'soil_temperature_level_3', 'soil_temperature_level_4',
    'soil_type', 'total_precipitation',
    'volumetric_soil_water_layer_1', 'volumetric_soil_water_layer_2',
    'volumetric_soil_water_layer_3', 'volumetric_soil_water_layer_4',
]

# One-hot encode `daynight`. dtype=float keeps the design matrix purely
# numeric: recent pandas versions emit boolean dummies by default, and a frame
# mixing bool and float columns converts to an object array that statsmodels
# cannot fit.
# No explicit constant is added: sm.GLM does not add an intercept on its own,
# but the complete set of `daynight` dummies sums to 1 on every row (assuming
# `daynight` has no missing values), so together they act as the intercept.
# NOTE(review): `month` and `soil_type` enter as plain numeric columns here —
# confirm that is intended rather than categorical encoding.
model_data_X = pd.get_dummies(model_data[predictor_columns], columns=['daynight'], dtype=float)
model_data_y = model_data['number_of_fire'].to_numpy()

# Hold out 30% of the rows; the fixed random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    model_data_X, model_data_y, test_size=0.3, random_state=0
)

# Fit on the training split only. The response must be y_train so that it has
# the same rows as X_train; passing the full response vector raises a
# length-mismatch error.
poisson_training_results = sm.GLM(y_train, X_train, family=sm.families.Poisson()).fit()
# Print the training summary.
print(poisson_training_results.summary())

In [10]:
# The formula below refers to columns by name, and names that begin with a
# digit cannot be written there directly, so those three columns get
# identifier-style names as the table is loaded.
formula_friendly_names = {
    '10m_u_component_of_wind': 'u_wind',
    '10m_v_component_of_wind': 'v_wind',
    '2m_temperature': 'm_temperature',
}
model_data = pd.read_csv('poisson_regression_model_data_4.csv').rename(
    columns=formula_friendly_names
)

# Response on the left of `~`, predictors on the right. Adjacent string
# literals are concatenated by Python into a single formula string.
poisson_formula = (
    'number_of_fire ~ daynight + month + u_wind + v_wind + m_temperature'
    ' + soil_temperature_level_1 + soil_temperature_level_2'
    ' + soil_temperature_level_3 + soil_temperature_level_4'
    ' + soil_type +  total_precipitation'
    ' + volumetric_soil_water_layer_1 + volumetric_soil_water_layer_2'
    ' + volumetric_soil_water_layer_3 + volumetric_soil_water_layer_4'
)

# Build the Poisson model from the formula and fit it on the full table.
model = sm.Poisson.from_formula(poisson_formula, data=model_data)
model_fit = model.fit()

# Last expression of the cell, so the notebook renders the summary.
model_fit.summary()