# Build a multinomial logistic model

In [10]:
#import libraries required
import statsmodels.api as sm
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
#load df into notebook
df = pd.read_csv('../Data/forest_fire_clean.csv')
df.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,month_num,day_num,area_squared_km,fire_spread
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,3,4,0.0,0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,10,1,0.0,0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,10,5,0.0,0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,3,4,0.0,0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,3,6,0.0,0


In [3]:
#drop redundant columns
df_final = df.drop(columns= ['month','day','area','area_squared_km'], axis=1)

In [5]:
#Drop duplicate values
df_final = df.drop_duplicates()

In [6]:
#Drop all null values (cannot be used in the analysis)
df_final = df_final.dropna(how='any',axis=0) 

In [7]:
#Verify that nf has no null values
df_final.isna().sum()

X                  0
Y                  0
month              0
day                0
FFMC               0
DMC                0
DC                 0
ISI                0
temp               0
RH                 0
wind               0
rain               0
area               0
month_num          0
day_num            0
area_squared_km    0
fire_spread        0
dtype: int64

In [8]:
df_final.shape
#df.shape

(512, 17)

In [9]:
df_final.head()

Unnamed: 0,X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area,month_num,day_num,area_squared_km,fire_spread
0,7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0.0,0.0,3,4,0.0,0
1,7,4,oct,tue,90.6,35.4,669.1,6.7,18.0,33,0.9,0.0,0.0,10,1,0.0,0
2,7,4,oct,sat,90.6,43.7,686.9,6.7,14.6,33,1.3,0.0,0.0,10,5,0.0,0
3,8,6,mar,fri,91.7,33.3,77.5,9.0,8.3,97,4.0,0.2,0.0,3,4,0.0,0
4,8,6,mar,sun,89.3,51.3,102.2,9.6,11.4,99,1.8,0.0,0.0,3,6,0.0,0


In [9]:


sns.pairplot(data=df_final)
plt.show()

In [42]:
#Finding the correlation 
df_final.corr(numeric_only = True)

Unnamed: 0,X,Y,FFMC,DMC,DC,ISI,temp,RH,wind,rain,month_num,day_num,fire_spread
X,1.0,0.543747,-0.021824,-0.048463,-0.086293,-0.015867,-0.051837,0.083881,0.020357,0.065385,-0.065755,-0.025182,0.068042
Y,0.543747,1.0,-0.045686,0.008102,-0.099368,-0.020476,-0.023702,0.062581,-0.019409,0.03328,-0.065726,-0.00605,0.047385
FFMC,-0.021824,-0.045686,1.0,0.384121,0.335555,0.582868,0.43138,-0.300928,-0.029576,0.056926,0.294521,-0.043413,0.059647
DMC,-0.048463,0.008102,0.384121,1.0,0.682472,0.347262,0.469836,0.071705,-0.105002,0.074751,0.464286,0.064707,0.045302
DC,-0.086293,-0.099368,0.335555,0.682472,1.0,0.288681,0.49962,-0.044212,-0.202667,0.035557,0.868115,0.007736,0.06252
ISI,-0.015867,-0.020476,0.582868,0.347262,0.288681,1.0,0.434592,-0.149293,0.113819,0.077653,0.223406,0.007131,0.044385
temp,-0.051837,-0.023702,0.43138,0.469836,0.49962,0.434592,1.0,-0.528723,-0.227857,0.069588,0.369799,0.051904,0.038908
RH,0.083881,0.062581,-0.300928,0.071705,-0.044212,-0.149293,-0.528723,1.0,0.07145,0.099613,-0.100244,0.095444,-0.045208
wind,0.020357,-0.019409,-0.029576,-0.105002,-0.202667,0.113819,-0.227857,0.07145,1.0,0.061523,-0.084853,0.029545,0.071429
rain,0.065385,0.03328,0.056926,0.074751,0.035557,0.077653,0.069588,0.099613,0.061523,1.0,0.013098,-0.048099,0.044033


# Set variables for analysis:
    
Dependent variable(y) = fire_spread <br /> 
Independent variables(x) = X , Y, FFMC, DMC, DC, ISI, temp, RH, wind, rain,month_num, day_num <br /> 

## Bins Created in EDA

fire_spread 1 = minimal spread <br>
fire_spread 2 = moderate spread <br>
fire_spread 3 = large spread <br>

In [43]:
df_final['fire_spread'].value_counts()

0    244
1    139
2    129
Name: fire_spread, dtype: int64

In [44]:
x = df_final[['X','Y','FFMC','DMC','DC','ISI','temp','RH','wind','rain','month_num','day_num']] #set x variable
y = df_final['fire_spread'] #set y variable

model = sm.MNLogit(y, x) #set as float values

results = model.fit() #fit the model
print(results.summary())

Optimization terminated successfully.
         Current function value: 1.022677
         Iterations 7
                          MNLogit Regression Results                          
Dep. Variable:            fire_spread   No. Observations:                  512
Model:                        MNLogit   Df Residuals:                      488
Method:                           MLE   Df Model:                           22
Date:                Wed, 27 Sep 2023   Pseudo R-squ.:                 0.03018
Time:                        22:09:59   Log-Likelihood:                -523.61
converged:                       True   LL-Null:                       -539.91
Covariance Type:            nonrobust   LLR p-value:                   0.06789
fire_spread=1       coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------
X                 0.0122      0.055      0.220      0.826      -0.096       0.121
Y                 0.