In [1]:
import pandas as pd
from sklearn import preprocessing
import numpy as np
import matplotlib.pyplot as plt
import math

df = pd.read_csv('./AlgerianFF_Region1.csv')
df.merge( pd.read_csv('./AlgerianFF_Region2.csv') )
df.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI
0,1,6,2012,29,57,18,0.0,65.7,3.4,7.6,1.3,3.4,0.5
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4
2,3,6,2012,26,82,22,13.1,47.1,2.5,7.1,0.3,2.7,0.1
3,4,6,2012,25,89,13,2.5,28.6,1.3,6.9,0.0,1.7,0.0
4,5,6,2012,27,77,16,0.0,64.8,3.0,14.2,1.2,3.9,0.5


In [2]:
df = df.sample(frac=1).reset_index(drop=True)
df.head()

Unnamed: 0,day,month,year,Temperature,RH,Ws,Rain,FFMC,DMC,DC,ISI,BUI,FWI
0,1,9,2012,25,76,17,7.2,46.0,1.3,7.5,0.2,1.8,0.1
1,2,6,2012,29,61,13,1.3,64.4,4.1,7.6,1.0,3.9,0.4
2,3,9,2012,25,78,15,3.8,42.6,1.2,7.5,0.1,1.7,0.0
3,29,8,2012,35,48,18,0.0,90.1,54.2,220.4,12.5,67.4,30.2
4,12,9,2012,29,88,13,0.0,71.0,2.6,16.6,1.2,3.7,0.5


In [3]:
y = np.array(df['FWI'])
# Drop target (FWI) from training data
# Drop day and year as they won't help
# Month is left in as the month could be a predictor of a forest fire
df = df.drop(['FWI', 'day', 'year'], axis=1) 
print(df.head())
inputs = np.array(df, float)
X = preprocessing.scale(inputs)

split_point = math.floor(len(X) * 0.7)

X_train, X_test = X[split_point:], X[split_point + 1: ]
y_train, y_test = y[split_point:], y[split_point + 1: ]

   month  Temperature   RH   Ws  Rain   FFMC   DMC     DC   ISI   BUI
0      9           25   76   17    7.2  46.0   1.3    7.5   0.2   1.8
1      6           29   61   13    1.3  64.4   4.1    7.6   1.0   3.9
2      9           25   78   15    3.8  42.6   1.2    7.5   0.1   1.7
3      8           35   48   18    0.0  90.1  54.2  220.4  12.5  67.4
4      9           29   88   13    0.0  71.0   2.6   16.6   1.2   3.7


In [4]:
# Helper function to see the output of predictions
def print_predictions(regressor):
    for i in range(len(X_test)):
        print(f"{regressor.predict([X_test[i]])[0]}\t{y_test[i]}")

# Calculate the mean absolute error
def calcMeanABSError(regressor, xs, ys):
    total = 0
    for i in range(len(xs)):
        prediction = regressor.predict([xs[i]])[0]
        actual = ys[i]
        total += abs(prediction - actual)
    return total / len(xs)
    

In [5]:
from sklearn.ensemble import RandomForestRegressor

RFregr = RandomForestRegressor(max_depth=2).fit(X_train, y_train)
RFregr.score(X_test, y_test)

0.9750113351335484

In [6]:
calcMeanABSError(RFregr, X_test, y_test)

0.5802871186826984

In [7]:
calcMeanABSError(RFregr, X_train, y_train)

0.5732190145155788

In [8]:
from sklearn.neural_network import MLPRegressor
MLPRegr = MLPRegressor(max_iter=1000).fit(X_train, y_train)
MLPRegr.score(X_test, y_test)

0.9985910861039138

In [9]:
calcMeanABSError(MLPRegr, X_test, y_test)

0.16015354077968944

In [10]:
calcMeanABSError(MLPRegr, X_train, y_train)

0.1593005726115619

In [11]:
from sklearn.svm import SVR
SVMRegr = SVR().fit(X_train, y_train)
SVMRegr.score(X_test, y_test)

0.5240685174555659

In [12]:
calcMeanABSError(SVMRegr, X_test, y_test)

1.993495156010376

In [13]:
calcMeanABSError(SVMRegr, X_train, y_train)

1.9400215850822349