In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv("cleaned_forestfires.csv")
df

Unnamed: 0.1,Unnamed: 0,month,ffmc,dmc,temp,rh,dc,area
0,138,7,85.8,48.3,18.0,42,313.4,0.36
1,139,9,91.0,129.5,21.7,38,692.6,0.43
2,140,9,90.9,126.5,21.9,39,686.5,0.47
3,141,8,95.5,99.9,23.3,31,513.3,0.55
4,142,8,90.1,108.0,21.2,51,529.8,0.61
...,...,...,...,...,...,...,...,...
265,509,8,91.0,166.9,21.1,71,752.6,2.17
266,510,8,91.0,166.9,18.2,62,752.6,0.43
267,512,8,81.6,56.7,27.8,32,665.6,6.44
268,513,8,81.6,56.7,21.9,71,665.6,54.29


In [3]:
df.drop('Unnamed: 0', axis=1, inplace=True)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 270 entries, 0 to 269
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   month   270 non-null    int64  
 1   ffmc    270 non-null    float64
 2   dmc     270 non-null    float64
 3   temp    270 non-null    float64
 4   rh      270 non-null    int64  
 5   dc      270 non-null    float64
 6   area    270 non-null    float64
dtypes: float64(5), int64(2)
memory usage: 14.9 KB


In [5]:
df['area'].value_counts()

1.94      3
1.75      2
6.43      2
2.14      2
11.06     2
         ..
105.66    1
154.88    1
196.48    1
200.94    1
11.16     1
Name: area, Length: 250, dtype: int64

In [6]:
from sklearn.model_selection import train_test_split

from sklearn.ensemble import ExtraTreesRegressor
from sklearn.tree import DecisionTreeRegressor

In [7]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)
X_train.shape, X_test.shape

((189, 6), (81, 6))

In [9]:
model = ExtraTreesRegressor()
model.fit(X, y)
y_pred = model.predict(X_test)

In [10]:
accuracy_train = model.score(X_train, y_train)
accuracy_test  = model.score(X_test, y_test)

In [11]:
print(f"Akurasi Model (Train) : {np.round(accuracy_train * 100,2)} %")
print(f"Akurasi Model (Test)  : {np.round(accuracy_test * 100,2)} %")

Akurasi Model (Train) : 99.98 %
Akurasi Model (Test)  : 99.92 %


In [12]:
import joblib
joblib.dump((model), "extra_tree.model")

['extra_tree.model']

In [13]:
df_test = pd.DataFrame(data={
    "month" : [3],
    "ffmc"  : [86.2],
    "dmc" : [26.2],
    "temp"  : [8.2],
    "rh"  : [51],
    "dc"  : [94.3]
})

df_test[0:1]

Unnamed: 0,month,ffmc,dmc,temp,rh,dc
0,3,86.2,26.2,8.2,51,94.3


In [14]:
pred_test = model.predict(df_test[0:1])
print(np.round(pred_test[0],2))

14.2


In [15]:
# from sklearn.linear_model import LogisticRegression

# logmodel = LogisticRegression()
# print(logmodel.fit(X_train,y_train))

In [16]:
from sklearn.metrics import confusion_matrix

In [17]:
y_test

153     4.42
115     8.85
81     31.86
165     5.83
5       0.71
       ...  
88     49.37
122     4.40
231     1.94
78     29.48
235     6.84
Name: area, Length: 81, dtype: float64

In [18]:
y_pred

array([  4.42 ,   8.85 ,  31.86 ,   5.83 ,   0.71 ,  15.095,   4.25 ,
        13.99 ,   8.98 ,   1.95 ,   7.73 ,   3.5  ,  20.03 ,   1.07 ,
         5.65 ,   7.8  ,   4.41 , 185.76 ,   3.93 ,  48.55 ,   6.3  ,
       174.63 , 200.94 ,  56.04 ,  15.34 ,   1.19 ,  13.7  ,  54.29 ,
         0.9  ,   7.4  ,   2.03 ,  30.32 ,   9.96 ,   8.31 ,  12.1  ,
         1.75 ,   7.36 ,  95.18 ,   2.74 ,   6.83 ,  16.4  ,   0.52 ,
         2.01 ,   5.86 ,   6.43 ,   4.88 ,   1.63 ,   1.38 ,  16.33 ,
         1.76 ,  35.88 ,  10.08 ,   2.44 ,  10.13 ,   2.17 ,   2.77 ,
        32.1  ,   3.52 ,   9.36 ,   7.77 ,  19.23 ,  12.18 ,   4.53 ,
        17.2  ,   2.35 ,   2.87 ,   5.8  ,   7.77 ,   3.63 ,  37.02 ,
         6.61 ,   6.96 ,   3.64 ,  24.23 , 103.39 ,  24.59 ,  49.37 ,
         4.4  ,   1.94 ,  29.48 ,   6.84 ])

In [24]:
# mean squared error
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test, y_pred)

1.1240793209876572

In [25]:
# root mean squared error
mean_squared_error(y_test, y_pred, squared = False)

1.06022607069797

In [26]:
from sklearn.metrics import r2_score
r2_score(y_test, y_pred)

0.9992083388133971

In [27]:
# define variables for adjusted r2 score
r2 = r2_score(y_test, y_pred)
n = len(y_test)
k = len(X_test.columns)

# calculate adjusted r2 score
adj_r2_score = 1-(((1-r2)*(n-1))/(n-k-1))

adj_r2_score

0.9991441500685374