In [122]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')

from sklearn.preprocessing import LabelEncoder, StandardScaler, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import  classification_report, accuracy_score, f1_score, recall_score, precision_score

from sklearn.inspection import permutation_importance

from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd

seed = 1000

In [124]:
# Load the cleaned forest-fire CSV; the 'Date' column is parsed to datetime on read.
df = pd.read_csv(
    'cleaned_algeriean_forest_fire.csv',
    parse_dates=['Date'],
)

In [126]:
# Positional split of the frame: columns 1..10 become the model inputs,
# column 11 becomes the target.
X, y = df.iloc[:, 1:11], df.iloc[:, 11]

In [128]:
# Encode the target labels as integers; `le` keeps the fitted class mapping.
le = LabelEncoder().fit(y)
y = le.transform(y)

In [130]:
# Shuffled, stratified 80/20 hold-out split; `seed` makes it reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    stratify=y,
    random_state=seed,
)

In [132]:
# Standardise the features. The scaler is fitted on the training split only and
# then applied to both splits; set_output keeps the results as DataFrames so the
# column names stay available for the per-model column selections below.
ss = StandardScaler()
ss.set_output(transform='pandas')
ss.fit(X_train)
X_train, X_test = ss.transform(X_train), ss.transform(X_test)

In [134]:
# Logistic regression on degree-2 polynomial features:
# polynomial expansion -> re-standardisation -> L1-penalised classifier.
lr = Pipeline([
    ('poly', PolynomialFeatures(degree=2)),
    ('ss', StandardScaler()),
    # The liblinear solver shuffles the data internally, so pin random_state
    # (as every other model in this notebook does) to keep the fit reproducible.
    ('lr', LogisticRegression(penalty='l1', C=0.2, solver='liblinear', random_state=seed)),
])
lr.fit(X_train, y_train)

In [136]:
# Support-vector classifier with a linear kernel, trained on all scaled features.
svc_lin = SVC(kernel='linear', C=0.3, random_state=seed)
svc_lin.fit(X_train, y_train)

In [137]:
# Support-vector classifier with a polynomial kernel (degree 1), all scaled features.
svc_poly = SVC(kernel='poly', degree=1, gamma=0.4, C=0.8, random_state=seed)
svc_poly.fit(X_train, y_train)

In [140]:
# Support-vector classifier with an RBF kernel; trained without the
# 'DC', 'Ws' and 'RH' columns.
svc_rbf = SVC(kernel='rbf', gamma=0.06, C=0.7, random_state=seed)
svc_rbf.fit(
    X_train.drop(columns=['DC', 'Ws', 'RH']),
    y_train,
)

In [142]:
# Support-vector classifier with a sigmoid kernel; trained on four columns only.
svc_sig = SVC(kernel='sigmoid', gamma=0.25, coef0=-0.8, C=0.82, random_state=seed)
svc_sig.fit(
    X_train[['ISI', 'FFMC', 'FWI', 'Ws']],
    y_train,
)

In [144]:
# 3-nearest-neighbour classifier with distance-weighted votes and a KD-tree
# index; trained on four columns only.
knn = KNeighborsClassifier(
    n_neighbors=3,
    weights='distance',
    algorithm='kd_tree',
    n_jobs=-1,
)
knn.fit(
    X_train[['ISI', 'FFMC', 'FWI', 'Rain']],
    y_train,
)

In [146]:
# Single decision tree with a log-loss split criterion, all scaled features.
dt = DecisionTreeClassifier(
    criterion='log_loss',
    max_depth=32,
    max_features='log2',
    min_samples_leaf=4,
    min_samples_split=38,
    random_state=seed,
)
dt.fit(X_train, y_train)

In [148]:
# Random forest of 50 trees with a log-loss split criterion, all scaled features.
rf = RandomForestClassifier(
    n_estimators=50,
    criterion='log_loss',
    max_depth=32,
    max_features='sqrt',
    min_samples_leaf=38,
    min_samples_split=10,
    random_state=seed,
)
rf.fit(X_train, y_train)

In [150]:
# Gradient-boosted trees (XGBoost): 20 shallow trees with L1 and L2
# regularisation, all scaled features.
xgbc = XGBClassifier(
    n_estimators=20,
    learning_rate=0.2,
    max_depth=3,
    max_leaves=25,
    min_child_weight=3,
    gamma=0.5,
    colsample_bytree=0.6,
    reg_alpha=5,
    reg_lambda=18,
    random_state=seed,
)
xgbc.fit(X_train, y_train)

In [152]:
def score_compare(models,scores,X,y):
    """Score several fitted models on the same data and tabulate the results.

    Parameters
    ----------
    models : dict
        Maps a display name to a fitted estimator exposing ``predict``. When
        the estimator has a ``feature_names_in_`` attribute, only those
        columns of ``X`` are passed to it; otherwise ``X`` is passed unchanged.
    scores : callable or list/tuple of callables
        Metric(s), each called as ``metric(y, y_pred)``. A single callable is
        treated as a one-element list.
    X : pandas.DataFrame
        Feature table to predict on; indexed by column label per model.
    y : array-like
        True labels, passed as the first argument of every metric.

    Returns
    -------
    pandas.DataFrame
        One row per model (dict order) and one column per metric, labelled
        with the metric's ``__name__`` (or its ``repr`` when it has none).

    Raises
    ------
    TypeError
        If ``models`` is not a dict.
    """
    if not isinstance(models, dict):
        # TypeError is still an Exception, so existing `except Exception` callers keep working.
        raise TypeError('Provide the model and model name in a dictionary format')
    # Accept a bare metric, a list, or a tuple of metrics.
    scores = list(scores) if isinstance(scores, (list, tuple)) else [scores]

    model_names = list(models)
    # functools.partial and similar wrappers carry no __name__; fall back to repr.
    score_names = [getattr(score, '__name__', repr(score)) for score in scores]

    output = pd.DataFrame(index=model_names, columns=score_names)

    for name, model in models.items():
        # Each model may have been trained on its own subset of columns.
        columns = getattr(model, 'feature_names_in_', None)
        features = X if columns is None else X[columns]
        # Predict once per model and reuse the predictions for every metric.
        y_pred = model.predict(features)
        for score_name, score in zip(score_names, scores):
            output.loc[name, score_name] = score(y, y_pred)

    # The frame starts out as object dtype; convert filled columns to numeric.
    return output.infer_objects()

In [154]:
# Display name -> fitted estimator; the insertion order is the row order of the
# comparison table.
models = {
    'Logistic Regression': lr,
    'SVC Linear Kernel': svc_lin,
    'SVC Polynomial Kernel': svc_poly,
    'SVC RBF Kernel': svc_rbf,
    'SVC Sigmoid Kernel': svc_sig,
    'K-Nearest Neighbor': knn,
    'Decision Tree': dt,
    'Random Forest': rf,
    'XG Boost': xgbc,
}

In [156]:
# Compare every model on the held-out test split across four metrics.
score_compare(
    models,
    [accuracy_score, f1_score, recall_score, precision_score],
    X_test,
    y_test,
)

Unnamed: 0,accuracy_score,f1_score,recall_score,precision_score
Logistic Regression,1.0,1.0,1.0,1.0
SVC Linear Kernel,1.0,1.0,1.0,1.0
SVC Polynomial Kernel,1.0,1.0,1.0,1.0
SVC RBF Kernel,1.0,1.0,1.0,1.0
SVC Sigmoid Kernel,1.0,1.0,1.0,1.0
K-Nearest Neighbor,0.979592,0.97561,0.952381,1.0
Decision Tree,0.979592,0.97561,0.952381,1.0
Random Forest,1.0,1.0,1.0,1.0
XG Boost,1.0,1.0,1.0,1.0


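Except for the Decision Tree and KNN, all the models score perfectly on the test set. Those two each miss a single positive-class sample out of the 49 test samples (recall 20/21, precision 1.0).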