<a href="https://colab.research.google.com/github/phubestp/Machine_Learning_Final_Project/blob/main/machine_learning_proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Algerian Forest Fires

[dataset](https://archive.ics.uci.edu/dataset/547/algerian+forest+fires+dataset)

The dataset contains 244 instances covering two regions of Algeria: the Bejaia region in the northeast and the Sidi Bel-abbes region in the northwest.

In [1]:
import pandas as pd
import numpy as np
from ml_lib.preprocessing import TrainTestSplit
from ml_lib.knn import KNN
from ml_lib.svm import SVM
from ml_lib.logistic_regression import LogisticRegression
from ml_lib.naive_bayes import GaussianNaiveBayes
from ml_lib.accuracy import Accuracy
from ml_lib.perceptron import Perceptron
from ml_lib.scaler import MinMaxScaler

## Import data

In [2]:
# Read the forest-fire table from disk; skipinitialspace drops blanks that
# directly follow a delimiter. Then print a per-column dtype / non-null summary.
data = pd.read_csv(filepath_or_buffer="data.csv", skipinitialspace=True)
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 244 entries, 0 to 243
Data columns (total 14 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   day          244 non-null    int64  
 1   month        244 non-null    int64  
 2   year         244 non-null    int64  
 3   Temperature  244 non-null    int64  
 4   RH           244 non-null    int64  
 5   Ws           244 non-null    int64  
 6   Rain         244 non-null    float64
 7   FFMC         244 non-null    float64
 8   DMC          244 non-null    float64
 9   DC           244 non-null    float64
 10  ISI          244 non-null    float64
 11  BUI          244 non-null    float64
 12  FWI          244 non-null    float64
 13  Classes      244 non-null    object 
dtypes: float64(7), int64(6), object(1)
memory usage: 26.8+ KB


## Data Preprocessing

In [3]:
# Series.str.strip() returns a new Series rather than modifying the column,
# so the result is assigned back; otherwise the cleaned labels are discarded.
# Stripping is idempotent, so re-running this cell is safe.
data['Classes'] = data['Classes'].str.strip()
data['Classes']

0      notfire
1      notfire
2      notfire
3      notfire
4      notfire
        ...   
239       fire
240    notfire
241    notfire
242    notfire
243    notfire
Name: Classes, Length: 244, dtype: object

In [4]:
scaler = MinMaxScaler()
split = TrainTestSplit()

# Feature matrix: every column except the target and the calendar fields.
X = data.drop(['Classes', 'day', 'month', 'year'], axis=1)

# Encode the target as -1 / +1. The labels are stripped here so the encoding
# does not depend on an earlier cell having cleaned them, and .map() is used
# instead of .replace() because .map() turns any unrecognised label into NaN,
# which the check below reports instead of letting raw strings reach the models.
label_codes = {"notfire": -1, "fire": 1}
y = data['Classes'].str.strip().map(label_codes)
if y.isna().any():
    unknown = sorted(data.loc[y.isna(), 'Classes'].astype(str).unique())
    raise ValueError(f"Unexpected values in 'Classes': {unknown}")

# NOTE(review): scaling is applied to all rows before the train/test split, so
# the scaler sees the test rows too. If ml_lib's MinMaxScaler can be fitted on
# the training rows only, prefer that to avoid leakage. TODO confirm its API.
X = scaler.scale(np.array(X))
y = np.array(y, dtype=np.int64)

In [5]:
# Split with TrainTestSplit's default settings (the original note states
# test_size=0.33, random_state=42).
# NOTE(review): every accuracy printed in this notebook is an exact multiple of
# 1/65, whereas 0.33 * 244 would give about 80 test rows. Confirm the actual
# defaults in ml_lib.preprocessing.
X_train, X_test, y_train, y_test = split.train_test_split(X, y)

In [6]:
# One scorer instance, reused by each model section to compare predictions
# against y_test.
accuracy = Accuracy()

## KNN (K-Nearest Neighbors)

In [7]:
# Train the project-local KNN classifier and score it on the held-out rows.
# NOTE(review): the positional arguments 10 and 2 are presumably the neighbour
# count and the distance order. Confirm against ml_lib.knn.
knn = KNN(10, 2)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
# The score is the cell's last expression, so it is shown as the cell output.
accuracy.score(pred, y_test)

0.9384615384615385

## Perceptron

In [8]:
# Train the project-local Perceptron; it is constructed with the number of
# feature columns in X_train.
# NOTE(review): the cell output also prints an 11-value array, presumably the
# learned bias plus one weight per feature. Confirm where ml_lib prints it.
perceptron = Perceptron(X_train.shape[1])
perceptron.fit(X_train, y_train)
pred = perceptron.predict(X_test)
# The score is the cell's last expression, so it is shown as the cell output.
accuracy.score(pred, y_test)

[[ -5.7          0.2173913   -4.17391304   7.02380952  20.02077151
   -8.9892638    8.24918033  26.45263158  -5.64723468  25.18971061
  -17.        ]]


0.9692307692307692

## Naive Bayes (Gaussian Naive Bayes)

In [9]:
# Train the project-local Gaussian Naive Bayes classifier (constructed with no
# arguments) and score it on the held-out rows.
nb = GaussianNaiveBayes()
nb.fit(X_train, y_train)
pred = nb.predict(X_test)
# The score is the cell's last expression, so it is shown as the cell output.
accuracy.score(pred, y_test)

0.8769230769230769

## SVM (Support Vector Machine)

In [10]:
# Train the project-local SVM; the first argument is the number of feature
# columns in X_train.
# NOTE(review): the meaning of the second argument (10) is not visible here,
# e.g. a regularisation strength or an iteration setting. Confirm in ml_lib.svm.
svm = SVM(X_train.shape[1], 10)
svm.fit(X_train, y_train)
pred = svm.predict(X_test)
# The score is the cell's last expression, so it is shown as the cell output.
accuracy.score(pred, y_test)

0.9076923076923077

## Logistic Regression

In [11]:
# Train the project-local LogisticRegression; it is constructed with the number
# of feature columns in X_train.
# NOTE(review): y is encoded as -1 / +1 earlier in the notebook. Confirm that
# ml_lib's LogisticRegression expects that encoding rather than 0 / 1.
lr = LogisticRegression(X_train.shape[1])
lr.fit(X_train, y_train)
pred = lr.predict(X_test)
# The score is the cell's last expression, so it is shown as the cell output.
accuracy.score(pred, y_test)

0.9384615384615385