In [1]:
import numpy as np
import pandas as pd

import numpy.linalg as lin
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Read the five columns this notebook works with, then strip leading/trailing
# whitespace from the label strings in the same chain.
data = (
    pd.read_csv(
        'data/Algerian_forest_fires.csv',
        usecols=['temp', 'rel_hum', 'wind_sp', 'rain', 'class'],
    )
    .assign(**{'class': lambda frame: frame['class'].str.strip()})
)
data

Unnamed: 0,temp,rel_hum,wind_sp,rain,class
0,29,57,18,0.0,not fire
1,29,61,13,1.3,not fire
2,26,82,22,13.1,not fire
3,25,89,13,2.5,not fire
4,27,77,16,0.0,not fire
...,...,...,...,...,...
239,30,65,14,0.0,fire
240,28,87,15,4.4,not fire
241,27,87,29,0.5,not fire
242,24,54,18,0.1,not fire


In [3]:
# Cast the three feature columns that were displayed as integers to float;
# 'rain' and 'class' are not touched by this cell.
data = data.astype({'temp': float, 'rel_hum': float, 'wind_sp': float})
data

Unnamed: 0,temp,rel_hum,wind_sp,rain,class
0,29.0,57.0,18.0,0.0,not fire
1,29.0,61.0,13.0,1.3,not fire
2,26.0,82.0,22.0,13.1,not fire
3,25.0,89.0,13.0,2.5,not fire
4,27.0,77.0,16.0,0.0,not fire
...,...,...,...,...,...
239,30.0,65.0,14.0,0.0,fire
240,28.0,87.0,15.0,4.4,not fire
241,27.0,87.0,29.0,0.5,not fire
242,24.0,54.0,18.0,0.1,not fire


In [4]:
# Columns that are rescaled here and later used as model features.
norm_data = ['temp', 'rel_hum', 'wind_sp', 'rain']

# Min-max scaling: (value - column minimum) / (column maximum - column minimum).
# NOTE(review): the minimum and range are taken from the whole frame, i.e.
# before any train/test split is made further down in the notebook.
col_min = data[norm_data].min()
col_range = data[norm_data].max() - col_min
data[norm_data] = (data[norm_data] - col_min) / col_range
data

Unnamed: 0,temp,rel_hum,wind_sp,rain,class
0,0.35,0.521739,0.521739,0.000000,not fire
1,0.35,0.579710,0.304348,0.077381,not fire
2,0.20,0.884058,0.695652,0.779762,not fire
3,0.15,0.985507,0.304348,0.148810,not fire
4,0.25,0.811594,0.434783,0.000000,not fire
...,...,...,...,...,...
239,0.40,0.637681,0.347826,0.000000,fire
240,0.30,0.956522,0.391304,0.261905,not fire
241,0.25,0.956522,1.000000,0.029762,not fire
242,0.10,0.478261,0.521739,0.005952,not fire


In [5]:
# X: the scaled feature columns listed in norm_data; y: the 'class' label column.
X, y = data[norm_data], data['class']

In [6]:
# Evaluate distance-weighted k-NN with a Mahalanobis metric over 10 random
# train/test splits and every odd neighbour count from 5 to 35.
# Each printed line is "<test accuracy> <n_neighbors>".
for i in range(10):
    # Seed the split with the repetition index: the 10 repetitions still use
    # 10 different splits, but a fresh Restart & Run All reproduces exactly
    # the same accuracies instead of new random ones on every run.
    x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=i)

    # Inverse covariance matrix of the training features only; it is passed
    # to the Mahalanobis metric as 'VI'. It depends only on the split, so it
    # is computed once per split rather than once per neighbour count.
    S = np.linalg.inv(np.cov(x_train[norm_data], rowvar=False))
    for j in range(5, 36, 2):
        KNN_class = KNeighborsClassifier(n_neighbors = j, weights = 'distance',
                                         metric = 'mahalanobis', metric_params = {'VI' : S})
        KNN_class.fit(x_train, y_train)

        # Only the hard predictions are needed for accuracy; the previously
        # computed (and never read) predict_proba call has been dropped.
        y_pred = KNN_class.predict(x_test)

        print(accuracy_score(y_test, y_pred), j)

0.7540983606557377 5
0.7213114754098361 7
0.7377049180327869 9
0.7377049180327869 11
0.7540983606557377 13
0.7540983606557377 15
0.8032786885245902 17
0.8032786885245902 19
0.819672131147541 21
0.8032786885245902 23
0.7540983606557377 25
0.7704918032786885 27
0.7540983606557377 29
0.7704918032786885 31
0.7377049180327869 33
0.7540983606557377 35
0.7704918032786885 5
0.7540983606557377 7
0.7704918032786885 9
0.7540983606557377 11
0.7377049180327869 13
0.7704918032786885 15
0.7540983606557377 17
0.7540983606557377 19
0.7540983606557377 21
0.7540983606557377 23
0.7540983606557377 25
0.7704918032786885 27
0.7540983606557377 29
0.7540983606557377 31
0.7540983606557377 33
0.7377049180327869 35
0.8032786885245902 5
0.7704918032786885 7
0.8032786885245902 9
0.7868852459016393 11
0.8032786885245902 13
0.8032786885245902 15
0.819672131147541 17
0.8524590163934426 19
0.819672131147541 21
0.8360655737704918 23
0.8524590163934426 25
0.8688524590163934 27
0.8524590163934426 29
0.8524590163934426 31


In [7]:
# Baseline: a default-parameter SVC on a single train/test split.
# The split is seeded so the reported accuracy is reproducible on re-run.
x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=0)

# The inverse covariance matrix that used to be computed here was never
# passed to the SVC, so that dead computation has been removed.
SVC_class = svm.SVC()
SVC_class.fit(x_train, y_train)

y_pred = SVC_class.predict(x_test)

# Last expression of the cell: test-set accuracy, shown as the cell output.
accuracy_score(y_test, y_pred)

0.7213114754098361