# Fault Type Classification Using K-Nearest Neighbors (K-NN)

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Importing the dataset

In [2]:
# Load the fault measurements from the CSV file in the working directory.
faults = pd.read_csv(filepath_or_buffer='MSC_Dissertation_test_original3.csv')
# Bare expression as the last statement so the notebook renders the frame.
faults

Unnamed: 0,Fault_location,Fault_section,Fault_type,632_Active_PhaseA,632_Active_PhaseB,632_Active_PhaseC,633_Active_PhaseA,633_Active_PhaseB,633_Active_PhaseC,692_Active_PhaseA,...,692_Active_PhaseC,671_Active_PhaseA,671_Active_PhaseB,671_Active_PhaseC,645_Active_PhaseA,645_Active_PhaseB,645_Active_PhaseC,684_Active_PhaseA,684_Active_PhaseB,684_Active_PhaseC
0,0.0,0,0,1.130,0.9087,1.189,0.142200,0.104900,0.104900,0.4613,...,0.2534,-0.9627,-0.4158,-0.8718,0,-0.331700,-0.79430,-0.1243,0,-0.1667
1,1.0,1,1,1.108,0.8908,1.166,0.139400,0.102800,0.102900,0.4522,...,0.2484,-0.9437,-0.4076,-0.8545,0,-0.325300,-0.07786,-0.1219,0,-0.1634
2,2.3,2,1,134.500,116.1000,127.500,0.000201,0.000184,0.000046,0.4610,...,0.2533,-0.9622,-0.4157,-0.8714,0,-0.331600,-0.07940,-0.1243,0,-0.1666
3,2.5,2,1,89.550,77.3400,85.400,0.000087,0.000088,0.000021,0.4611,...,0.2533,-0.9624,-0.4157,-0.8716,0,-0.331700,-0.07910,-0.1243,0,-0.1666
4,2.7,2,1,67.220,58.0600,64.360,0.000048,0.000052,0.000013,0.4612,...,0.2533,-0.9624,-0.4157,-0.8716,0,-0.331700,-0.07942,-0.1243,0,-0.1667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,11.9,11,4,1.130,25.0900,1.194,0.142200,0.104900,0.104900,0.4613,...,0.2534,-0.9627,-0.4158,-0.8718,0,-0.003662,-0.11320,-0.1243,0,-0.1667
153,12.3,12,4,1.130,21.0300,1.195,0.142200,0.104900,0.104900,0.4613,...,0.2534,-0.9627,-0.4158,-0.8718,0,-4.557000,-0.10910,-0.1243,0,-0.1667
154,12.5,12,4,1.130,19.4700,1.195,0.142200,0.104900,0.104900,0.4613,...,0.2534,-0.9627,-0.4158,-0.8718,0,-5.264000,-0.10760,-0.1243,0,-0.1667
155,12.7,12,4,1.130,18.1300,1.195,0.142200,0.104900,0.104900,0.4613,...,0.2534,-0.9627,-0.4158,-0.8718,0,-5.739000,-0.10620,-0.1243,0,-0.1667


In [3]:
# Feature matrix: positional columns 3..20 (the slice end, 21, is exclusive),
# i.e. 18 columns, converted to a plain NumPy array.
X = faults.iloc[:, 3:21].to_numpy()
# Label vector: positional column 2, also as a NumPy array.
y = faults.iloc[:, 2].to_numpy()

In [4]:
# Display the label array taken from positional column 2 of the dataset.
y

array([0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
       4, 4, 4], dtype=int64)

In [5]:
# Display the feature matrix taken from positional columns 3..20 of the dataset.
X


array([[ 1.130e+00,  9.087e-01,  1.189e+00, ..., -1.243e-01,  0.000e+00,
        -1.667e-01],
       [ 1.108e+00,  8.908e-01,  1.166e+00, ..., -1.219e-01,  0.000e+00,
        -1.634e-01],
       [ 1.345e+02,  1.161e+02,  1.275e+02, ..., -1.243e-01,  0.000e+00,
        -1.666e-01],
       ...,
       [ 1.130e+00,  1.947e+01,  1.195e+00, ..., -1.243e-01,  0.000e+00,
        -1.667e-01],
       [ 1.130e+00,  1.813e+01,  1.195e+00, ..., -1.243e-01,  0.000e+00,
        -1.667e-01],
       [ 1.130e+00,  1.596e+01,  1.196e+00, ..., -1.243e-01,  0.000e+00,
        -1.667e-01]])

## Splitting the dataset into the Training set and Test set

In [58]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples as the test set; the fixed random_state makes
# the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [59]:
# Show the training feature matrix (NumPy abbreviates large arrays with "...").
print(X_train)

[[ 9.623e+00  5.083e+00  6.770e+00 ... -2.729e-03  0.000e+00 -3.734e-03]
 [ 1.130e+00  3.720e+01  1.193e+00 ... -1.243e-01  0.000e+00 -1.667e-01]
 [ 5.493e+00  1.004e+00  1.189e+00 ... -7.958e-05  0.000e+00 -2.658e-01]
 ...
 [ 1.130e+00  9.087e-01  1.189e+00 ... -1.243e-01  0.000e+00 -1.667e-01]
 [ 1.130e+00  9.087e-01  1.189e+00 ... -1.243e-01  0.000e+00 -1.667e-01]
 [ 3.235e+01  1.191e+00  1.210e+00 ... -1.122e-04  0.000e+00 -2.658e-02]]


In [60]:
# Show the labels that belong to the training rows.
print(y_train)

[1 2 3 1 3 2 4 3 1 3 3 3 1 2 1 4 2 1 3 2 4 1 3 4 4 4 4 4 3 3 3 4 4 4 4 2 3
 2 4 3 2 2 2 1 4 3 1 1 2 2 2 1 4 4 1 2 4 3 3 1 4 1 4 3 4 2 1 4 2 2 2 1 4 4
 2 3 1 4 1 3 1 1 2 4 4 0 1 1 2 3 2 1 1 3 2 1 4 4 4 1 1 3 3 3 3 4 2 2 1 3 4
 4 1 2 4 3 2 3 1 1 1 3 2 3 2]


In [61]:
# Show the held-out test feature matrix.
print(X_test)

[[ 4.518e+01  2.353e+01  3.149e+01  1.422e-01  1.049e-01  1.049e-01
   6.540e-05  2.476e-05  1.178e-04 -1.442e-04 -1.826e-04 -3.248e-04
   0.000e+00 -3.317e-01 -7.943e-02 -1.764e-05  0.000e+00 -7.753e-05]
 [ 4.713e+01  2.551e+01  1.221e+00  4.653e-04  3.434e-04  1.456e-01
   4.612e-01  5.778e-02  2.534e-01 -9.625e-01 -4.158e-01 -8.718e-01
   0.000e+00 -3.317e-01 -7.944e-02 -1.243e-01  0.000e+00 -1.667e-01]
 [ 1.130e+00  9.087e-01  1.189e+00  1.422e-01  1.049e-01  1.049e-01
   4.613e-01  5.778e-02  2.534e-01 -9.627e-01 -4.158e-01 -8.718e-01
   0.000e+00 -3.317e-01 -7.943e-02 -1.243e-01  0.000e+00 -1.667e-01]
 [ 1.169e+00  8.330e+01  1.195e+00  1.638e-01  4.847e-03  1.545e-01
   4.613e-01  5.777e-02  2.534e-01 -9.626e-01 -4.157e-01 -8.718e-01
   0.000e+00 -3.317e-01 -7.945e-02 -1.243e-01  0.000e+00 -1.667e-01]
 [ 5.768e+00  9.749e-01  1.180e+00  1.422e-01  1.049e-01  1.049e-01
   5.087e-02  8.757e-02  3.373e-01 -2.974e+00 -5.761e-01 -8.981e-01
   0.000e+00 -3.317e-01 -7.943e-02 -1.437e+0

In [62]:
# Show the labels that belong to the held-out test rows.
print(y_test)

[1 2 3 4 2 3 4 2 3 3 1 4 1 2 3 4 4 3 4 2 2 1 2 1 2 1 3 1 3 3 2 2]


## Feature Scaling

In [63]:
from sklearn.preprocessing import StandardScaler

# Estimate the per-feature mean and standard deviation from the training split
# only, then apply that same transformation to both splits so the test data
# does not influence the scaling parameters.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [64]:
# Show the training feature matrix as it currently stands in X_train.
print(X_train)

[[-0.07412862 -0.3192882   0.02733999 ...  0.43594626  0.
   1.21995978]
 [-0.42266071  1.20792932 -0.30509901 ...  0.21307139  0.
   0.2152202 ]
 [-0.24361379 -0.51325153 -0.30533744 ...  0.44080341  0.
  -0.39576426]
 ...
 [-0.42266071 -0.5177832  -0.30533744 ...  0.21307139  0.
   0.2152202 ]
 [-0.42266071 -0.5177832  -0.30533744 ...  0.21307139  0.
   0.2152202 ]
 [ 0.85853223 -0.50435936 -0.30408566 ...  0.44074361  0.
   1.07910659]]


In [65]:
# Show the test feature matrix as it currently stands in X_test.
print(X_test)

[[ 1.38504425e+00  5.57897766e-01  1.50087274e+00 -9.00132270e-02
  -1.09729177e-01 -8.48808094e-02 -8.47119351e-01 -3.67425309e-01
  -9.67616231e-01  1.50818327e+00  7.64927850e-01  2.07589556e+00
   0.00000000e+00  2.85826065e-01  1.62859411e-01  4.40916965e-01
   0.00000000e+00  1.24250313e+00]
 [ 1.46506751e+00  6.52050106e-01 -3.03429959e-01 -5.18444165e-01
  -4.16311589e-01  1.06543353e-01 -6.89694118e-02 -2.75076269e-01
  -2.14231339e-01  4.18955460e-01  2.71260409e-01  3.80813418e-01
   0.00000000e+00  2.85826065e-01  1.62847535e-01  2.13071393e-01
   0.00000000e+00  2.15220196e-01]
 [-4.22660707e-01 -5.17783202e-01 -3.05337445e-01 -9.00132270e-02
  -1.09729177e-01 -8.48808094e-02 -6.88006650e-02 -2.75076269e-01
  -2.14231339e-01  4.18729093e-01  2.71260409e-01  3.80813418e-01
   0.00000000e+00  2.85826065e-01  1.62859411e-01  2.13071393e-01
   0.00000000e+00  2.15220196e-01]
 [-4.21060241e-01  3.40006207e+00 -3.04979791e-01 -2.47214652e-02
  -4.03106066e-01  1.48402691e-01 -6.

## Training the K-NN model on the Training set

In [87]:
from sklearn.neighbors import KNeighborsClassifier

# Nearest-neighbour classifier: a single neighbour decides the class, and
# distances are measured with the Minkowski metric of order p = 7.
classifier = KNeighborsClassifier(
    n_neighbors=1,
    metric='minkowski',
    p=7,
)
# fit() is the last expression, so the notebook displays the fitted estimator.
classifier.fit(X_train, y_train)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
                     metric_params=None, n_jobs=None, n_neighbors=1, p=7,
                     weights='uniform')

## Predicting a new result

In [88]:
# Classify one hand-entered sample of 18 feature values. The sample is passed
# through the fitted scaler first, because the classifier was trained on
# scaled features.
print(classifier.predict(sc.transform([[
    1.130, 21.0300, 1.195,
    0.142200, 0.104900, 0.104900,
    0.4613, 0.0577, 0.2534,
    -0.9627, -0.4158, -0.8718,
    0, -4.557000, -0.10910,
    -0.1243, 0, -0.1667,
]])))

[4]


## Predicting the Test set results

In [92]:
# Predict a label for every row of the test set.
y_pred = classifier.predict(X_test)
# Print a two-column table: predicted label on the left, true label on the right.
print(np.column_stack((y_pred, y_test)))

[[1 1]
 [2 2]
 [3 3]
 [4 4]
 [3 2]
 [3 3]
 [2 4]
 [4 2]
 [3 3]
 [3 3]
 [3 1]
 [3 4]
 [2 1]
 [1 2]
 [3 3]
 [3 4]
 [4 4]
 [3 3]
 [4 4]
 [2 2]
 [3 2]
 [1 1]
 [3 2]
 [1 1]
 [3 2]
 [1 1]
 [2 3]
 [1 1]
 [3 3]
 [3 3]
 [2 2]
 [2 2]]


## Making the Confusion Matrix

In [90]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

# Confusion matrix of true labels (rows) against predicted labels (columns).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)
# Left as the last expression so the notebook displays the overall accuracy.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[5 1 1 0]
 [1 4 4 1]
 [0 1 8 0]
 [0 1 2 3]]


0.625

In [91]:
# Per-class precision, recall, F1 and support for the test-set predictions.
print(classification_report(y_true=y_test, y_pred=y_pred))

              precision    recall  f1-score   support

           1       0.83      0.71      0.77         7
           2       0.57      0.40      0.47        10
           3       0.53      0.89      0.67         9
           4       0.75      0.50      0.60         6

    accuracy                           0.62        32
   macro avg       0.67      0.63      0.63        32
weighted avg       0.65      0.62      0.62        32



In [85]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Mean absolute difference between the true and predicted integer labels.
# NOTE(review): this is a regression metric applied to class codes; it is only
# meaningful if the numeric codes are ordinal - confirm before reporting it.
mean_absolute_error(y_true=y_test, y_pred=y_pred)

0.46875

In [86]:
# Mean squared difference between the true and predicted integer labels.
# NOTE(review): this is a regression metric applied to class codes; it is only
# meaningful if the numeric codes are ordinal - confirm before reporting it.
mean_squared_error(y_true=y_test, y_pred=y_pred)

0.65625

## The End









































































