In [20]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# 1. Data

## 1.1 Load

In [5]:
df_wood = pd.read_csv("data/features_wood.csv")
df_plastic = pd.read_csv("data/features_plastic.csv")

In [4]:
df_wood

Unnamed: 0,ID,contrast_left,Correlation_left,Energy_left,Homogeneity_left,lable
0,0,332.892653,0.939959,0.000620,0.233690,wood
1,1,479.675918,0.861065,0.002766,0.190891,wood
2,2,525.824082,0.922167,0.000750,0.245179,wood
3,3,278.780816,0.945416,0.000641,0.252217,wood
4,4,1041.700000,0.772586,0.000473,0.105567,wood
...,...,...,...,...,...,...
95,95,769.682857,0.750541,0.000526,0.130848,wood
96,96,994.076735,0.703506,0.000485,0.103064,wood
97,97,1212.673878,0.783652,0.000509,0.117477,wood
98,98,1755.659184,0.627685,0.000464,0.088685,wood


In [6]:
df_plastic

Unnamed: 0,ID,contrast_left,Correlation_left,Energy_left,Homogeneity_left,lable
0,0,623.241224,0.879982,0.002429,0.390277,plastic
1,1,457.222449,0.927222,0.014592,0.452937,plastic
2,2,627.317959,0.871048,0.000872,0.197748,plastic
3,3,990.888163,0.836677,0.009799,0.308981,plastic
4,4,866.158776,0.868840,0.000754,0.236540,plastic
...,...,...,...,...,...,...
95,95,427.678367,0.909434,0.000598,0.209245,plastic
96,96,415.891020,0.928213,0.000882,0.279669,plastic
97,97,495.092653,0.924394,0.001110,0.327051,plastic
98,98,237.915918,0.929845,0.002779,0.391457,plastic


## 1.2 Praprocessing

### 1.2.1. concatenate

In [8]:
df_data = pd.concat([df_plastic,df_wood])
df_data

Unnamed: 0,ID,contrast_left,Correlation_left,Energy_left,Homogeneity_left,lable
0,0,623.241224,0.879982,0.002429,0.390277,plastic
1,1,457.222449,0.927222,0.014592,0.452937,plastic
2,2,627.317959,0.871048,0.000872,0.197748,plastic
3,3,990.888163,0.836677,0.009799,0.308981,plastic
4,4,866.158776,0.868840,0.000754,0.236540,plastic
...,...,...,...,...,...,...
95,95,769.682857,0.750541,0.000526,0.130848,wood
96,96,994.076735,0.703506,0.000485,0.103064,wood
97,97,1212.673878,0.783652,0.000509,0.117477,wood
98,98,1755.659184,0.627685,0.000464,0.088685,wood


### 1.2.2. Shuffle

In [9]:
df_data = df_data.sample(frac=1)
df_data

Unnamed: 0,ID,contrast_left,Correlation_left,Energy_left,Homogeneity_left,lable
15,15,958.986939,0.826940,0.000759,0.182269,wood
61,61,733.659592,0.829459,0.000613,0.166281,wood
62,62,185.504082,0.965666,0.000750,0.215695,plastic
50,50,248.543673,0.964678,0.002299,0.368135,plastic
52,52,481.595510,0.903959,0.000609,0.206300,wood
...,...,...,...,...,...,...
84,84,1057.913469,0.816945,0.000697,0.176992,plastic
64,64,1172.472245,0.745264,0.000487,0.120072,wood
73,73,1108.097959,0.822649,0.000500,0.137139,plastic
54,54,299.616327,0.923243,0.001098,0.263208,wood


### 1.2.3. features and lables

In [10]:
X = df_data.drop(columns=['ID', 'lable'], axis=1)
Y = df_data['lable']

In [11]:
X

Unnamed: 0,contrast_left,Correlation_left,Energy_left,Homogeneity_left
15,958.986939,0.826940,0.000759,0.182269
61,733.659592,0.829459,0.000613,0.166281
62,185.504082,0.965666,0.000750,0.215695
50,248.543673,0.964678,0.002299,0.368135
52,481.595510,0.903959,0.000609,0.206300
...,...,...,...,...
84,1057.913469,0.816945,0.000697,0.176992
64,1172.472245,0.745264,0.000487,0.120072
73,1108.097959,0.822649,0.000500,0.137139
54,299.616327,0.923243,0.001098,0.263208


In [12]:
Y

15       wood
61       wood
62    plastic
50    plastic
52       wood
       ...   
84    plastic
64       wood
73    plastic
54       wood
35       wood
Name: lable, Length: 200, dtype: object

### 1.2.4. to numpy array

In [13]:
X = X.to_numpy()
Y = Y.to_numpy()

In [17]:
for i in range(Y.shape[0]):
    if Y[i] == 'wood':
        Y[i] = 1
    else:
        Y[i] = 0

In [18]:
Y

array([1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
       1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 1], dtype=object)

### 1.2.4. Split

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)

In [22]:
print('X_train : ', X_train.shape)
print('X_test : ', X_test.shape)
print('y_train: ', y_train.shape)
print('y_test : ', y_test.shape)

X_train :  (140, 4)
X_test :  (60, 4)
y_train:  (140,)
y_test :  (60,)


# 2. KNN

## 2.1. KNN Algorithm

In [24]:
def Euclid_distance (point1, point2):
    

    return np.linalg.norm(point1 - point2)

In [25]:
def find_neighbors (test_point, train_points, k):

    neighbors = []
    neighbors_indcies = []
    Dist = []
    sorted_Dist = []

    for i in range (train_points.shape[0]):
        distance = Euclid_distance (test_point, train_points[i])
        Dist.append(distance)
        
    sorted_Dist = Dist.copy()
    sorted_Dist.sort()

    for i in range (k):
        neighbors.append(train_points[Dist.index(sorted_Dist[i])])
        neighbors_indcies.append(Dist.index(sorted_Dist[i]))

    return np.array(neighbors), neighbors_indcies

## 2.2. KNN Model

In [26]:
def KNN (test_feature_vectors, train_feature_vectors,y_train, k):

    y_hat = []

    for i in range (test_feature_vectors.shape[0]):
        _, neighbors_indcies = find_neighbors (test_feature_vectors[i], train_feature_vectors, k)
        print("(",i,"/",test_feature_vectors.shape[0])
        y_hat.append(y_train[neighbors_indcies])

    return np.array(y_hat)

In [27]:
y_hat = KNN (X_test, X_train,y_train, 1)

( 0 / 60
( 1 / 60
( 2 / 60
( 3 / 60
( 4 / 60
( 5 / 60
( 6 / 60
( 7 / 60
( 8 / 60
( 9 / 60
( 10 / 60
( 11 / 60
( 12 / 60
( 13 / 60
( 14 / 60
( 15 / 60
( 16 / 60
( 17 / 60
( 18 / 60
( 19 / 60
( 20 / 60
( 21 / 60
( 22 / 60
( 23 / 60
( 24 / 60
( 25 / 60
( 26 / 60
( 27 / 60
( 28 / 60
( 29 / 60
( 30 / 60
( 31 / 60
( 32 / 60
( 33 / 60
( 34 / 60
( 35 / 60
( 36 / 60
( 37 / 60
( 38 / 60
( 39 / 60
( 40 / 60
( 41 / 60
( 42 / 60
( 43 / 60
( 44 / 60
( 45 / 60
( 46 / 60
( 47 / 60
( 48 / 60
( 49 / 60
( 50 / 60
( 51 / 60
( 52 / 60
( 53 / 60
( 54 / 60
( 55 / 60
( 56 / 60
( 57 / 60
( 58 / 60
( 59 / 60


In [29]:
y_hat.shape

(60, 1)

# 3. Calculate Accuracy

In [28]:
def accuracy_metric(actual, predicted):
    correct = 0
    for i in range(len(actual)):
        if actual[i] == predicted[i]:
            correct += 1
    return correct / float(len(actual)) * 100.0

In [30]:
accuracy_metric(y_hat, y_test)

43.333333333333336