In [89]:
import numpy as np
import pandas as pd
import pickle

## Read the Data

In [90]:
# Load the training CSV, then separate the target column from the features.
# NOTE(review): the frame displayed below has an `Unnamed: 0` column, which
# looks like a saved row index that is being kept as a feature -- confirm
# that this is intended.
x_train = pd.read_csv("../../data/data_train.csv")
y_train = x_train["price_range"].copy()
x_train = x_train.drop(columns=["price_range"])

In [91]:
# Display the training feature frame (pandas elides the middle rows).
x_train

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,804,1,0.8,1,12,1,41,0.9,89,1,13,709,818,2027,11,5,11,1,0,0
1,1042,0,2.2,0,15,1,11,0.6,139,5,16,68,1018,2826,18,0,2,1,0,0
2,1481,1,2.0,1,0,0,35,0.5,105,3,0,249,522,2635,17,16,4,1,0,1
3,1104,0,1.7,0,1,1,60,0.4,199,2,13,653,1413,1229,6,0,3,1,1,1
4,652,0,0.5,1,1,0,58,0.6,142,3,2,464,781,565,18,12,9,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1395,536,1,1.4,0,0,1,53,0.7,135,3,0,547,705,1211,15,10,7,1,0,1
1396,1097,0,0.8,0,10,1,21,0.1,160,7,15,1277,1352,2219,15,6,12,1,0,1
1397,1179,1,0.5,0,7,1,32,0.3,182,2,12,85,1451,340,16,5,16,1,0,0
1398,719,1,0.5,1,0,1,23,0.4,113,6,9,431,1727,3990,14,9,12,1,1,1


In [92]:
# Check the (rows, columns) shape of the training features.
x_train.shape

(1400, 20)

In [93]:
# Display the training labels (the `price_range` column).
y_train

0       1
1       2
2       2
3       0
4       0
       ..
1395    0
1396    2
1397    0
1398    3
1399    1
Name: price_range, Length: 1400, dtype: int64

In [94]:
# Check that there is one label per training row.
y_train.shape

(1400,)

In [95]:
# Load the validation CSV, then separate the target column from the features.
x_test = pd.read_csv("../../data/data_validation.csv")
y_test = x_test["price_range"].copy()
x_test = x_test.drop(columns=["price_range"])

In [96]:
# Display the validation feature frame (pandas elides the middle rows).
x_test

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,pc,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi
0,842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1
1,563,1,0.5,1,2,1,41,0.9,145,5,6,1263,1716,2603,11,2,9,1,1,0
2,769,1,2.9,1,0,0,9,0.1,182,5,1,248,874,3946,5,2,7,0,0,0
3,775,0,1.0,0,3,0,46,0.7,159,2,16,862,1864,568,17,15,11,1,1,1
4,595,0,0.9,1,7,1,23,0.1,121,3,17,441,810,3752,10,2,18,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,1547,1,2.9,0,2,0,57,0.4,114,1,3,347,957,1620,9,2,19,0,1,1
596,1882,0,2.0,0,11,1,44,0.8,113,8,19,4,743,3579,19,8,20,1,1,0
597,674,1,2.9,1,1,0,21,0.2,198,3,4,576,1809,1180,6,3,4,1,1,1
598,1965,1,2.6,1,0,0,39,0.2,187,4,3,915,1965,2032,11,10,16,1,1,1


In [97]:
# Check the (rows, columns) shape of the validation features.
x_test.shape

(600, 20)

In [98]:
# Display the validation labels (the `price_range` column).
y_test

0      1
1      2
2      3
3      0
4      3
      ..
595    1
596    3
597    0
598    2
599    3
Name: price_range, Length: 600, dtype: int64

In [99]:
# Check that there is one label per validation row.
y_test.shape

(600,)

## KNN Class

In [100]:
def euclidean_distance(x1, x2, axis=None):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    Parameters
    ----------
    x1, x2 : numpy.ndarray
        Operands of ``x1 - x2``; NumPy broadcasting rules apply.
    axis : int or None, default None
        Axis along which the squared differences are summed. ``None``
        sums over every element and yields a single scalar. With a 2-D
        ``x1`` and a 1-D ``x2``, ``axis=1`` yields one distance per row
        of ``x1``.

    Returns
    -------
    numpy.floating or numpy.ndarray
        The square root of the summed squared differences.
    """
    return np.sqrt(np.sum((x1 - x2) ** 2, axis=axis))


class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Attributes set by the methods below:
      k        -- number of neighbours that vote on each prediction
      x_train  -- training features, stored by ``fit`` exactly as passed
      y_train  -- training labels, stored by ``fit`` exactly as passed
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` closest training rows.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 (no neighbour could vote).
        """
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k}")
        self.k = k

    def fit(self, x, y):
        """Store the training data; all computation is deferred to ``predict``.

        Parameters
        ----------
        x : 2-D array-like (e.g. DataFrame or ndarray)
            One row per training sample.
        y : 1-D array-like (e.g. Series or ndarray)
            One label per row of ``x``. Labels must be non-negative integers
            because the vote is counted with ``np.bincount``.
        """
        self.x_train = x
        self.y_train = y

    def predict(self, x_test):
        """Predict a label for every row of ``x_test``.

        Parameters
        ----------
        x_test : 2-D array-like
            One row per sample, with the same columns as the training data.

        Returns
        -------
        numpy.ndarray
            One predicted label per row. When several labels tie for the
            vote, ``np.argmax`` picks the smallest one.
        """
        # Convert once, outside the loop: building an array from a DataFrame
        # is not free and the training data does not change between rows.
        train_features = np.asarray(self.x_train)
        train_labels = np.asarray(self.y_train)

        y_pred = []
        for query_row in np.asarray(x_test):
            # Distances from this row to every training row in one NumPy call.
            dist = euclidean_distance(train_features, query_row, axis=1)
            nearest = dist.argsort()[:self.k]
            y_pred.append(np.argmax(np.bincount(train_labels[nearest])))
        return np.array(y_pred)

    def score(self, x_test, y_test):
        """Return the fraction of rows in ``x_test`` whose prediction equals ``y_test``."""
        y_pred = self.predict(x_test)
        # Compare by position; np.asarray drops any pandas index on y_test.
        return np.sum(y_pred == np.asarray(y_test)) / len(y_test)

    def get_accuracy(self, x_test, y_test):
        """Print the accuracy on ``(x_test, y_test)``; returns ``None``."""
        acc = self.score(x_test, y_test)
        print("Accuracy: ", acc)

    def save_model(self, filename):
        """Pickle this instance to ``../model/<filename>``.

        The path is relative to the current working directory and the
        ``../model/`` directory must already exist. Loading the file later
        requires the ``KNN`` class to be importable/defined in the loading
        process, as with any pickled instance.
        """
        # The context manager guarantees the handle is flushed and closed,
        # even if pickling raises.
        with open("../model/" + filename, 'wb') as model_file:
            pickle.dump(self, model_file)
        
# Fit the from-scratch classifier (k=7) on the training CSV, report its
# accuracy on the validation CSV, then pickle it for later reuse.
model = KNN(k=7)
model.fit(x_train, y_train)
model.get_accuracy(x_test, y_test)
model.save_model("knn_model.pkl")

Accuracy:  0.9283333333333333


## Comparing with scikit-learn's KNN

In [101]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Split the training frame into a training part and a held-out test part
# (20% held out, fixed seed so the split is reproducible).
X_train, X_test, Y_train, Y_test = train_test_split(
    x_train, y_train, test_size=0.2, random_state=1
)

# Create the scikit-learn KNN classifier with k=7 and train it on the
# training part (``fit`` returns the fitted estimator itself).
knn_classifier = KNeighborsClassifier(n_neighbors=7).fit(X_train, Y_train)

# Predict labels for the held-out rows.
y_pred = knn_classifier.predict(X_test)

# Evaluate model performance.
accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
classification_report_result = classification_report(y_true=Y_test, y_pred=y_pred)

print("Accuracy:", accuracy)
print("Classification Report:\n", classification_report_result)

Accuracy: 0.9428571428571428
Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.97        71
           1       0.95      0.94      0.94        80
           2       0.89      0.92      0.90        59
           3       0.97      0.93      0.95        70

    accuracy                           0.94       280
   macro avg       0.94      0.94      0.94       280
weighted avg       0.94      0.94      0.94       280



In [102]:
# Run the from-scratch KNN (k=7) on the same 80/20 split used for the
# scikit-learn classifier so the two accuracies are directly comparable.
knn_scratch = KNN(k=7)
knn_scratch.fit(X_train, Y_train)
knn_scratch.get_accuracy(X_test, Y_test)

Accuracy:  0.9428571428571428


## Load Model

In [103]:
# Reload the pickled classifier and re-check its accuracy on the validation data.
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# load model files that you created yourself or otherwise trust.
# The context manager closes the file handle as soon as unpickling finishes.
with open('../model/knn_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
loaded_model.get_accuracy(x_test, y_test)

Accuracy:  0.9283333333333333
