## Imports

In [17]:
import pandas as pd

import warnings

# Ignore all warnings
warnings.filterwarnings("ignore")

## Loading Data

In [18]:
df_train = pd.read_csv('data/data_train.csv')
df_train.head()

df_test = pd.read_csv('data/data_validation.csv')
df_test.head()

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
2,769,1,2.9,1,0,0,9,0.1,182,5,...,248,874,3946,5,2,7,0,0,0,3
3,775,0,1.0,0,3,0,46,0.7,159,2,...,862,1864,568,17,15,11,1,1,1,0
4,595,0,0.9,1,7,1,23,0.1,121,3,...,441,810,3752,10,2,18,1,1,0,3


In [19]:
X_train = df_train.drop('price_range',axis=1)
y_train = df_train['price_range']

X_test = df_test.drop('price_range',axis=1)
y_test = df_test['price_range']

# Find Best Parameters

In [6]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

model = KNeighborsClassifier()

Ns_selected_feature = [i for i in range(1, df_train.shape[1])]
neighbors = [i for i in range(1, 30)]
metrics = ['euclidean', 'manhattan', 'jaccard']

best_n, best_selected_feature, state, value = None, None, None, -1

for n in Ns_selected_feature:
    selector = SelectKBest(f_classif, k=n)
    X_train_selected = selector.fit_transform(X_train, y_train)
    
    input_features = selector.feature_names_in_
    selected_features = selector.get_feature_names_out(input_features=input_features)

    X_test_selected = X_test[selected_features]

    X_train_scaled = X_train_selected
    X_test_scaled = X_test_selected

    for neighbor in neighbors:
        for metric in metrics:
            KNN = KNeighborsClassifier(n_neighbors=neighbor, metric=metric)
            KNN.fit(X_train_scaled,y_train)

            KNN_score = KNN.score(X_test_scaled, y_test)
            
            if KNN_score > value:
                best_n, best_selected_feature, state, value = n, selected_features, (neighbor, metric), KNN_score

print('BEST parameters: ', best_n, best_selected_feature, state, value)        

BEST parameters:  6 ['battery_power' 'int_memory' 'mobile_wt' 'px_height' 'px_width' 'ram'] (19, 'euclidean') 0.94


## KNN From Scratch

In [None]:
# class KNeighborsClassifier():
#     def __init__(self, n_neighbors, metric):
#         self.n_neighbors = n_neighbors
#         self.metric = metric
#         self.X_train_scaled = []
#         self.y_train = []
#     def fit(self, X_train_scaled,y_train): 
#         """
#         Save the training data to the class
#         :param X_train_scaled: 
#         :param y_train: 
#         :return: 
#         """
#         self.X_train_scaled = X_train_scaled
#         self.y_train = y_train
        
#     def predict(self):
        
#     def get_distance(self):
#         if self.metric == 'manhattan':
            
                
        

# KNN with Scikit Learn

## Train the data

In [4]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif

N_FEATURE = 6
N_NEIGHBOR = 19
METRIC = 'euclidean'

selector = SelectKBest(f_classif, k=N_FEATURE)
X_train_selected = selector.fit_transform(X_train, y_train)

input_features = selector.feature_names_in_
selected_features = selector.get_feature_names_out(input_features=input_features)

KNN = KNeighborsClassifier(n_neighbors=N_NEIGHBOR, metric=METRIC)
KNN.fit(X_train_selected, y_train)

## Validate with Test

In [22]:
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score

def evaluate_classifier_performance(prediction, y_test):
    metrics = {
        'Accuracy Average': accuracy_score(y_test, prediction),
        'F1 Macro Average': f1_score(y_test, prediction, average='macro'),
        'F1 Micro Average': f1_score(y_test, prediction, average='micro'),
        'Precision Macro Average': precision_score(y_test, prediction, average='macro', zero_division=0),
        'Precision Micro Average': precision_score(y_test, prediction, average='micro', zero_division=0),
        'Recall Macro Average': recall_score(y_test, prediction, average='macro', zero_division=0),
        'Recall Micro Average': recall_score(y_test, prediction, average='micro', zero_division=0)
    }

    for metric, value in metrics.items():
        print(f'{metric}: {value:.2f}')

In [23]:
X_test_scaled = X_test[selected_features]
y_pred = KNN.predict(X_test_scaled)

evaluate_classifier_performance(y_pred, y_test)

Accuracy Average: 0.94
F1 Macro Average: 0.94
F1 Micro Average: 0.94
Precision Macro Average: 0.94
Precision Micro Average: 0.94
Recall Macro Average: 0.94
Recall Micro Average: 0.94
