In [7]:
import pandas as pd 
import numpy as np 
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix

In [8]:
data = pd.read_csv('Data/mobile.csv')
data.head(2)

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2


In [9]:
X = data.drop(columns = "price_range")
y = data["price_range"].to_numpy()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 123)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [10]:
knn_best_accuracy = 0 # Initialize variables to keep track of the best KNN model
knn_best_neighbors = 0
knn_best_weights = ""

for neighbors in range(3, 126): # Loop over a range of neighbor values
    for weights in ['uniform', 'distance']:  # Loop over the two types of weight strategies
        knn = KNN(n_neighbors = neighbors, weights = weights)
        knn.fit(X_train_scaled, y_train)
        y_pred = knn.predict(X_test_scaled)
        accuracy = accuracy_score(y_test, y_pred)
        if accuracy > knn_best_accuracy: # Update the best parameters if the current model's accuracy is the highest so far
            knn_best_accuracy = accuracy
            knn_best_neighbors = neighbors
            knn_best_weights = weights

print("The most accurate K-Nearest neighbors Accuracy is:", knn_best_accuracy) # Print the highest accuracy found and the corresponding parameters
print("The best K-Nearest neighbors Number of Neighbors:", knn_best_neighbors)
print("The best K-Nearest neighbors Weights:", knn_best_weights)
print('Confusion Matrix: ')
print(confusion_matrix(y_test, y_pred))

The most accurate K-Nearest neighbors Accuracy is: 0.714
The best K-Nearest neighbors Number of Neighbors: 119
The best K-Nearest neighbors Weights: distance
Confusion Matrix: 
[[96 23  0  0]
 [33 74 21  0]
 [ 0 26 91 20]
 [ 0  1 22 93]]


### K-Nearest Neighbors, 5 Fold Cross Validation

In [12]:
params = {'n_neighbors': np.arange(3, 126), 'weights': ['uniform', 'distance']} # Define the parameter grid for the K-Nearest Neighbors (KNN) classifier
grid_search = GridSearchCV(KNN(), params, cv = 5, scoring = 'accuracy') # Initialize GridSearchCV with the KNN classifier, parameter grid, 5-fold cross-validation, and accuracy scoring
gs_results = grid_search.fit(X_train_scaled, y_train)

best_knn_model = gs_results.best_estimator_
y_pred = best_knn_model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)

print('Test set accuracy:', accuracy)
print('The best parameters are: ', gs_results.best_params_)
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

Test set accuracy: 0.708
The best parameters are:  {'n_neighbors': np.int64(120), 'weights': 'distance'}
Confusion Matrix:
[[96 23  0  0]
 [34 77 17  0]
 [ 0 27 89 21]
 [ 0  1 23 92]]
