In [1]:
import pandas as pd

In [2]:
df = pd.read_csv(r"C:\Users\rajes\Downloads\car_bike_knn.csv")


In [3]:
X = df.drop(columns = ['Class','Vehicle ID'])
y = df['Class']
X

Unnamed: 0,Speed (km/h),Weight (kg),Fuel Consumption (L/100km)
0,120,1500,8.5
1,110,1600,9.0
2,130,1400,7.5
3,80,200,2.5
4,90,220,2.8
5,100,250,3.0
6,115,1550,8.0
7,85,180,2.6
8,125,1450,7.8
9,95,210,2.9


# Tuning And CV

In [4]:

from sklearn.model_selection import GridSearchCV, train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

pipeline = Pipeline([
    ('scaler', StandardScaler()), 
    ('knn', KNeighborsClassifier())
])


param_grid = {
    'knn__n_neighbors': [3, 5, 7, 10], 
    'knn__weights': ['uniform', 'distance'], 
    'knn__p': [1, 2]  
}


grid_search = GridSearchCV(pipeline, param_grid, cv=KFold(n_splits=5, shuffle=True, random_state=42), scoring='accuracy')


grid_search.fit(X_train, y_train)


print(f"Best parameters: {grid_search.best_params_}")
print(f"Best cross-validation score: {grid_search.best_score_:.2f}")


Traceback (most recent call last):
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 971, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 279, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 371, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 89, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^

Best parameters: {'knn__n_neighbors': 3, 'knn__p': 1, 'knn__weights': 'distance'}
Best cross-validation score: 1.00


Traceback (most recent call last):
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 971, in _score
    scores = scorer(estimator, X_test, y_test, **score_params)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 279, in __call__
    return self._score(partial(_cached_call, None), estimator, X, y_true, **_kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 371, in _score
    y_pred = method_caller(
             ^^^^^^^^^^^^^^
  File "c:\Users\rajes\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\metrics\_scorer.py", line 89, in _cached_call
    result, _ = _get_response_values(
                ^^^^^^^^^^^^^^^^^^^^

# Cross Val Score

In [5]:

cross_val_scores = cross_val_score(grid_search.best_estimator_, X, y, cv=KFold(n_splits=5, shuffle=True, random_state=42), scoring='accuracy')
print(f"Cross-validation scores: {cross_val_scores}")
print(f"Mean cross-validation score: {cross_val_scores.mean():.2f}")


Cross-validation scores: [1. 1. 1. 1. 1.]
Mean cross-validation score: 1.00


# Model Evaluation

In [6]:

best_knn_pipeline = grid_search.best_estimator_
y_pred = best_knn_pipeline.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
accuracy_percent = accuracy * 100
print(f'Accuracy on test set: {accuracy_percent:.2f}%')


Accuracy on test set: 100.00%


# Predicting New data

In [7]:

query = pd.DataFrame([['100', '200', '5.0']], columns=X.columns)
query = query.astype(float)  


predictions = best_knn_pipeline.predict(query)
print("Predictions for new data:")
print(predictions)


Predictions for new data:
['Bike']
