# 🍷 KNN Classifier for Wine Quality Prediction
An end-to-end implementation with data preprocessing, hyperparameter tuning, and prediction.

##  Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split, GridSearchCV

##  Load and Explore the Dataset

In [None]:
# Read the wine-quality CSV from the working directory; stop with an explicit
# error if the file is missing.
if os.path.exists('winequality-red-sorted.csv'):
    data_frame = pd.read_csv('winequality-red-sorted.csv')
else:
    raise FileNotFoundError("Dataset 'winequality-red-sorted.csv' not found in the current directory.")

# Quick look at the first rows and the overall (rows, columns) shape.
print(data_frame.head())
print("\nData shape:", data_frame.shape)

##  Split Data into Features (X) and Target (y)

In [None]:
# Features are every column except the last one; the last column is the target.
X, y = data_frame.iloc[:, :-1], data_frame.iloc[:, -1]

##  Normalize the Feature Values Using MinMaxScaler

In [None]:
# Learn each feature's min/max and rescale the feature matrix with them.
# Note: the scaler here is fitted on every row of X, including rows that a
# later train/test split may place in the test set.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

# Show the first five scaled rows as a sanity check.
print(X_scaled[:5])

##  Split the Data into Training and Testing Sets

In [None]:
# Split the *raw* features first, then learn the min/max scaling from the
# training rows only. Fitting the scaler on the full dataset before splitting
# lets test-set statistics leak into preprocessing, which can bias the test
# score optimistically.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0
)

# Rebind `scaler` so that any later use (e.g. scaling new samples) relies on
# the training-only fit.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)

# Test rows are only transformed, never fitted on; values outside the training
# range may therefore land slightly outside [0, 1].
X_test = scaler.transform(X_test_raw)

##  Define the Hyperparameter Grid for GridSearchCV

In [None]:
# Candidate settings for the search: odd neighbour counts from 3 to 11, each
# tried with both uniform and distance-based vote weighting.
param_grid = {
    'n_neighbors': list(range(3, 12, 2)),
    'weights': ['uniform', 'distance'],
}

##  Perform Hyperparameter Tuning Using GridSearchCV

In [None]:
# Exhaustive search over param_grid with 5-fold cross-validation on the
# training data, ranking candidates by accuracy.
grid_search = GridSearchCV(
    estimator=KNeighborsClassifier(),
    param_grid=param_grid,
    scoring='accuracy',
    cv=5,
)
grid_search.fit(X_train, y_train)

##  View All Hyperparameter Tuning Results

In [None]:
# Tabulate the cross-validation results and show, for each candidate, the
# parameter values together with the mean validation score.
results_df = pd.DataFrame(grid_search.cv_results_)
print(results_df.loc[:, ['param_n_neighbors', 'param_weights', 'mean_test_score']])

##  Select the Best Performing Model

In [None]:
# Report the winning parameter combination found by the search ...
print("Best Parameters:", grid_search.best_params_)
# ... and keep the corresponding estimator for evaluation and prediction.
best_model = grid_search.best_estimator_

##  Evaluate the Accuracy of the Best Model on the Test Set

In [None]:
# Predict labels for the held-out test rows and compare them with the true
# labels to get the fraction classified correctly.
y_prediction = best_model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_prediction)
print("Test Set Accuracy:", accuracy)

##  Make Predictions on New Data Using the Best Model

In [None]:
# One unseen sample; the values must be listed in the same order as the
# feature columns the scaler was fitted on.
new_data = [[7.4, 0.66, 0, 1.8, 0.075, 13, 40, 0.9978, 3.51, 0.56, 9.4]]

# The scaler was fitted on a DataFrame, so scikit-learn recorded the column
# names. Re-attach them to the sample: passing a bare list makes `transform`
# warn that the input "does not have valid feature names", and wrapping it in
# a DataFrame also fails early if the number of values does not match.
# `feature_names_in_` only exists when the scaler saw string column names,
# hence the fallback to the raw list.
feature_names = getattr(scaler, "feature_names_in_", None)
if feature_names is None:
    new_sample = new_data
else:
    new_sample = pd.DataFrame(new_data, columns=feature_names)

# Apply the already-fitted scaling (no re-fitting), then classify the sample.
new_data_scaled = scaler.transform(new_sample)
prediction = best_model.predict(new_data_scaled)
print("Predicted wine quality for new sample:", prediction[0])