In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib
import warnings
warnings.filterwarnings("ignore")

# ---------------- 1️⃣ Load Dataset ----------------
# The file is semicolon-delimited. Fully duplicated rows are dropped right
# after reading (pandas keeps the first occurrence of each).
df = pd.read_csv("winequality-white.csv", sep=";").drop_duplicates()

# 'quality' is the prediction target; every other column is a feature.
y = df['quality']
X = df.drop(columns='quality')

# ---------------- 2️⃣ Train/Test Split ----------------
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the
# proportion of each quality level similar in both partitions, and the fixed
# random_state makes the split repeatable from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# ---------------- 3️⃣ Pipeline: Scaler + KNN ----------------
# Standardisation and the classifier are bundled into one estimator, so the
# scaler is fitted only on the data passed to fit() and is re-applied
# automatically whenever the pipeline predicts.
# The step names ('scaler', 'knn') are part of the saved object; keep them stable.
knn_pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('knn', KNeighborsClassifier(n_neighbors=7)),
    ]
)

# ---------------- 4️⃣ Train Model / 5️⃣ Prediction ----------------
# fit() learns the scaling statistics and the neighbour index from the
# training partition and returns the fitted pipeline itself, so the
# prediction on the held-out rows can be chained directly onto it.
y_pred = knn_pipeline.fit(X_train, y_train).predict(X_test)

# ---------------- 6️⃣ Accuracy & Report ----------------
# Fraction of held-out rows whose predicted quality equals the true quality.
acc = accuracy_score(y_test, y_pred)
print("✅ Model Accuracy: {:.2f}%".format(acc*100))

# Rows are true classes and columns are predicted classes, both in sorted
# label order (labels are taken from the values seen in y_test or y_pred).
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Rare quality levels can end up with no predictions at all, which makes
# their precision an undefined 0/0. zero_division=0 reports those scores as
# 0.0 explicitly, so the report no longer depends on warnings being globally
# silenced to stay free of UndefinedMetricWarning noise.
print(
    "\nClassification Report:\n",
    classification_report(y_test, y_pred, zero_division=0),
)

# ---------------- 7️⃣ Save Pipeline for App ----------------
# Persist the whole fitted pipeline (scaler + classifier) as a single file in
# the current working directory, so whatever loads it gets the scaling step
# together with the model.
joblib.dump(value=knn_pipeline, filename="knn_wine_pipeline.pkl")
print("\nPipeline saved as 'knn_wine_pipeline.pkl'")


✅ Model Accuracy: 49.94%

Confusion Matrix:
 [[  0   0   1   3   0   0   0]
 [  0   4  19   7   1   0   0]
 [  0   7 126  91  11   0   0]
 [  0   1  98 220  38   1   0]
 [  0   1   7  85  45   0   0]
 [  0   0   1  16   8   1   0]
 [  0   0   0   0   1   0   0]]

Classification Report:
               precision    recall  f1-score   support

           3       0.00      0.00      0.00         4
           4       0.31      0.13      0.18        31
           5       0.50      0.54      0.52       235
           6       0.52      0.61      0.56       358
           7       0.43      0.33      0.37       138
           8       0.50      0.04      0.07        26
           9       0.00      0.00      0.00         1

    accuracy                           0.50       793
   macro avg       0.32      0.23      0.24       793
weighted avg       0.49      0.50      0.48       793


Pipeline saved as 'knn_wine_pipeline.pkl'
