In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

# Reached only when every import above succeeded; a failed import raises first.
print("Libraries imported successfully")


Libraries imported successfully


In [2]:
# Read the wine-quality CSV (path is relative to the notebook's working
# directory) into `df`, the frame that the following cells operate on.
df = pd.read_csv(filepath_or_buffer="winequalityN.csv")

# Confirm the load and report the frame's (rows, columns).
print("Dataset loaded successfully", f"Shape: {df.shape}", sep="\n")


Dataset loaded successfully
Shape: (6497, 13)


In [3]:
# List the column labels. The bare `df.columns` on the last line is the cell's
# value, so the notebook renders it as the cell output.
print("Dataset columns:")
df.columns


Dataset columns:


Index(['type', 'fixed acidity', 'volatile acidity', 'citric acid',
       'residual sugar', 'chlorides', 'free sulfur dioxide',
       'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol',
       'quality'],
      dtype='object')

In [4]:
# Impute missing numeric values with the mean of their own column and report
# the per-column null counts before and after.
print("Missing values before:")
print(df.isnull().sum())

# Assign the filled frame instead of using inplace=True: the result is the
# same, but the cell no longer mutates an object shared across cells.
# numeric_only=True restricts the means (and therefore the fill) to numeric
# columns; non-numeric columns are left untouched.
df = df.fillna(df.mean(numeric_only=True))

# Guard the invariant the rest of the notebook relies on: a numeric column can
# only still hold NaN here if it had no observed values to average.
assert not df.select_dtypes(include="number").isnull().any().any(), (
    "mean imputation left NaNs in a numeric column"
)

print("\nMissing values after:")
print(df.isnull().sum())


Missing values before:
type                     0
fixed acidity           10
volatile acidity         8
citric acid              3
residual sugar           2
chlorides                2
free sulfur dioxide      0
total sulfur dioxide     0
density                  0
pH                       9
sulphates                4
alcohol                  0
quality                  0
dtype: int64

Missing values after:
type                    0
fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64


In [5]:
# Replace the labels in the `type` column with integer codes. The fitted
# `encoder` stays in scope so the label <-> code mapping can be looked up.
# Note: the column is overwritten, so re-running this cell fits the encoder on
# the integer codes rather than on the original labels.
encoder = LabelEncoder().fit(df["type"])
df["type"] = encoder.transform(df["type"])

print("Encoding completed for 'type'")
# Preview the encoded column (last expression -> rendered as the cell output).
df[["type"]].head()


Encoding completed for 'type'


Unnamed: 0,type
0,1
1,1
2,1
3,1
4,1


In [6]:
# Separate the prediction target from the predictors: `quality` is the label,
# every other column of `df` is a feature.
y = df["quality"]
X = df.drop(columns=["quality"])

print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")


Features shape: (6497, 12)
Target shape: (6497,)


In [7]:
# Standardise the feature columns. The fitted `scaler` is kept so the same
# transformation can be applied to new inputs later.
# NOTE(review): the scaling statistics here are computed from every row of X,
# i.e. before any train/test split has been made.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

print("Feature scaling done")
print("First scaled row:\n", X_scaled[0])


Feature scaling done
First scaled row:
 [ 0.57136659 -0.1671586  -0.4235636   0.28424515  3.20697708 -0.31522185
  0.81556531  0.9599756   2.10221365 -1.35966451 -0.5459591  -1.41855821]


In [8]:
# Hold out 25% of the rows for evaluation.
#
# The split is taken on the *unscaled* features so the scaling statistics can
# be learned from the training rows only. Splitting an array that was scaled
# with statistics from all rows lets information about the held-out rows leak
# into training. The shuffle depends only on the row count and random_state,
# so the same rows land in each partition as when splitting a pre-scaled array.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# Rebind `scaler` to one fitted on the training partition; any later
# scaler.transform(...) call therefore uses train-only statistics.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Every row must end up in exactly one of the two partitions.
assert len(X_train) + len(X_test) == len(X)

print("Training data shape:", X_train.shape)
print("Testing data shape :", X_test.shape)


Training data shape: (4872, 12)
Testing data shape : (1625, 12)


In [9]:
# Fit a Gaussian Naive Bayes classifier on the training split; `nb` is reused
# by later cells for prediction.
nb = GaussianNB()
nb.fit(X_train, y_train)

# The label previously contained a mis-decoded character in place of "ï".
print("Naïve Bayes model trained")


Naïve Bayes model trained


In [11]:
# Fit a k-nearest-neighbours classifier (k = 5) on the training split; `knn`
# is reused by later cells for prediction.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

print("kNN model trained")


kNN model trained


In [14]:
# Predict the quality of one hand-entered wine sample with both trained models.

# Create user input as DataFrame (with column names)
user_input_df = pd.DataFrame([{
    # Integer code for the wine type as produced by the label-encoding cell.
    # NOTE(review): the first rows of the dataset were encoded as 1; confirm
    # that 1 is the wine type this sample is meant to represent.
    'type': 1,
    'fixed acidity': 7.4,
    'volatile acidity': 0.70,
    'citric acid': 0.00,
    'residual sugar': 1.9,
    'chlorides': 0.076,
    'free sulfur dioxide': 11.0,
    'total sulfur dioxide': 34.0,
    'density': 0.9978,
    'pH': 3.51,
    'sulphates': 0.56,
    'alcohol': 9.4
}])

# Put the columns in exactly the order of the training features instead of
# relying on the dict literal's key order; a missing feature raises KeyError
# here rather than producing a misaligned row.
user_input_df = user_input_df[X.columns]

# Scale input with the same fitted scaler that is used for the model inputs
user_input_scaled = scaler.transform(user_input_df)

# Predict
nb_result = nb.predict(user_input_scaled)
knn_result = knn.predict(user_input_scaled)

# The Naive Bayes label previously contained a mis-decoded character in place
# of "ï"; the padding keeps the two colons aligned.
print("User Input Prediction")
print("Naïve Bayes Predicted Quality:", nb_result[0])
print("kNN Predicted Quality        :", knn_result[0])


User Input Prediction
Naïve Bayes Predicted Quality: 4
kNN Predicted Quality        : 5
