In [1]:
# ==============================================================
# 📘 K-Fold Classification Example on a Real Dataset (scikit-learn Wine)
# ==============================================================

# Step 1: Install and import required libraries
!pip install scikit-learn pandas numpy

import numpy as np
import pandas as pd
from sklearn.datasets import load_wine
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Step 2: Load real dataset
# The object returned by load_wine() exposes the feature matrix (.data), the
# class labels (.target) and the column/class names used further below.
data = load_wine()
X, y = data.data, data.target

# Convert to DataFrame (optional, for inspection only; the modelling steps
# keep working on the X / y arrays).
df = pd.DataFrame(X, columns=data.feature_names).assign(target=y)

# The status message previously contained mis-encoded bytes in place of the
# check-mark emoji; the literal is restored to the intended character.
print("✅ Dataset Loaded Successfully!")
print(df.head())

# Step 3: Standardize features
# NOTE: this scaler is fit on *every* sample, so X_scaled carries information
# from all rows. It is kept for inspection and for any later cell that uses
# it, but it must not be fed to cross-validation: doing so lets each held-out
# fold influence the scaling statistics (data leakage).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 4: Define model and K-Fold
# The classifier is wrapped in a pipeline together with its own scaler, so
# that during cross-validation the scaling statistics are learned from the
# training portion of each fold only.
model = LogisticRegression(max_iter=2000)
pipeline = make_pipeline(StandardScaler(), model)
kfold = KFold(n_splits=5, shuffle=True, random_state=42)

# Step 5: Evaluate model with cross-validation
# Raw X is passed in; the pipeline handles scaling inside every fold.
scores = cross_val_score(pipeline, X, y, cv=kfold, scoring='accuracy')
print("\n📊 Cross-validation accuracies:", scores)
print("Mean accuracy:", np.mean(scores).round(4))
print("Std deviation:", np.std(scores).round(4))

# Step 6: Detailed classification report (aggregated)
# cross_val_predict yields one out-of-fold prediction per sample, so the
# report and confusion matrix below cover the whole dataset.
y_pred = cross_val_predict(pipeline, X, y, cv=kfold)
print("\n🧾 Classification Report:")
print(classification_report(y, y_pred, target_names=data.target_names))

print("\n📉 Confusion Matrix:")
print(confusion_matrix(y, y_pred))


✅ Dataset Loaded Successfully!
   alcohol  malic_acid   ash  alcalinity_of_ash  magnesium  total_phenols  \
0    14.23        1.71  2.43               15.6      127.0           2.80   
1    13.20        1.78  2.14               11.2      100.0           2.65   
2    13.16        2.36  2.67               18.6      101.0           2.80   
3    14.37        1.95  2.50               16.8      113.0           3.85   
4    13.24        2.59  2.87               21.0      118.0           2.80   

   flavanoids  nonflavanoid_phenols  proanthocyanins  color_intensity   hue  \
0        3.06                  0.28             2.29             5.64  1.04   
1        2.76                  0.26             1.28             4.38  1.05   
2        3.24                  0.30             2.81             5.68  1.03   
3        3.49                  0.24             2.18             7.80  0.86   
4        2.69                  0.39             1.82             4.32  1.04   

   od280/od315_of_diluted_win