In [1]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Fetch the white-wine quality table from the UCI repository (semicolon-delimited CSV)
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
df = pd.read_csv(url, sep=';')

# The 'quality' column is the target; every remaining column is a feature
y = df['quality'].to_numpy()
X = df.drop(columns='quality').to_numpy()

# Shuffled 5-fold splitter; the fixed seed keeps the fold assignment repeatable
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# One accuracy value is collected per fold, in fold order
accuracies = []

for train_index, test_index in kf.split(X):
    # Carve out this fold's training rows and held-out rows
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    # Fit a fresh, identically seeded forest on the training rows only
    model = RandomForestClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Predict the held-out rows and record the fold's accuracy
    y_pred = model.predict(X_test)
    accuracies.append(accuracy_score(y_test, y_pred))

# Per-fold report, folds numbered from 1
for i, acc in enumerate(accuracies, start=1):
    print(f"Accuracy for fold {i}: {acc:.2f}")

# Mean accuracy across all folds
print(f"\nAverage Accuracy: {np.mean(accuracies):.2f}")


Accuracy for fold 1: 0.69
Accuracy for fold 2: 0.71
Accuracy for fold 3: 0.69
Accuracy for fold 4: 0.69
Accuracy for fold 5: 0.67

Average Accuracy: 0.69
