<a href="https://colab.research.google.com/github/sri-7198/CLASSIFIERS/blob/main/CROSS_VALIDATION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the dataset. The file is read without a header row, so pandas labels
# the columns with integers starting at 0.
wine_data = pd.read_csv("wine.data", header=None)

# NOTE(review): no preprocessing is applied. SVC and KNeighborsClassifier are
# fed the raw, unscaled features below; consider standardising the features
# (e.g. inside a pipeline) before trusting their scores in this comparison.

# Split the dataset into features and target variable
X = wine_data.iloc[:, 1:]  # assumes the features are every column after the first
y = wine_data.iloc[:, 0]   # assumes the class label is in the first column

# Hold out 20% of the rows as a test set; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate models to compare (default hyper-parameters; seeded where the
# estimator accepts a random_state).
models = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "SVM": SVC(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB()
}

# Cross-validate every model exactly once on the training split, reporting the
# scores and remembering each mean so model selection below does not have to
# re-run cross-validation.
cv_mean_accuracy = {}
for name, model in models.items():
    scores = cross_val_score(model, X_train, y_train, cv=5)
    cv_mean_accuracy[name] = scores.mean()
    print(f"{name} Cross-Validation Accuracy: {scores.mean():.4f} (± {scores.std():.4f})")

# Select the best model based on the cross-validation means recorded above.
# On a tie, max() keeps the first entry in insertion order, i.e. the order of
# the `models` dict.
best_model_name = max(cv_mean_accuracy, key=cv_mean_accuracy.get)
best_model = models[best_model_name]

# Fit the best model on the entire training data
best_model.fit(X_train, y_train)

# Evaluate the best model on the held-out test data (not used during selection)
y_pred = best_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\nBest Model ({best_model_name}) Test Accuracy: {accuracy:.4f}")


Random Forest Cross-Validation Accuracy: 0.9786 (± 0.0286)
SVM Cross-Validation Accuracy: 0.6485 (± 0.0710)
KNN Cross-Validation Accuracy: 0.6618 (± 0.0363)
Naive Bayes Cross-Validation Accuracy: 0.9645 (± 0.0391)

Best Model (Random Forest) Test Accuracy: 1.0000
