In [3]:
import numpy as np
from sklearn.datasets import load_digits, load_iris
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier

# Load datasets
iris = load_iris()
X_iris, y_iris = iris.data, iris.target

digits = load_digits()
X_digits, y_digits = digits.data, digits.target


datasets = [(X_iris, y_iris), (X_digits, y_digits)]
dataset_names = ['Iris', 'digits']

# The decision tree is seeded (like the train/test split below) so that
# repeated runs of this cell report the same accuracies.
classifiers = {
    'Naive Bayes': GaussianNB(),
    'KNN': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
}


def make_scaled_model(clf):
    """Return a pipeline that standardizes the features and then applies *clf*.

    Scaling lives inside the pipeline so that the scaler is fitted only on
    the data the model is trained on (the training split, or the training
    folds during cross-validation). Fitting a scaler on the full dataset up
    front would leak statistics of the held-out samples into training.
    """
    return make_pipeline(StandardScaler(), clf)


# Define scenarios
scenarios = [
    {'train_size': 0.75, 'test_size': 0.25},
    {'train_size': 2/3, 'test_size': 1/3},
]

# Cross-validation uses the whole dataset and does not depend on the
# train/test scenario, so compute it once per dataset and reuse the result.
cv_accuracy = {}
for (X, y), dataset_name in zip(datasets, dataset_names):
    cv_accuracy[dataset_name] = {
        clf_name: np.mean(cross_val_score(make_scaled_model(clf), X, y, cv=5))
        for clf_name, clf in classifiers.items()
    }

for scenario in scenarios:
    # The percent format avoids printing float noise such as 66.66666666666666%.
    print(f"\nScenario: Train Size = {scenario['train_size']:.1%}, Test Size = {scenario['test_size']:.1%}")
    for (X, y), dataset_name in zip(datasets, dataset_names):
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            train_size=scenario['train_size'],
            test_size=scenario['test_size'],
            random_state=42,
        )

        print(f"\nDataset: {dataset_name}")
        for clf_name, clf in classifiers.items():
            # Fit scaler + classifier on the training split only, then score
            # on the untouched test split.
            model = make_scaled_model(clf)
            model.fit(X_train, y_train)
            accuracy = model.score(X_test, y_test)
            print(f"{clf_name} Accuracy: {accuracy:.2f}")

        # Cross-validation
        print(f"\nCross-validation for Dataset: {dataset_name}")
        for clf_name, mean_score in cv_accuracy[dataset_name].items():
            print(f"{clf_name} Cross-validation Accuracy: {mean_score:.2f}")



Scenario: Train Size = 75.0%, Test Size = 25.0%

Dataset: Iris
Naive Bayes Accuracy: 1.00
KNN Accuracy: 1.00
Decision Tree Accuracy: 1.00

Cross-validation for Dataset: Iris
Naive Bayes Cross-validation Accuracy: 0.95
KNN Cross-validation Accuracy: 0.97
Decision Tree Cross-validation Accuracy: 0.97

Dataset: digits
Naive Bayes Accuracy: 0.86
KNN Accuracy: 0.99
Decision Tree Accuracy: 0.87

Cross-validation for Dataset: digits
Naive Bayes Cross-validation Accuracy: 0.81
KNN Cross-validation Accuracy: 0.96
Decision Tree Cross-validation Accuracy: 0.79

Scenario: Train Size = 66.66666666666666%, Test Size = 33.33333333333333%

Dataset: Iris
Naive Bayes Accuracy: 0.96
KNN Accuracy: 0.98
Decision Tree Accuracy: 0.98

Cross-validation for Dataset: Iris
Naive Bayes Cross-validation Accuracy: 0.95
KNN Cross-validation Accuracy: 0.97
Decision Tree Cross-validation Accuracy: 0.97

Dataset: digits
Naive Bayes Accuracy: 0.81
KNN Accuracy: 0.99
Decision Tree Accuracy: 0.84

Cross-validation for Da