In [1]:
from sklearn.datasets import load_breast_cancer, load_wine, load_digits, load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler

# Load the four scikit-learn toy classification datasets, keyed by display name.
datasets = {'breast_cancer': load_breast_cancer(), 'wine': load_wine(), 'digits': load_digits(), 'iris': load_iris()}

# Random forest settings to compare: forest size and tree depth grow together.
hyperparameters = [{'n_estimators': 10, 'max_depth': 5},
                   {'n_estimators': 50, 'max_depth': 10},
                   {'n_estimators': 100, 'max_depth': 15},
                   {'n_estimators': 200, 'max_depth': 20}]

# Fractions of each dataset held out for testing.
test_sizes = [0.1, 0.2, 0.3, 0.4]

# Preprocessing methods to evaluate, keyed by display name.
preprocessing_methods = {'standard': StandardScaler()}

# Evaluate every (dataset, preprocessing, hyperparameters, test size) combination
# and print the test accuracy of each.
for dataset_name, dataset in datasets.items():
    # X and y stay untouched for the whole dataset iteration, so every
    # preprocessing method starts from the raw features.
    X, y = dataset.data, dataset.target
    print(f"Dataset: {dataset_name}")

    for preprocessing_name, preprocessing_method in preprocessing_methods.items():
        for params in hyperparameters:
            print(f"Preprocessing: {preprocessing_name}, Hyperparameters: {params}")

            for test_size in test_sizes:
                # Split the raw data first; the fixed random_state keeps the
                # split reproducible across runs.
                X_train_raw, X_test_raw, y_train, y_test = train_test_split(
                    X, y, test_size=test_size, random_state=89)

                # Fit the scaler on the training split only and reuse its
                # statistics for the test split. Fitting on the full dataset
                # would leak test-set information into preprocessing.
                X_train = preprocessing_method.fit_transform(X_train_raw)
                X_test = preprocessing_method.transform(X_test_raw)

                # Train a fresh, seeded forest for this configuration.
                clf = RandomForestClassifier(**params, random_state=42)
                clf.fit(X_train, y_train)

                # Score the held-out split.
                y_pred = clf.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred)

                print(f"Test size: {test_size}, Accuracy: {accuracy:.2f}")

            print()
        print()


Dataset: breast_cancer
Preprocessing: standard, Hyperparameters: {'n_estimators': 10, 'max_depth': 5}
Test size: 0.1, Accuracy: 0.93
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.95
Test size: 0.4, Accuracy: 0.96

Preprocessing: standard, Hyperparameters: {'n_estimators': 50, 'max_depth': 10}
Test size: 0.1, Accuracy: 0.95
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.95
Test size: 0.4, Accuracy: 0.97

Preprocessing: standard, Hyperparameters: {'n_estimators': 100, 'max_depth': 15}
Test size: 0.1, Accuracy: 0.95
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.95
Test size: 0.4, Accuracy: 0.96

Preprocessing: standard, Hyperparameters: {'n_estimators': 200, 'max_depth': 20}
Test size: 0.1, Accuracy: 0.95
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.95
Test size: 0.4, Accuracy: 0.96


Dataset: wine
Preprocessing: standard, Hyperparameters: {'n_estimators': 10, 'max_depth': 5}
Test size: 0.1, Accuracy: 1.00
Test size: 0.2, Accuracy: 0.97


In [4]:
!pip install xgboost

Collecting xgboost
  Using cached xgboost-1.7.5-py3-none-win_amd64.whl (70.9 MB)
Installing collected packages: xgboost
Successfully installed xgboost-1.7.5


In [5]:
from sklearn.datasets import load_breast_cancer, load_wine, load_digits, load_iris
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the four scikit-learn toy classification datasets, keyed by display name.
datasets = {'breast_cancer': load_breast_cancer(), 'wine': load_wine(), 'digits': load_digits(), 'iris': load_iris()}

# Hyperparameters used for each boosting classifier. They are described as the
# best settings; the search that selected them is not part of this cell.
adaboost_params = {'n_estimators': 200}
gradientboost_params = {'n_estimators': 100, 'max_depth': 5}
xgboost_params = {'n_estimators': 50, 'max_depth': 3}

# Fractions of each dataset held out for testing.
test_sizes = [0.1, 0.2, 0.3, 0.4]

# Preprocessing methods to evaluate, keyed by display name.
preprocessing_methods = {'standard': StandardScaler()}

# (display name, estimator class, hyperparameters) for each boosting variant.
# Classes are stored instead of instances so every fit starts from a freshly
# constructed estimator. XGBClassifier no longer receives use_label_encoder:
# the argument is deprecated and label encoding is already off by default.
# NOTE(review): the recorded output shows AdaBoost at 0.25 accuracy on digits;
# presumably the default weak learner is too shallow for 10 classes - confirm.
classifiers = [('AdaBoost', AdaBoostClassifier, adaboost_params),
               ('Gradient Boosting', GradientBoostingClassifier, gradientboost_params),
               ('XGBoost', XGBClassifier, xgboost_params)]

# Evaluate every (dataset, preprocessing, classifier, test size) combination
# and print the test accuracy of each.
for dataset_name, dataset in datasets.items():
    # X and y stay untouched for the whole dataset iteration, so every
    # preprocessing method starts from the raw features.
    X, y = dataset.data, dataset.target
    print(f"Dataset: {dataset_name}")

    for preprocessing_name, preprocessing_method in preprocessing_methods.items():
        for clf_name, clf_class, params in classifiers:
            print(f"Preprocessing: {preprocessing_name}, Classifier: {clf_name}")

            for test_size in test_sizes:
                # Split the raw data first; the fixed random_state keeps the
                # split reproducible across runs.
                X_train_raw, X_test_raw, y_train, y_test = train_test_split(
                    X, y, test_size=test_size, random_state=89)

                # Fit the scaler on the training split only and reuse its
                # statistics for the test split. Fitting on the full dataset
                # would leak test-set information into preprocessing.
                X_train = preprocessing_method.fit_transform(X_train_raw)
                X_test = preprocessing_method.transform(X_test_raw)

                # Build a fresh, seeded classifier with the chosen hyperparameters.
                clf = clf_class(**params, random_state=42)
                clf.fit(X_train, y_train)

                # Score the held-out split.
                y_pred = clf.predict(X_test)
                accuracy = accuracy_score(y_test, y_pred)

                print(f"Test size: {test_size}, Accuracy: {accuracy:.2f}")

            print()
        print()


Dataset: breast_cancer
Preprocessing: standard, Classifier: AdaBoost




Test size: 0.1, Accuracy: 0.93
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.96
Test size: 0.4, Accuracy: 0.96

Preprocessing: standard, Classifier: Gradient Boosting
Test size: 0.1, Accuracy: 0.93
Test size: 0.2, Accuracy: 0.95
Test size: 0.3, Accuracy: 0.95
Test size: 0.4, Accuracy: 0.94

Preprocessing: standard, Classifier: XGBoost
Test size: 0.1, Accuracy: 0.96
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.95
Test size: 0.4, Accuracy: 0.96


Dataset: wine
Preprocessing: standard, Classifier: AdaBoost




Test size: 0.1, Accuracy: 0.83
Test size: 0.2, Accuracy: 0.86
Test size: 0.3, Accuracy: 0.89
Test size: 0.4, Accuracy: 0.90

Preprocessing: standard, Classifier: Gradient Boosting
Test size: 0.1, Accuracy: 0.94
Test size: 0.2, Accuracy: 0.97
Test size: 0.3, Accuracy: 0.98
Test size: 0.4, Accuracy: 0.94

Preprocessing: standard, Classifier: XGBoost
Test size: 0.1, Accuracy: 0.89
Test size: 0.2, Accuracy: 0.94
Test size: 0.3, Accuracy: 0.98
Test size: 0.4, Accuracy: 0.99


Dataset: digits
Preprocessing: standard, Classifier: AdaBoost




Test size: 0.1, Accuracy: 0.25
Test size: 0.2, Accuracy: 0.25
Test size: 0.3, Accuracy: 0.25
Test size: 0.4, Accuracy: 0.25

Preprocessing: standard, Classifier: Gradient Boosting
Test size: 0.1, Accuracy: 0.96
Test size: 0.2, Accuracy: 0.96
Test size: 0.3, Accuracy: 0.96
Test size: 0.4, Accuracy: 0.96

Preprocessing: standard, Classifier: XGBoost
Test size: 0.1, Accuracy: 0.95
Test size: 0.2, Accuracy: 0.97
Test size: 0.3, Accuracy: 0.96
Test size: 0.4, Accuracy: 0.96


Dataset: iris
Preprocessing: standard, Classifier: AdaBoost




Test size: 0.1, Accuracy: 1.00
Test size: 0.2, Accuracy: 1.00
Test size: 0.3, Accuracy: 0.93
Test size: 0.4, Accuracy: 0.82

Preprocessing: standard, Classifier: Gradient Boosting
Test size: 0.1, Accuracy: 1.00
Test size: 0.2, Accuracy: 0.97
Test size: 0.3, Accuracy: 0.96
Test size: 0.4, Accuracy: 0.90

Preprocessing: standard, Classifier: XGBoost
Test size: 0.1, Accuracy: 1.00
Test size: 0.2, Accuracy: 0.97
Test size: 0.3, Accuracy: 0.96
Test size: 0.4, Accuracy: 0.93


