In [1]:
# Author: Roi Yehoshua <roiyeho@gmail.com>
# July 2024
# License: MIT

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Pin NumPy's legacy global random state so that any draw made through
# np.random.* without its own seed is repeatable from a fresh kernel.
np.random.seed(seed=42)

In [3]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# Synthetic benchmark: 500 samples, two features (both informative, none
# redundant), three classes with one cluster each. class_sep=0.8 places the
# clusters closer together than scikit-learn's documented default of 1.0.
X, y = make_classification(
    n_samples=500,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=3,
    n_clusters_per_class=1,
    class_sep=0.8,
    random_state=42,
)

# Hold out a test set with train_test_split's default split size; the fixed
# seed makes the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

In [4]:
from sklearn.ensemble import AdaBoostClassifier

# Baseline model: AdaBoost with library defaults, only the seed is pinned.
# fit() returns the estimator itself, so construction and training can be chained.
clf = AdaBoostClassifier(random_state=42).fit(X_train, y_train)
# Trailing expression so the cell still displays the fitted estimator.
clf



In [5]:
# Report the baseline's mean accuracy on both splits (one line per split).
print(
    f'Train accuracy: {clf.score(X_train, y_train):.4f}',
    f'Test accuracy: {clf.score(X_test, y_test):.4f}',
    sep='\n',
)

Train accuracy: 0.5867
Test accuracy: 0.5520


In [6]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RandomizedSearchCV

# Search space for the boosted ensemble. Keys carrying the `estimator__`
# prefix are forwarded to the wrapped DecisionTreeClassifier.
params = {
    'n_estimators': np.arange(50, 501, 50),            # 50, 100, ..., 500
    # linspace states both endpoints and the number of points explicitly;
    # with a float-step arange the element count depends on rounding
    # (see the note in the numpy.arange documentation).
    'learning_rate': np.linspace(0.1, 0.9, 9),         # 0.1, 0.2, ..., 0.9
    'estimator__min_samples_leaf': np.arange(1, 11),   # 1, 2, ..., 10
}

# NOTE: `clf` is rebound here to a new, unfitted AdaBoost template with an
# explicit decision-tree base learner. The search fits clones of it, so use
# `random_search` (not `clf`) for predictions and scores from here on.
clf = AdaBoostClassifier(estimator=DecisionTreeClassifier(), random_state=42)

# Evaluate 50 sampled configurations with 3-fold cross-validation on all
# available cores; the seed fixes which configurations are sampled.
random_search = RandomizedSearchCV(
    clf,
    params,
    n_iter=50,
    cv=3,
    random_state=42,
    n_jobs=-1,
)
random_search.fit(X_train, y_train)
print(random_search.best_params_)

{'n_estimators': 100, 'learning_rate': 0.8, 'estimator__min_samples_leaf': 5}




In [7]:
# Report the tuned model's accuracy on both splits; scoring goes through the
# fitted search object rather than the unfitted `clf` template.
print(
    f'Train accuracy: {random_search.score(X_train, y_train):.4f}',
    f'Test accuracy: {random_search.score(X_test, y_test):.4f}',
    sep='\n',
)

Train accuracy: 1.0000
Test accuracy: 0.9520
