In [5]:
from sklearn.datasets import load_wine
from sklearn.model_selection import StratifiedKFold, RandomizedSearchCV
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score
import numpy as np

# Caricamento del dataset Wine
data = load_wine()
X, y = data.data, data.target


In [None]:
# Creazione della pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Scaling standard
    ('pca', PCA()),                # Riduzione dimensionalità con PCA
    ('gbc', GradientBoostingClassifier())  # Gradient Boosting Classifier
])

# Definizione del grid per RandomizedSearchCV
param_grid = {
    'pca__n_components': [5, 10, 15],
    'gbc__n_estimators': [50, 100, 150], 
    'gbc__learning_rate': [0.01, 0.1, 0.2],  
    'gbc__max_depth': [3, 5, 7]  
}



In [7]:
# Configurazione del RandomizedSearchCV con StratifiedKFold
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
random_search = RandomizedSearchCV(
    pipeline, param_distributions=param_grid, 
    n_iter=20, cv=cv, scoring='accuracy', random_state=42, n_jobs=-1
)

# Esecuzione del RandomizedSearchCV
random_search.fit(X, y)

10 fits failed out of a total of 100.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/home/piercarlo-ciraselli/Documents/GitHub/Machine-Learning/.venv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/piercarlo-ciraselli/Documents/GitHub/Machine-Learning/.venv/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/piercarlo-ciraselli/Documents/GitHub/Machine-Learning/.venv/lib/python3.12/site-packages/sklearn/pipeline.

In [8]:
# Miglior modello e valutazione
best_model = random_search.best_estimator_
print(f"Migliori parametri trovati: {random_search.best_params_}")

# Validazione con StratifiedKFold
scores = []
for train_index, test_index in cv.split(X, y):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    best_model.fit(X_train, y_train)
    y_pred = best_model.predict(X_test)
    scores.append(accuracy_score(y_test, y_pred))

print(f"Accuratezza media su StratifiedKFold: {np.mean(scores):.2f}")

Migliori parametri trovati: {'pca__n_components': 5, 'gbc__n_estimators': 150, 'gbc__max_depth': 3, 'gbc__learning_rate': 0.1}
Accuratezza media su StratifiedKFold: 0.95
