In [1]:
# Gradient boosting, ensembling (with logistic regression), and basic hyperparameter tuning on a binary (setosa vs. not-setosa) Iris task.

In [2]:
#Load data and make it binary

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load Iris and make it binary (setosa vs. not-setosa)
iris = load_iris()
X = iris.data
y = (iris.target == 0).astype(int)  # Setosa = 1, others = 0

# Hold out 30% of the rows for testing. Setosa is only one of the Iris
# classes, so the binary label is imbalanced; stratifying on y keeps the
# positive rate the same in the train and test splits. random_state fixes
# the shuffle so the split is identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


In [3]:
#Gradient Boosting

from sklearn.ensemble import GradientBoostingClassifier

# Baseline gradient-boosted trees with default hyperparameters.
# random_state is pinned (same seed value as the train/test split) so a
# fresh kernel run fits the same model instead of depending on the global
# NumPy random state.
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)

# For classifiers, .score() reports mean accuracy on the data passed in.
print("GB test score:", gb.score(X_test, y_test))


GB test score: 1.0


In [4]:
#Add another model for ensembling (Logistic Regression)

from sklearn.linear_model import LogisticRegression

# Second model for the ensemble: logistic regression with default settings.
# fit() returns the estimator itself, so construction and training are
# chained into a single expression.
lr = LogisticRegression().fit(X_train, y_train)

# Mean accuracy of the linear model on the held-out split.
print("LR test score:", lr.score(X_test, y_test))


LR test score: 1.0


In [5]:
#Simple Ensemble (average probabilities)

import numpy as np

# Column 1 of predict_proba is the probability of label 1 (setosa);
# collect it from both fitted models in one pass.
gb_proba, lr_proba = (model.predict_proba(X_test)[:, 1] for model in (gb, lr))

# Soft vote: unweighted mean of the two probabilities, then predict label 1
# only when the mean is strictly above 0.5 (an exact tie falls to label 0).
ensemble_proba = 0.5 * (gb_proba + lr_proba)
ensemble_pred = np.greater(ensemble_proba, 0.5).astype(int)

from sklearn.metrics import accuracy_score
print("Ensemble test score:", accuracy_score(y_test, ensemble_pred))


Ensemble test score: 1.0


In [6]:
#Model Tuning (for Gradient Boosting)

from sklearn.model_selection import GridSearchCV

# 2 x 2 grid over the number of boosting stages and the learning rate.
params = {'n_estimators': [50, 100], 'learning_rate': [0.05, 0.1]}

# The base estimator is seeded so every candidate/fold fit is repeatable and
# the selected parameters do not change between runs. Cross-validation
# (3 folds) only ever sees the training split.
grid = GridSearchCV(GradientBoostingClassifier(random_state=42), params, cv=3)
grid.fit(X_train, y_train)

print("Best params:", grid.best_params_)
# With the default refit=True, GridSearchCV retrains the best configuration
# on all of X_train, and grid.score() evaluates that refit model.
print("Tuned GB test score:", grid.score(X_test, y_test))


Best params: {'learning_rate': 0.05, 'n_estimators': 50}
Tuned GB test score: 1.0
