In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [9]:
# Option 1: Using scikit-learn's GradientBoostingClassifier (most common)
def gradient_boosting_classifier_sklearn(
    data: pd.DataFrame,
    target_column: str,
    test_size: float = 0.2,
    random_state: int = 42,
    n_estimators: int = 100,
    learning_rate: float = 0.1,
    max_depth: int = 3,
) -> tuple:
    """
    Trains and evaluates a Gradient Boosting Classifier using scikit-learn.

    The data is split into train/test partitions, a GradientBoostingClassifier
    is fitted on the training partition, and the held-out partition is used to
    compute accuracy, a classification report and a confusion matrix.

    Note:
        The split is not stratified, so on small or imbalanced datasets the
        test partition may not contain every class.

    Args:
        data (pd.DataFrame): The dataset; every column except ``target_column``
            is used as a feature.
        target_column (str): The name of the column containing the target variable.
        test_size (float): The proportion of the dataset to use for testing. Defaults to 0.2.
        random_state (int): Random seed used for both the split and the model. Defaults to 42.
        n_estimators (int): The number of boosting stages to perform. Defaults to 100.
        learning_rate (float): Shrinks the contribution of each tree by learning_rate. Defaults to 0.1.
        max_depth (int): Maximum depth of the individual regression estimators. Defaults to 3.

    Returns:
        tuple: (trained_model, accuracy, classification_report, confusion_matrix)

    Raises:
        KeyError: If ``target_column`` is not a column of ``data``.
        ValueError: If ``data`` has no rows, or has no feature columns besides
            ``target_column``.
    """

    # 0. Validate the input up front so failures carry an actionable message
    #    instead of surfacing from deep inside pandas / scikit-learn.
    if target_column not in data.columns:
        raise KeyError(
            f"Target column {target_column!r} not found in data; "
            f"available columns: {list(data.columns)}"
        )
    if len(data) == 0:
        raise ValueError("Cannot train on an empty dataset (data has no rows).")

    # 1. Prepare the data
    X = data.drop(columns=[target_column])
    y = data[target_column]
    if X.shape[1] == 0:
        raise ValueError(
            f"data has no feature columns besides the target column {target_column!r}."
        )

    # 2. Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # 3. Create and train the Gradient Boosting Classifier
    model = GradientBoostingClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        max_depth=max_depth,
        random_state=random_state,
    )
    model.fit(X_train, y_train)

    # 4. Make predictions on the test set
    y_pred = model.predict(X_test)

    # 5. Evaluate the model on the held-out partition only
    accuracy = accuracy_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    return model, accuracy, report, cm

# Example usage (using scikit-learn)
if __name__ == '__main__':
    # Tiny illustrative dataset; swap in a real DataFrame for actual use.
    data = pd.DataFrame(
        {
            'feature1': [1, 2, 3, 4, 5, 1, 2, 3, 4, 5],
            'feature2': [5, 4, 3, 2, 1, 5, 4, 3, 2, 1],
            'target': [0, 0, 0, 1, 1, 0, 0, 1, 1, 1],
        }
    )

    # Name of the column that holds the class labels.
    target_column = 'target'

    # Fit and score the classifier, leaving every other argument at its default.
    model, accuracy, report, cm = gradient_boosting_classifier_sklearn(
        data=data,
        target_column=target_column,
    )

    # Show the evaluation metrics computed on the held-out split.
    print("Scikit-learn Gradient Boosting Classifier Results:")
    print(f"Accuracy: {accuracy}")
    print("Classification Report:\n", report)
    print("Confusion Matrix:\n", cm)

Scikit-learn Gradient Boosting Classifier Results:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       1.00      1.00      1.00         1

    accuracy                           1.00         2
   macro avg       1.00      1.00      1.00         2
weighted avg       1.00      1.00      1.00         2

Confusion Matrix:
 [[1 0]
 [0 1]]
