# Ensembles

In [5]:
import os

import pandas as pd

# Location of the preprocessed rice dataset. The default is the original
# author's local path; set the RICE_DATA_PATH environment variable to run the
# notebook on another machine without editing this cell.
DATA_PATH = os.environ.get(
    "RICE_DATA_PATH",
    "/Users/litianmeng/Desktop/rice_preprocessed.csv",
)

# Load the full dataset; later cells expect it to contain a "class" column.
df = pd.read_csv(DATA_PATH)

In [6]:
import numpy as np
from sklearn.model_selection import cross_val_score, StratifiedKFold
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier

In [8]:
# Labels (y): the "class" column
y = df["class"]
# Features (X): every column except "class"
X = df.drop(columns="class")

# 10-fold stratified cross-validation, so each fold keeps a balanced class
# distribution. Shuffling uses a fixed seed so the fold assignment is the same
# on every run.
cvKFold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=0,
)

## Bagging

In [18]:
def bagDTClassifier(X, y, n_estimators, max_samples, max_depth, cv=None):
    """Return the mean cross-validated score of a bagged decision-tree ensemble.

    Parameters
    ----------
    X : features, passed unchanged to ``cross_val_score``.
    y : class labels, passed unchanged to ``cross_val_score``.
    n_estimators : number of trees in the ``BaggingClassifier``.
    max_samples : forwarded to ``BaggingClassifier(max_samples=...)``.
    max_depth : maximum depth of each base ``DecisionTreeClassifier``.
    cv : optional cross-validation splitter (or anything else accepted by
        ``cross_val_score(cv=...)``). When omitted, the notebook-level
        ``cvKFold`` splitter is used, so existing calls behave as before.

    Returns
    -------
    The mean of the per-fold scores returned by ``cross_val_score``, using the
    estimator's default scorer.
    """
    if cv is None:
        # Fall back to the shared notebook splitter instead of silently
        # depending on it; callers can now pass their own splitter.
        cv = cvKFold

    # Entropy-based trees with a fixed seed keep the base learner reproducible.
    base_dt = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth, random_state=0)
    bagging = BaggingClassifier(
        estimator=base_dt,
        n_estimators=n_estimators,
        max_samples=max_samples,
        random_state=0
    )
    scores = cross_val_score(bagging, X, y, cv=cv)
    return scores.mean()

# Score bagging with 50 depth-5 trees. max_samples is given as the integer
# 100, which BaggingClassifier interprets as a row count per tree rather
# than a fraction of the data.
bag_score = bagDTClassifier(
    X,
    y,
    n_estimators=50,
    max_samples=100,
    max_depth=5,
)

## AdaBoost

In [19]:
def adaDTClassifier(X, y, n_estimators, learning_rate, max_depth, cv=None):
    """Return the mean cross-validated score of AdaBoost over decision trees.

    Parameters
    ----------
    X : features, passed unchanged to ``cross_val_score``.
    y : class labels, passed unchanged to ``cross_val_score``.
    n_estimators : forwarded to ``AdaBoostClassifier(n_estimators=...)``.
    learning_rate : forwarded to ``AdaBoostClassifier(learning_rate=...)``.
    max_depth : maximum depth of each base ``DecisionTreeClassifier``.
    cv : optional cross-validation splitter (or anything else accepted by
        ``cross_val_score(cv=...)``). When omitted, the notebook-level
        ``cvKFold`` splitter is used, so existing calls behave as before.

    Returns
    -------
    The mean of the per-fold scores returned by ``cross_val_score``, using the
    estimator's default scorer.
    """
    if cv is None:
        # Fall back to the shared notebook splitter instead of silently
        # depending on it; callers can now pass their own splitter.
        cv = cvKFold

    # Entropy-based trees with a fixed seed keep the base learner reproducible.
    base_dt = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth, random_state=0)
    ada = AdaBoostClassifier(
        estimator=base_dt,
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=0
    )
    scores = cross_val_score(ada, X, y, cv=cv)
    return scores.mean()

# Score AdaBoost with n_estimators=50, a learning rate of 0.5 and depth-5
# base trees.
ada_score = adaDTClassifier(
    X,
    y,
    n_estimators=50,
    learning_rate=0.5,
    max_depth=5,
)

## Gradient Boosting

In [20]:
def gbClassifier(X, y, n_estimators, learning_rate, cv=None):
    """Return the mean cross-validated score of a gradient-boosting classifier.

    Parameters
    ----------
    X : features, passed unchanged to ``cross_val_score``.
    y : class labels, passed unchanged to ``cross_val_score``.
    n_estimators : forwarded to ``GradientBoostingClassifier(n_estimators=...)``.
    learning_rate : forwarded to
        ``GradientBoostingClassifier(learning_rate=...)``.
    cv : optional cross-validation splitter (or anything else accepted by
        ``cross_val_score(cv=...)``). When omitted, the notebook-level
        ``cvKFold`` splitter is used, so existing calls behave as before.

    Returns
    -------
    The mean of the per-fold scores returned by ``cross_val_score``, using the
    estimator's default scorer.
    """
    if cv is None:
        # Fall back to the shared notebook splitter instead of silently
        # depending on it; callers can now pass their own splitter.
        cv = cvKFold

    # Only the arguments below are set; all other hyper-parameters stay at the
    # library defaults.
    gb = GradientBoostingClassifier(
        n_estimators=n_estimators,
        learning_rate=learning_rate,
        random_state=0
    )
    scores = cross_val_score(gb, X, y, cv=cv)
    return scores.mean()

# Score gradient boosting with n_estimators=50 and a learning rate of 0.5.
gb_score = gbClassifier(
    X,
    y,
    n_estimators=50,
    learning_rate=0.5,
)

## Results

In [22]:
# Report each ensemble's mean cross-validation score to four decimals,
# in the order the models were evaluated above.
summary = (
    ("Bagging average cross-validation accuracy:", bag_score),
    ("AdaBoost average cross-validation accuracy:", ada_score),
    ("GB average cross-validation accuracy:", gb_score),
)
for label, mean_accuracy in summary:
    print(label, f"{mean_accuracy:.4f}")

Bagging average cross-validation accuracy: 0.9414
AdaBoost average cross-validation accuracy: 0.9407
GB average cross-validation accuracy: 0.9321
