In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
import xgboost as xgb
from sklearn.metrics import accuracy_score, classification_report
import warnings
warnings.filterwarnings('ignore')

  from pandas.core.computation.check import NUMEXPR_INSTALLED


In [2]:
# Seed NumPy's global RNG so every synthetic draw below is repeatable.
np.random.seed(42)
n_samples = 100  # number of synthetic rows to generate

# Columns are drawn one after another from the shared global RNG, so the order
# of these calls determines the generated values -- do not reorder them.
feature_draws = {}
feature_draws['cap-diameter'] = np.random.normal(1000, 200, n_samples)
feature_draws['cap-shape'] = np.random.randint(1, 7, n_samples)
feature_draws['gill-attachment'] = np.random.randint(1, 4, n_samples)
feature_draws['gill-color'] = np.random.randint(1, 11, n_samples)
feature_draws['stem-height'] = np.random.normal(2, 1, n_samples)
feature_draws['stem-width'] = np.random.normal(1000, 300, n_samples)
feature_draws['stem-color'] = np.random.randint(11, 13, n_samples)
feature_draws['season'] = np.random.normal(1, 0.5, n_samples)
# Binary target drawn independently of every feature column above, so any
# accuracy a model reaches on this data reflects chance rather than real signal.
feature_draws['class'] = np.random.randint(0, 2, n_samples)
data = pd.DataFrame(feature_draws)

# Separate the predictors from the target column.
y = data['class']
X = data.drop(columns='class')

# Hold out 20% of the rows for testing; stratify on y so both splits keep the
# same class balance, and fix the split seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

def evaluate_model(y_true, y_pred, model_name):
    """Print an accuracy line and a classification report for one model.

    Args:
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model, aligned with ``y_true``.
        model_name: Display name used in the printed heading.

    Returns:
        The value computed by ``accuracy_score(y_true, y_pred)``.
    """
    # Compute both metrics up front so nothing is printed if either call fails.
    accuracy = accuracy_score(y_true, y_pred)
    report_text = classification_report(y_true, y_pred)
    print(
        f"\n{model_name} Results:",
        f"Accuracy: {accuracy:.4f}",
        "Classification Report:",
        report_text,
        sep="\n",
    )
    return accuracy

# Accumulates one (model name, accuracy) tuple per evaluated model; the summary
# table at the end of the notebook iterates over it.
results: list = []

### Decision Stump Model

In [3]:
# Baseline: a single depth-1 decision tree (a "decision stump").
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at each split, so an unseeded tree depends on whatever state the
# global RNG happens to be in when this cell runs. 42 matches the seed already
# used for the train/test split.
stump = DecisionTreeClassifier(max_depth=1, random_state=42)
stump.fit(X_train, y_train)
stump_pred = stump.predict(X_test)
stump_acc = evaluate_model(y_test, stump_pred, "Decision Stump")
# NOTE: appends to the shared results list, so re-running this cell without
# re-running the setup cell adds a duplicate row to the summary.
results.append(("Decision Stump", stump_acc))


Decision Stump Results:
Accuracy: 0.6500
Classification Report:
              precision    recall  f1-score   support

           0       0.59      1.00      0.74        10
           1       1.00      0.30      0.46        10

    accuracy                           0.65        20
   macro avg       0.79      0.65      0.60        20
weighted avg       0.79      0.65      0.60        20



### XGBoost with 1 weak learner

In [4]:
# XGBoost restricted to a single depth-1 tree (one boosting round).
xgb_1_label = "XGBoost (1 weak learner)"
xgb_1 = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=1,
    max_depth=1,
    learning_rate=0.1,
)
xgb_1.fit(X_train, y_train)
xgb_1_pred = xgb_1.predict(X_test)
xgb_1_acc = evaluate_model(y_test, xgb_1_pred, xgb_1_label)
# Record the score under the same label that was printed above.
results.append((xgb_1_label, xgb_1_acc))


XGBoost (1 weak learner) Results:
Accuracy: 0.6500
Classification Report:
              precision    recall  f1-score   support

           0       0.59      1.00      0.74        10
           1       1.00      0.30      0.46        10

    accuracy                           0.65        20
   macro avg       0.79      0.65      0.60        20
weighted avg       0.79      0.65      0.60        20



### XGBoost with 2 weak learners

In [5]:
# XGBoost with two depth-1 trees; all other settings match the 1-tree run.
xgb_2_label = "XGBoost (2 weak learners)"
xgb_2 = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=2,
    max_depth=1,
    learning_rate=0.1,
)
xgb_2.fit(X_train, y_train)
xgb_2_pred = xgb_2.predict(X_test)
xgb_2_acc = evaluate_model(y_test, xgb_2_pred, xgb_2_label)
# Record the score under the same label that was printed above.
results.append((xgb_2_label, xgb_2_acc))


XGBoost (2 weak learners) Results:
Accuracy: 0.6500
Classification Report:
              precision    recall  f1-score   support

           0       0.59      1.00      0.74        10
           1       1.00      0.30      0.46        10

    accuracy                           0.65        20
   macro avg       0.79      0.65      0.60        20
weighted avg       0.79      0.65      0.60        20



### XGBoost with 3 weak learners

In [6]:
# XGBoost with three depth-1 trees; all other settings match the earlier runs.
xgb_3_label = "XGBoost (3 weak learners)"
xgb_3 = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=3,
    max_depth=1,
    learning_rate=0.1,
)
xgb_3.fit(X_train, y_train)
xgb_3_pred = xgb_3.predict(X_test)
xgb_3_acc = evaluate_model(y_test, xgb_3_pred, xgb_3_label)
# Record the score under the same label that was printed above.
results.append((xgb_3_label, xgb_3_acc))


XGBoost (3 weak learners) Results:
Accuracy: 0.6500
Classification Report:
              precision    recall  f1-score   support

           0       0.59      1.00      0.74        10
           1       1.00      0.30      0.46        10

    accuracy                           0.65        20
   macro avg       0.79      0.65      0.60        20
weighted avg       0.79      0.65      0.60        20



### XGBoost with n weak learners (n=18 in this case)

In [7]:
# Number of depth-1 trees for the larger ensemble; the AdaBoost cell below
# reads this same variable.
n_estimators = 18
# Build the display label once so the printed heading and the stored result
# cannot drift apart.
xgb_n_label = f"XGBoost ({n_estimators} weak learners)"
xgb_n = xgb.XGBClassifier(
    objective='binary:logistic',
    n_estimators=n_estimators,
    max_depth=1,
    learning_rate=0.1,
)
xgb_n.fit(X_train, y_train)
xgb_n_pred = xgb_n.predict(X_test)
xgb_n_acc = evaluate_model(y_test, xgb_n_pred, xgb_n_label)
results.append((xgb_n_label, xgb_n_acc))


XGBoost (18 weak learners) Results:
Accuracy: 0.7000
Classification Report:
              precision    recall  f1-score   support

           0       0.62      1.00      0.77        10
           1       1.00      0.40      0.57        10

    accuracy                           0.70        20
   macro avg       0.81      0.70      0.67        20
weighted avg       0.81      0.70      0.67        20



In [8]:
# AdaBoost over depth-1 decision trees, using the same number of weak learners
# (n_estimators, set in the XGBoost cell above) so the two ensembles are
# comparable.
# random_state is pinned so the stumps AdaBoost builds are seeded explicitly
# instead of depending on the current state of the global RNG; 42 matches the
# seed already used for the train/test split.
ada_n_label = f"AdaBoost ({n_estimators} weak learners)"
ada_n = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=1),
    n_estimators=n_estimators,
    learning_rate=0.1,
    random_state=42,
)
ada_n.fit(X_train, y_train)
ada_n_pred = ada_n.predict(X_test)
ada_n_acc = evaluate_model(y_test, ada_n_pred, ada_n_label)
results.append((ada_n_label, ada_n_acc))


AdaBoost (18 weak learners) Results:
Accuracy: 0.6000
Classification Report:
              precision    recall  f1-score   support

           0       0.57      0.80      0.67        10
           1       0.67      0.40      0.50        10

    accuracy                           0.60        20
   macro avg       0.62      0.60      0.58        20
weighted avg       0.62      0.60      0.58        20



### Summary Table

In [9]:
# Assemble the whole table first, then emit it with a single print call.
# Layout: a 30-character left-aligned model name followed by the accuracy
# to four decimal places, framed by 50-character rules.
summary_lines = [
    "\nPerformance Summary:",
    "=" * 50,
    f"{'Model':<30} {'Accuracy':<10}",
    "-" * 50,
]
# One row per (model name, accuracy) tuple collected by the cells above.
summary_lines.extend(f"{model:<30} {acc:.4f}" for model, acc in results)
summary_lines.append("=" * 50)
print("\n".join(summary_lines))


Performance Summary:
Model                          Accuracy  
--------------------------------------------------
Decision Stump                 0.6500
XGBoost (1 weak learner)       0.6500
XGBoost (2 weak learners)      0.6500
XGBoost (3 weak learners)      0.6500
XGBoost (18 weak learners)     0.7000
AdaBoost (18 weak learners)    0.6000
