In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [2]:
# Ensemble Models
from sklearn.ensemble import VotingClassifier, AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier

In [3]:
# Base models for voting
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder

In [4]:
# Create the label encoder up front. Nothing is encoded yet: the encoder is
# fitted on the "Species" column later, when the target vector is built.
le = LabelEncoder()

In [5]:
# Load the Iris dataset from CSV. The path is relative, so it is resolved
# against the notebook's current working directory.
df = pd.read_csv("datasets/Iris.csv")

In [6]:
# Drop the "Id" column so it does not end up among the features.
# errors="ignore" keeps this cell idempotent: it reassigns `df`, so running it
# a second time (when "Id" is already gone) would otherwise raise KeyError.
df = df.drop(columns=["Id"], errors="ignore")

In [7]:
# Feature matrix: every remaining column except the label.
X = df.drop(columns=["Species"])
# Target vector: species names mapped to integer class ids by the encoder.
y = le.fit_transform(df["Species"])

In [8]:
# Hold out 20% of the rows for testing; stratify on the labels and fix the
# seed so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardise the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to the test split.
scaler = StandardScaler()
X_train, X_test = scaler.fit_transform(X_train), scaler.transform(X_test)

In [9]:
# Base learners that both voting ensembles are built from.
# The SVC is created with probability=True so it can supply class
# probabilities to the soft-voting ensemble.
svm_clf = SVC(random_state=42, probability=True)
knn_clf = KNeighborsClassifier(n_neighbors=5)
log_clf = LogisticRegression(max_iter=500, random_state=42)

In [10]:
# Hard voting ensemble over the three base learners.
hard_members = [("lr", log_clf), ("knn", knn_clf), ("svc", svm_clf)]
voting_hard = VotingClassifier(estimators=hard_members, voting="hard")

In [11]:
# Soft voting ensemble over the same three base learners.
soft_members = [("lr", log_clf), ("knn", knn_clf), ("svc", svm_clf)]
voting_soft = VotingClassifier(estimators=soft_members, voting="soft")

In [12]:
# Fit both voting ensembles on the scaled training split (hard first, then soft).
for ensemble in (voting_hard, voting_soft):
    ensemble.fit(X_train, y_train)

In [13]:
# Predict the held-out test split with each fitted voting ensemble.
y_pred_hard, y_pred_soft = (
    ensemble.predict(X_test) for ensemble in (voting_hard, voting_soft)
)

In [14]:
# AdaBoost: build with 100 estimators and a fixed seed, fit on the training
# split, then predict the test split.
ada = AdaBoostClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred_ada = ada.predict(X_test)

In [15]:
# Gradient Boosting: 100 estimators, learning rate 0.1, fixed seed.
# Fit on the training split, then predict the test split.
gbm = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)
y_pred_gbm = gbm.predict(X_test)

In [16]:
# XGBoost: 100 estimators, learning rate 0.1, fixed seed, multi-class log-loss
# as the evaluation metric.
# `use_label_encoder` is deliberately not passed: the recorded run of this
# cell warned that the parameter is "not used", and the target is already
# integer-encoded by the LabelEncoder before the split.
xgb = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
    eval_metric="mlogloss",
)
xgb.fit(X_train, y_train)
y_pred_xgb = xgb.predict(X_test)

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [17]:
def evaluate_model(name, y_true, y_pred):
    """Print a short evaluation summary for one model.

    Prints, in order: the model name, the accuracy, the confusion matrix,
    the classification report, and a 50-character dashed separator.

    Args:
        name: Label printed in the "Model:" header line.
        y_true: Ground-truth labels.
        y_pred: Labels predicted by the model for the same samples.

    Returns:
        None. All results are written to standard output.
    """
    print(f"Model: {name}")
    # Each metric is computed right before it is printed, one after another.
    sections = (
        ("Accuracy:", accuracy_score),
        ("Confusion Matrix:\n", confusion_matrix),
        ("Classification Report:\n", classification_report),
    )
    for heading, metric in sections:
        print(heading, metric(y_true, y_pred))
    print("-" * 50)

In [18]:
# Report every model against the same held-out labels, in a fixed order.
for model_name, predictions in (
    ("Voting (Hard)", y_pred_hard),
    ("Voting (Soft)", y_pred_soft),
    ("AdaBoost", y_pred_ada),
    ("Gradient Boosting", y_pred_gbm),
    ("XGBoost", y_pred_xgb),
):
    evaluate_model(model_name, y_test, predictions)

Model: Voting (Hard)
Accuracy: 0.9333333333333333
Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      0.90      0.90        10
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93      0.93        30
weighted avg       0.93      0.93      0.93        30

--------------------------------------------------
Model: Voting (Soft)
Accuracy: 0.9333333333333333
Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  1  9]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       0.90      0.90      0.90        10
           2       0.90      0.90      0.90        10

    accuracy                           0.93        30
   macro avg       0.93      0.93   