In [3]:
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn import metrics
import pandas as pd

# Compare a boosting ensemble (AdaBoost over decision stumps) with a bagging
# ensemble (bagged full decision trees) on the student-graduation dataset,
# reporting test-set accuracy and weighted F1 for each.

# Load the data and separate the features from the 'Target' column
graduation_data = pd.read_csv('../../data/student-graduation/graduation_dataset.csv')
X = graduation_data.drop(columns=['Target'])
y = LabelEncoder().fit_transform(graduation_data['Target'])  # Encode class labels as integers

# Split BEFORE scaling so that no statistics from the held-out rows leak into
# preprocessing. The split is driven only by the row count and random_state,
# so the train/test membership is the same as when splitting scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardize the features: fit the scaler on the training partition only,
# then apply the same (train-derived) transformation to the test partition.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept for any later code that refers to the fully scaled feature matrix;
# it now uses the train-fitted scaler rather than statistics of all rows.
X_scaled = scaler.transform(X)

# Initialize results dictionary (model name -> test accuracy)
results = {}

# Boosting with AdaBoost: 50 depth-1 trees (decision stumps)
ada_boost = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=50,
    random_state=42
)
ada_boost.fit(X_train, y_train)
y_pred_ada = ada_boost.predict(X_test)
results['AdaBoost'] = metrics.accuracy_score(y_test, y_pred_ada)
# Weighted F1 averages the per-class F1 scores weighted by class support
f1_ada = metrics.f1_score(y_test, y_pred_ada, average='weighted')

# Bagging with BaggingClassifier: 50 decision trees with default settings
bagging = BaggingClassifier(
    estimator=DecisionTreeClassifier(),
    n_estimators=50,
    random_state=42
)
bagging.fit(X_train, y_train)
y_pred_bag = bagging.predict(X_test)
results['Bagging'] = metrics.accuracy_score(y_test, y_pred_bag)
f1_bag = metrics.f1_score(y_test, y_pred_bag, average='weighted')

# Display results as percentages with two decimal places
print(f"AdaBoost Accuracy: {results['AdaBoost'] * 100:.2f}%")
print(f"AdaBoost F1 Score: {f1_ada * 100:.2f}%\n")
print(f"Bagging Accuracy: {results['Bagging'] * 100:.2f}%")
print(f"Bagging F1 Score: {f1_bag * 100:.2f}%")




AdaBoost Accuracy: 73.95%
AdaBoost F1 Score: 72.70%

Bagging Accuracy: 75.08%
Bagging F1 Score: 73.89%
