# **Boosting (AdaBoost) Classifier**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset; every column except 'income' is used as a feature and
# 'income' is the classification target.
data = pd.read_csv('adult.csv')

X = data.drop(columns='income')
y = data['income']

# Object-dtype columns are one-hot encoded; all other columns are passed
# through to the classifier unchanged (remainder='passthrough').
categorical_features = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a category that appears only in the test
        # split is encoded as all zeros instead of raising at predict time.
        # Encoding of categories seen during fit is unchanged.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Boosted decision stumps (depth-1 trees). The weak learner is passed
# positionally because its keyword was renamed from `base_estimator` to
# `estimator` (deprecated in scikit-learn 1.2, removed in 1.4); the first
# positional argument binds correctly under either name.
pipeline_adaboost = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', AdaBoostClassifier(
        DecisionTreeClassifier(max_depth=1),
        n_estimators=100,
        random_state=42,
    ))
])

# Fitting the pipeline fits the encoder on the training split only, so no
# information from the test split reaches the preprocessing step.
pipeline_adaboost.fit(X_train, y_train)

y_pred_adaboost = pipeline_adaboost.predict(X_test)
accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
print(f'Accuracy of Boosting Classifier: {accuracy_adaboost:.2f}')

# target_names are applied to the labels in sorted order, so this assumes the
# 'income' column holds exactly the two labels '<=50K' and '>50K' -- verify
# against the CSV if the report rows look swapped or an error is raised.
report = classification_report(y_test, y_pred_adaboost, target_names=['<=50K', '>50K'])
print(report)




Accuracy of Boosting Classifier: 0.86
              precision    recall  f1-score   support

       <=50K       0.88      0.95      0.91      4976
        >50K       0.78      0.59      0.67      1537

    accuracy                           0.86      6513
   macro avg       0.83      0.77      0.79      6513
weighted avg       0.86      0.86      0.86      6513



# **Bagging (Random Forest) Classifier**

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset; every column except 'income' is used as a feature and
# 'income' is the classification target.
data = pd.read_csv('adult.csv')

X = data.drop(columns='income')
y = data['income']

# Object-dtype columns are one-hot encoded; all other columns are passed
# through to the classifier unchanged (remainder='passthrough').
categorical_features = X.select_dtypes(include=['object']).columns

preprocessor = ColumnTransformer(
    transformers=[
        # handle_unknown='ignore': a category that appears only in the test
        # split is encoded as all zeros instead of raising at predict time.
        # Encoding of categories seen during fit is unchanged.
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)
    ],
    remainder='passthrough'
)

# Same 80/20 split parameters and seed as the AdaBoost cell, so both models
# are scored on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Random forest of 100 trees; random_state fixes the bootstrap samples and
# feature sub-sampling so the run is reproducible.
pipeline_rf = Pipeline([
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Fitting the pipeline fits the encoder on the training split only, so no
# information from the test split reaches the preprocessing step.
pipeline_rf.fit(X_train, y_train)

y_pred_rf = pipeline_rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f'Accuracy of Bagging Classifier: {accuracy_rf:.2f}')

# target_names are applied to the labels in sorted order, so this assumes the
# 'income' column holds exactly the two labels '<=50K' and '>50K' -- verify
# against the CSV if the report rows look swapped or an error is raised.
report = classification_report(y_test, y_pred_rf, target_names=['<=50K', '>50K'])
print(report)


Accuracy of Bagging Classifier: 0.85
              precision    recall  f1-score   support

       <=50K       0.89      0.92      0.90      4976
        >50K       0.71      0.61      0.66      1537

    accuracy                           0.85      6513
   macro avg       0.80      0.77      0.78      6513
weighted avg       0.85      0.85      0.85      6513

