**Name:** Aniket Bapusaheb Labade

**PRN No.:** RBT21CB012

**Subject:** Machine Learning

**Practical No. 6:** Comparative analysis of all ensemble techniques.


**AdaBoost:**
AdaBoost is an ensemble learning method that combines multiple weak classifiers into a strong classifier; in each iteration it increases the weight of the instances misclassified so far, so that the next weak learner focuses on them.

**XGBoost:**
XGBoost is an efficient and scalable gradient boosting framework that uses tree-based models, adds a regularization term to the objective to control model complexity, and supports parallel and distributed training.

**CatBoost:**
CatBoost is a gradient boosting algorithm with built-in support for categorical features: it handles categorical variables automatically, without manual preprocessing, and employs techniques such as ordered boosting for enhanced accuracy.

**Gradient Boosting:**
Gradient Boosting is a general ensemble learning technique that builds a series of weak learners sequentially, each correcting the errors of its predecessor by minimizing a differentiable loss function through gradient descent.

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics

In [None]:
# Load the iris dataset bundled with scikit-learn and pull out the feature
# matrix (X) and the class labels (y).
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [None]:
# Hold out 30% of the samples for evaluation. Fixing random_state makes the
# shuffle deterministic, so the accuracies printed by the following cells are
# reproducible from run to run instead of changing with every execution.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

In [None]:
# AdaBoost with 50 boosting rounds and a learning rate of 1.
# random_state pins the estimator's internal randomness so repeated runs
# build the same model (the gradient-boosting cell already pins its seed).
abc = AdaBoostClassifier(n_estimators=50,
                         learning_rate=1,
                         random_state=42)

# Train on the training split; `model` is whatever fit() returns, which
# scikit-learn documents as the fitted estimator itself.
model = abc.fit(X_train, y_train)
# Predict class labels for the held-out samples.
y_pred = model.predict(X_test)

In [None]:
# Report how the AdaBoost predictions compare with the true test labels.
print("Accuracy:", metrics.accuracy_score(y_true=y_test, y_pred=y_pred))

Accuracy: 0.9555555555555556


In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_digits
# Gradient boosting with 300 boosting stages and a learning rate of 0.05.
# max_features limits how many features are examined per split. The original
# hard-coded 5, which exceeds the number of columns in the iris feature matrix
# used here; clamp it to the actual column count so the value is always valid
# for whatever data X_train holds.
gbc = GradientBoostingClassifier(n_estimators=300,
                                 learning_rate=0.05,
                                 random_state=100,
                                 max_features=min(5, X_train.shape[1]))

# Fit on the training split, predict the held-out samples, and report the
# accuracy rounded to two decimals.
gbc.fit(X_train, y_train)
pred_y = gbc.predict(X_test)
acc = accuracy_score(y_test, pred_y)
print(f"Gradient Boosting Classifier accuracy is : {acc:.2f}")

Gradient Boosting Classifier accuracy is : 0.96


In [None]:
pip install numpy pandas scikit-learn xgboost catboost

Collecting catboost
  Downloading catboost-1.2.2-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
Successfully installed catboost-1.2.2


In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SelectKBest, f_classif

# Reload iris; X1 is the clean feature matrix, y the class labels.
iris = load_iris()
X1, y = iris.data, iris.target

# Seed NumPy's global RNG so the noise drawn below is reproducible, then
# perturb every feature value with Gaussian noise (mean 0, std 0.5).
# NOTE(review): estimators created later without random_state presumably draw
# from this same global RNG -- confirm before moving the seed.
np.random.seed(42)
a = np.random.normal(loc=0, scale=0.5, size=X1.shape)
X = X1 + a

# Keep 20% of the noisy samples as a fixed test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only and apply it to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Keep the k features ranked best by f_classif, again fitted on the
# training split only.
# NOTE(review): scaler and selector are fitted on the whole training split
# before GridSearchCV runs, so each CV fold's validation part has already
# influenced the preprocessing.
k = 2
selector = SelectKBest(score_func=f_classif, k=k).fit(X_train_scaled, y_train)
X_train_selected = selector.transform(X_train_scaled)
X_test_selected = selector.transform(X_test_scaled)

# AdaBoost: grid over ensemble size and learning rate (3 x 3 candidates),
# each scored with 3-fold cross-validation on the selected training features.
adaboost_param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
)

adaboost_grid_search = GridSearchCV(
    estimator=AdaBoostClassifier(),
    param_grid=adaboost_param_grid,
    cv=3,
)
adaboost_grid_search.fit(X_train_selected, y_train)

# Score the best estimator found by the search on the held-out test split.
best_adaboost_model = adaboost_grid_search.best_estimator_
best_adaboost_pred = best_adaboost_model.predict(X_test_selected)
best_adaboost_accuracy = accuracy_score(y_true=y_test, y_pred=best_adaboost_pred)

# XGBoost: grid over number of trees, learning rate and tree depth
# (3 x 3 x 3 candidates), each scored with 3-fold cross-validation.
xgboost_param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

xgboost_grid_search = GridSearchCV(
    estimator=XGBClassifier(),
    param_grid=xgboost_param_grid,
    cv=3,
)
xgboost_grid_search.fit(X_train_selected, y_train)

# Score the best estimator found by the search on the held-out test split.
best_xgboost_model = xgboost_grid_search.best_estimator_
best_xgboost_pred = best_xgboost_model.predict(X_test_selected)
best_xgboost_accuracy = accuracy_score(y_true=y_test, y_pred=best_xgboost_pred)

# CatBoost: grid over number of boosting iterations and learning rate
# (3 x 3 candidates), each scored with 3-fold cross-validation.
# silent=True is passed to the estimator to keep its training log quiet.
catboost_param_grid = dict(
    iterations=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
)

catboost_grid_search = GridSearchCV(
    estimator=CatBoostClassifier(silent=True),
    param_grid=catboost_param_grid,
    cv=3,
)
catboost_grid_search.fit(X_train_selected, y_train)

# Score the best estimator found by the search on the held-out test split.
best_catboost_model = catboost_grid_search.best_estimator_
best_catboost_pred = best_catboost_model.predict(X_test_selected)
best_catboost_accuracy = accuracy_score(y_true=y_test, y_pred=best_catboost_pred)

# Gradient Boosting: grid over number of stages, learning rate and tree depth
# (3 x 3 x 3 candidates), each scored with 3-fold cross-validation.
gradientboost_param_grid = dict(
    n_estimators=[50, 100, 200],
    learning_rate=[0.01, 0.1, 0.2],
    max_depth=[3, 4, 5],
)

gradientboost_grid_search = GridSearchCV(
    estimator=GradientBoostingClassifier(),
    param_grid=gradientboost_param_grid,
    cv=3,
)
gradientboost_grid_search.fit(X_train_selected, y_train)

# Score the best estimator found by the search on the held-out test split.
best_gradientboost_model = gradientboost_grid_search.best_estimator_
best_gradientboost_pred = best_gradientboost_model.predict(X_test_selected)
best_gradientboost_accuracy = accuracy_score(
    y_true=y_test, y_pred=best_gradientboost_pred
)

# Gather the four test accuracies into a single table, one row per model,
# and print it for side-by-side comparison.
results = pd.DataFrame(
    data=dict(
        Model=['AdaBoost', 'XGBoost', 'CatBoost', 'Gradient Boosting'],
        Accuracy=[
            best_adaboost_accuracy,
            best_xgboost_accuracy,
            best_catboost_accuracy,
            best_gradientboost_accuracy,
        ],
    )
)

print(results)


               Model  Accuracy
0           AdaBoost  0.800000
1            XGBoost  0.900000
2           CatBoost  0.866667
3  Gradient Boosting  0.900000
