<a href="https://colab.research.google.com/github/manishsahu001/AI-ML-DL/blob/main/Stacking_bagging_boosting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import seaborn as sns
from sklearn.preprocessing import LabelEncoder

In [2]:
df = sns.load_dataset('iris')

In [3]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [4]:
# Split x and y
X = df.drop('species', axis=1)
y = df['species']

In [5]:
# Label encoding of y
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.20, random_state=42, stratify=y_encoded)

In [7]:
# Base learners or Base models
base_models = [
    ('dt', DecisionTreeClassifier(random_state=42)),
    ('svm', SVC(probability=True, kernel='rbf', random_state=42)),
    ('lr', LogisticRegression(max_iter=1000))
]

In [8]:
# Meta learner or Meta model
meta_model = LogisticRegression(max_iter=1000)

# Stacking

In [9]:
# Stacking, adding the base model first then final model which is meta model and then cross validation 5
stacking_clf = StackingClassifier(
    estimators=base_models,
    final_estimator=meta_model,
    cv=5
)

In [10]:
# Training the base models
stacking_clf.fit(X_train, y_train)

In [11]:
# Doing prediction
y_pred = stacking_clf.predict(X_test)

In [12]:
# Testing accuracy
accuracy_score(y_test, y_pred)

0.9666666666666667

# Random Forest (Bagging)

In [13]:
from sklearn.ensemble import RandomForestClassifier

In [14]:
# Creating random forest model
model_rf = RandomForestClassifier(
    n_estimators=100, # number of trees
    max_depth=None, # let tree grow fully
    random_state=42
)

In [15]:
# Train the model
model_rf.fit(X_train, y_train)

In [16]:
# prediction with random forest model
y_pred = model_rf.predict(X_test)

In [17]:
# Checking accuracy score
accuracy_score(y_test, y_pred)

0.9

# Boosting

## ADA Boosting

In [20]:
# import ada boost
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier

In [22]:
# import xg boost
from xgboost import XGBClassifier

In [23]:
# creating model ada, n_estimators=100 means we want to make 100 decision trees
model_ada = AdaBoostClassifier(n_estimators=100, random_state=42)

In [24]:
# training the model
model_ada.fit(X_train, y_train)

In [26]:
# prediction with the model
y_pred_ada = model_ada.predict(X_test)

In [27]:
# Checking accuracy score
accuracy_score(y_test, y_pred_ada)

0.9333333333333333

In [28]:
# Gradient boosting, learning rate is the alpha value
model_gb = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)

In [29]:
# Training the model
model_gb.fit(X_train, y_train)

In [30]:
# Prediction with Gradient boosting
y_pred_gb = model_gb.predict(X_test)

In [31]:
# Checking accuracy score
accuracy_score(y_test, y_pred_gb)

0.9666666666666667

In [32]:
# XG Boost
model_xgb = XGBClassifier(n_estimator=100, learning_rate=0.1, max_depth=3, use_label_encoder=False, eval_matric='mlogloss', random_state=42)

In [33]:
# Training the model
model_xgb.fit(X_train, y_train)

Parameters: { "eval_matric", "n_estimator", "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [35]:
# prediction with XG boosting
y_pred_xgb = model_xgb.predict(X_test)

In [36]:
# Checking accuracy score
accuracy_score(y_test, y_pred_xgb)

0.9333333333333333