In [None]:
# Evaluate the performance of ensemble models:
# (1) Boosting (2) Bagging
# (3) Random Forest


In [None]:
#Theory
'''
Examples:
Boosting:
Suppose you're a student preparing for a spelling bee competition. Boosting is like having a tutor who focuses on the words you keep misspelling.
Each time you practice, your tutor helps you learn from your mistakes and gets you ready for the competition.

Bagging:
Now, imagine you're trying to make an important decision, like where to invest your money. Bagging is like asking many different financial advisors for their opinions. You combine all their advice to make the best investment choices and
reduce the risk of making a bad decision.

Random Forest:
Suppose you're trying to predict which movie a person will enjoy based on their past movie preferences.
 Random Forest is like asking a group of movie critics, each with their own tastes and expertise, for their recommendations.
  By combining all their opinions, you can make a more accurate prediction of which movie the person will like.


Boosting:

Question: What is boosting in machine learning?
Answer: Boosting is an ensemble learning technique that combines multiple weak learners sequentially to create a strong learner.
It focuses on training each subsequent model to correct the errors of the previous ones.

Question: How do you evaluate the performance of a boosting model?
Answer: The performance of a boosting model can be evaluated using various metrics such as accuracy, precision, recall, F1-score, and ROC-AUC score.
 Additionally, you can examine learning curves to assess the model's convergence and potential overfitting.

Bagging:

Question: What is bagging in machine learning?
Answer: Bagging, short for Bootstrap Aggregating, is an ensemble learning technique that combines the predictions of multiple models
trained independently on different bootstrap samples (random subsets drawn with replacement) of the training data. It aims to reduce variance and improve the stability of the model.

Question: How do you evaluate the performance of a bagging model?
Answer: Similar to boosting, the performance of a bagging model can be evaluated using metrics such as accuracy, precision,
recall, F1-score, and ROC-AUC score. Additionally, you can assess the diversity among the base learners and examine their individual performances.

Random Forest:

Question: What is Random Forest?
Answer: Random Forest is an ensemble learning technique that builds multiple decision trees during training and combines their predictions
 through averaging or voting. It introduces randomness in the tree-building process to improve generalization and reduce overfitting.

Question: How do you evaluate the performance of a Random Forest model?
Answer: The performance of a Random Forest model can be evaluated using the same metrics as other classification models, such as accuracy,
precision, recall, F1-score, and ROC-AUC score. Additionally, you can analyze feature importance to understand the contribution of each feature to the model's predictions. Furthermore, the out-of-bag (OOB) error, computed for each training sample using only the trees that did not see that sample during training, can provide a reasonable estimate of the model's generalization performance without the need for a separate validation set.
'''

In [None]:
#1
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score

# Synthetic binary-classification problem: 1000 samples with 20 features each.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Hold out 20% of the samples as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Construct each ensemble and train it right away
# (scikit-learn's fit() returns the fitted estimator, so the calls can be chained).
boosting_classifier = AdaBoostClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)
bagging_classifier = BaggingClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)
random_forest_classifier = RandomForestClassifier(n_estimators=50, random_state=42).fit(X_train, y_train)

# Test-set predictions, one array per model, in the same order as the models.
boosting_pred, bagging_pred, random_forest_pred = (
    model.predict(X_test)
    for model in (boosting_classifier, bagging_classifier, random_forest_classifier)
)

# Test-set accuracy for each model.
boosting_accuracy, bagging_accuracy, random_forest_accuracy = (
    accuracy_score(y_test, predicted)
    for predicted in (boosting_pred, bagging_pred, random_forest_pred)
)

# Report the three accuracies.
for label, score in (
    ("Boosting Accuracy:", boosting_accuracy),
    ("Bagging Accuracy:", bagging_accuracy),
    ("Random Forest Accuracy:", random_forest_accuracy),
):
    print(label, score)


Boosting Accuracy: 0.87
Bagging Accuracy: 0.885
Random Forest Accuracy: 0.88


In [None]:
#2
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Load the iris dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split data into train and test sets (20% held out for testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define classifiers
boosting_classifier = AdaBoostClassifier(n_estimators=50, random_state=42)
bagging_classifier = BaggingClassifier(n_estimators=50, random_state=42)
random_forest_classifier = RandomForestClassifier(n_estimators=50, random_state=42)

# Train classifiers
boosting_classifier.fit(X_train, y_train)
bagging_classifier.fit(X_train, y_train)
random_forest_classifier.fit(X_train, y_train)

# Predictions
boosting_pred = boosting_classifier.predict(X_test)
bagging_pred = bagging_classifier.predict(X_test)
random_forest_pred = random_forest_classifier.predict(X_test)


def evaluate_predictions(y_true, y_pred, average='macro'):
    """Score one set of predictions against the true labels.

    Parameters
    ----------
    y_true : array-like
        Ground-truth class labels.
    y_pred : array-like
        Predicted class labels, aligned with ``y_true``.
    average : str, default 'macro'
        Averaging mode forwarded to precision, recall and F1.

    Returns
    -------
    tuple
        ``(accuracy, precision, recall, f1)`` in that order.
    """
    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average=average),
        recall_score(y_true, y_pred, average=average),
        f1_score(y_true, y_pred, average=average),
    )


# Evaluation metrics for Boosting, Bagging and Random Forest.
# One loop replaces three copy-pasted blocks so the metrics cannot drift apart.
# After the loop, accuracy/precision/recall/f1 hold the Random Forest values,
# exactly as they did when the blocks were written out by hand.
for model_name, model_pred in (
    ("Boosting", boosting_pred),
    ("Bagging", bagging_pred),
    ("Random Forest", random_forest_pred),
):
    accuracy, precision, recall, f1 = evaluate_predictions(y_test, model_pred)
    print(f"{model_name} Performance:")
    print("Accuracy:", accuracy)
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print()


Boosting Performance:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Bagging Performance:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Random Forest Performance:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

