## Practical

14. Train an AdaBoost Classifier on a sample dataset and print model accuracy.

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier

# --- Data ---------------------------------------------------------------
# Iris features/labels, with 20% of the rows held out for evaluation.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
# Depth-1 trees (decision stumps) are the weak learners being boosted.
# The weak learner is passed through `estimator`, not the older
# `base_estimator` keyword.
base_learner = DecisionTreeClassifier(max_depth=1)
adaboost = AdaBoostClassifier(
    estimator=base_learner,
    n_estimators=50,
    learning_rate=1.0,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluation ---------------------------------------------------------
# Accuracy on the held-out split.
y_pred = adaboost.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

15. Train an AdaBoost Regressor and evaluate performance using Mean Absolute Error (MAE).

In [None]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.datasets import make_regression

# --- Data ---------------------------------------------------------------
# Synthetic regression problem (1000 rows, 10 features), 80/20 split.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
# Depth-4 regression trees are boosted; the tree is handed over through
# `estimator`, not the older `base_estimator` keyword.
base_estimator = DecisionTreeRegressor(max_depth=4)
adaboost_regressor = AdaBoostRegressor(
    estimator=base_estimator,
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluation ---------------------------------------------------------
# Mean Absolute Error on the held-out split.
y_pred = adaboost_regressor.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.4f}")

16. Train a Gradient Boosting Classifier on the Breast Cancer dataset and print feature importance.

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd

# --- Data ---------------------------------------------------------------
# Breast Cancer dataset, 80/20 train/test split.
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
gb_classifier = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluation ---------------------------------------------------------
y_pred = gb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

# --- Feature importance -------------------------------------------------
# Label each importance with its feature name and list the most
# influential features first.
feature_importance = pd.Series(
    gb_classifier.feature_importances_,
    index=data.feature_names,
)
ranked_importance = feature_importance.sort_values(ascending=False)
print("Feature Importance:")
print(ranked_importance)


17. Train a Gradient Boosting Regressor and evaluate using R-Squared Score.

In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import make_regression

# --- Data ---------------------------------------------------------------
# Synthetic regression problem (1000 rows, 10 features), 80/20 split.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
gb_regressor = GradientBoostingRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluation ---------------------------------------------------------
# R-squared on the held-out split.
y_pred = gb_regressor.predict(X_test)
r2 = r2_score(y_test, y_pred)
print(f"R-Squared Score: {r2:.4f}")


18. Train an XGBoost Classifier on a dataset and compare accuracy with Gradient Boosting.

In [None]:
from sklearn.ensemble import GradientBoostingClassifier
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer

# Load the Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Split the dataset into training and testing sets; both models below are
# trained and scored on exactly the same split so the accuracies are comparable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Gradient Boosting Classifier
gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gb_classifier.fit(X_train, y_train)

# Make predictions with Gradient Boosting
y_pred_gb = gb_classifier.predict(X_test)

# Evaluate performance using accuracy
accuracy_gb = accuracy_score(y_test, y_pred_gb)
print(f"Gradient Boosting Accuracy: {accuracy_gb:.4f}")

# Initialize and train the XGBoost Classifier with the same number of trees
# and learning rate as the Gradient Boosting model.
# NOTE: `use_label_encoder` is intentionally not passed. It is deprecated in
# XGBoost and only produces an "unused parameter" warning on current releases.
xgb_classifier = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=42,
)
xgb_classifier.fit(X_train, y_train)

# Make predictions with XGBoost
y_pred_xgb = xgb_classifier.predict(X_test)

# Evaluate performance using accuracy
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")


19. Train a CatBoost Classifier and evaluate using F1-Score.

In [None]:
import catboost as cb
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import f1_score, classification_report

# --- Data ---------------------------------------------------------------
# Synthetic classification data (20 features: 15 informative, 5 redundant),
# with 20% of the rows held out for evaluation.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Baseline CatBoost model --------------------------------------------
# verbose=0 keeps the per-iteration training log quiet.
catboost_model = cb.CatBoostClassifier(random_state=42, verbose=0)
catboost_model.fit(X_train, y_train)

y_pred = catboost_model.predict(X_test)
f1 = f1_score(y_test, y_pred)

print("F1-Score:", f1)
print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# --- Same model, with F1 selected as CatBoost's evaluation metric -------
catboost_model_f1_metric = cb.CatBoostClassifier(
    random_state=42,
    eval_metric='F1',
    verbose=0,
)
catboost_model_f1_metric.fit(X_train, y_train)

y_pred_f1_metric = catboost_model_f1_metric.predict(X_test)
f1_metric_test = f1_score(y_test, y_pred_f1_metric)

print("\nF1-Score with F1 as eval metric:", f1_metric_test)

20. Train an XGBoost Regressor and evaluate using Mean Squared Error (MSE).

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression

# --- Data ---------------------------------------------------------------
# Synthetic regression problem (1000 rows, 10 features), 80/20 split.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
xgb_regressor = XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
)
xgb_regressor.fit(X_train, y_train)

# --- Evaluation ---------------------------------------------------------
# Mean Squared Error on the held-out split.
y_pred_xgb = xgb_regressor.predict(X_test)
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
print(f"XGBoost Mean Squared Error: {mse_xgb:.4f}")


21. Train an AdaBoost Classifier and visualize feature importance.

In [None]:
import matplotlib.pyplot as plt
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer
import numpy as np

# --- Data ---------------------------------------------------------------
# Breast Cancer dataset, 80/20 train/test split.
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
# Boost decision stumps; the weak learner is passed through `estimator`,
# not the older `base_estimator` keyword.
base_estimator = DecisionTreeClassifier(max_depth=1)
adaboost_classifier = AdaBoostClassifier(
    estimator=base_estimator,
    n_estimators=50,
    random_state=42,
).fit(X_train, y_train)

# --- Evaluation ---------------------------------------------------------
y_pred_ada = adaboost_classifier.predict(X_test)
accuracy_ada = accuracy_score(y_test, y_pred_ada)
print(f"AdaBoost Accuracy: {accuracy_ada:.4f}")

# --- Feature-importance bar chart ---------------------------------------
# `indices` orders the features from most to least important; bars and
# tick labels are both drawn in that order.
feature_importance = adaboost_classifier.feature_importances_
indices = np.argsort(feature_importance)[::-1]
bar_positions = range(X.shape[1])
sorted_names = np.array(data.feature_names)[indices]

plt.figure(figsize=(10, 6))
plt.title("Feature Importance in AdaBoost")
plt.bar(bar_positions, feature_importance[indices], align="center")
plt.xticks(bar_positions, sorted_names, rotation=90)
plt.xlabel("Feature")
plt.ylabel("Importance")
plt.show()

22. Train a Gradient Boosting Regressor and plot learning curves.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_regression

# Generate synthetic regression data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.2, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the Gradient Boosting Regressor on the full training set
gb_regressor = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
gb_regressor.fit(X_train, y_train)

# Make predictions
y_pred_gb = gb_regressor.predict(X_test)

# Evaluate performance using Mean Squared Error (MSE)
mse_gb = mean_squared_error(y_test, y_pred_gb)
print(f"Gradient Boosting Mean Squared Error: {mse_gb:.4f}")

# Plot learning curves: train/test MSE as a function of training-set size.
#
# The curve is sampled at 20 evenly spaced sizes instead of at every single
# size. Refitting a 100-tree ensemble once per row is very slow and adds
# no visible detail to the plot. The grid ends at len(X_train), so the last
# point uses the complete training set.
train_sizes = np.linspace(20, len(X_train), 20, dtype=int)

train_errors, test_errors = [], []
for m in train_sizes:
    # A separate model is fitted per size so `gb_regressor` above stays
    # trained on the full training set after this loop.
    curve_model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    curve_model.fit(X_train[:m], y_train[:m])
    y_train_predict = curve_model.predict(X_train[:m])
    y_test_predict = curve_model.predict(X_test)
    train_errors.append(mean_squared_error(y_train[:m], y_train_predict))
    test_errors.append(mean_squared_error(y_test, y_test_predict))

plt.figure(figsize=(10, 6))
plt.plot(train_sizes, train_errors, label="Training Error")
plt.plot(train_sizes, test_errors, label="Testing Error")
plt.xlabel("Training Set Size")
plt.ylabel("Mean Squared Error")
plt.title("Learning Curves for Gradient Boosting Regressor")
plt.legend()
plt.show()


23. Train an XGBoost Classifier and visualize feature importance.

In [None]:
import matplotlib.pyplot as plt
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer

# Load the Breast Cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train the XGBoost Classifier.
# NOTE: `use_label_encoder` is intentionally not passed. It is deprecated in
# XGBoost and only produces an "unused parameter" warning on current releases.
xgb_classifier = XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    eval_metric='logloss',
    random_state=42,
)
xgb_classifier.fit(X_train, y_train)

# Make predictions
y_pred_xgb = xgb_classifier.predict(X_test)

# Evaluate performance using accuracy
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
print(f"XGBoost Accuracy: {accuracy_xgb:.4f}")

# Visualize feature importance: `indices` orders the features from most to
# least important, and both the bars and the tick labels follow that order.
feature_importance = xgb_classifier.feature_importances_
indices = np.argsort(feature_importance)[::-1]

plt.figure(figsize=(10, 6))
plt.title("Feature Importance in XGBoost")
plt.bar(range(X.shape[1]), feature_importance[indices], align="center")
plt.xticks(range(X.shape[1]), np.array(data.feature_names)[indices], rotation=90)
plt.xlabel("Feature")
plt.ylabel("Importance")
plt.show()

24. Train a CatBoost Classifier and plot the confusion matrix.

In [None]:
!pip install --upgrade numpy
!pip install --upgrade --force-reinstall catboost # This is to ensure the latest CatBoost version is installed with compatibilities.
import catboost as cb # Import after upgrading or reinstalling to use the updated library
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

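# Setup-only cell: the complete CatBoost confusion-matrix example is in the next cell.
# NOTE: after the forced reinstall above, a kernel restart may be needed before the upgraded packages are picked up.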

In [None]:
!pip install --upgrade numpy
!pip install --upgrade --force-reinstall catboost
import catboost as cb
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

# --- Data ---------------------------------------------------------------
# Synthetic classification data (20 features: 15 informative, 5 redundant),
# with 20% of the rows held out for evaluation.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model --------------------------------------------------------------
# verbose=0 keeps the per-iteration training log quiet.
catboost_model = cb.CatBoostClassifier(random_state=42, verbose=0)
catboost_model.fit(X_train, y_train)

# --- Confusion matrix ---------------------------------------------------
# confusion_matrix is called as (true, predicted), which matches the
# y-axis ("True Labels") and x-axis ("Predicted Labels") captions below.
y_pred = catboost_model.predict(X_test)
cm = confusion_matrix(y_test, y_pred)

plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.show()

25. Train an AdaBoost Classifier with different numbers of estimators and compare accuracy.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

# --- Data ---------------------------------------------------------------
# Synthetic classification data (20 features: 2 informative, 10 redundant),
# with 30% of the rows held out for evaluation.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Sweep over ensemble sizes ------------------------------------------
n_estimators_range = [10, 50, 100, 200, 500]
accuracy_scores = []  # one test accuracy per entry of n_estimators_range

for n_estimators in n_estimators_range:
    # Fresh classifier per ensemble size; only n_estimators varies.
    ada_classifier = AdaBoostClassifier(n_estimators=n_estimators, random_state=42)
    y_pred = ada_classifier.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    accuracy_scores.append(accuracy)
    print(f"Number of Estimators: {n_estimators}, Accuracy: {accuracy:.4f}")

# --- Plot: accuracy as a function of ensemble size ----------------------
plt.figure(figsize=(8, 6))
plt.plot(n_estimators_range, accuracy_scores, marker='o')
plt.title('AdaBoost Classifier Accuracy vs. Number of Estimators')
plt.xlabel('Number of Estimators')
plt.ylabel('Accuracy')
plt.xticks(n_estimators_range)
plt.grid(True)
plt.show()

26. Train a Gradient Boosting Classifier and visualize the ROC curve.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import roc_curve, roc_auc_score

# --- Data ---------------------------------------------------------------
# Synthetic classification data (20 features: 2 informative, 10 redundant),
# with 30% of the rows held out for evaluation.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=2,
    n_redundant=10,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# --- Model --------------------------------------------------------------
gb_classifier = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
).fit(X_train, y_train)

# --- ROC / AUC ----------------------------------------------------------
# Column 1 of predict_proba is used as the score for the positive class.
test_probabilities = gb_classifier.predict_proba(X_test)
y_scores = test_probabilities[:, 1]

fpr, tpr, thresholds = roc_curve(y_test, y_scores)
roc_auc = roc_auc_score(y_test, y_scores)

# --- Plot ---------------------------------------------------------------
roc_label = 'ROC curve (AUC = {:.2f})'.format(roc_auc)

plt.figure(figsize=(8, 6))
plt.plot(fpr, tpr, color='blue', lw=2, label=roc_label)
# Diagonal reference line: the performance of random guessing.
plt.plot([0, 1], [0, 1], color='gray', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate (FPR)')
plt.ylabel('True Positive Rate (TPR)')
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.legend(loc='lower right')
plt.show()

# Report the AUC at full precision as well.
print(f"AUC: {roc_auc}")

27. Train an XGBoost Regressor and tune the learning rate using GridSearchCV.

In [None]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.datasets import make_regression
import numpy as np

# Metrics used to score the tuned models on the held-out split.
from sklearn.metrics import mean_squared_error, r2_score

# --- Data ---------------------------------------------------------------
# Synthetic regression problem (1000 rows, 10 features), 80/20 split.
X, y = make_regression(
    n_samples=1000,
    n_features=10,
    noise=0.1,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Part 1: joint search over several hyperparameters ------------------
xgbr = xgb.XGBRegressor(objective='reg:squarederror', random_state=42)

# Every combination of the values below is evaluated (5 * 3 * 4 * 3 * 3
# candidates, each with 3-fold cross-validation).
param_grid = dict(
    learning_rate=[0.01, 0.05, 0.1, 0.2, 0.3],
    n_estimators=[100, 200, 300],
    max_depth=[3, 4, 5, 6],
    subsample=[0.7, 0.8, 0.9],
    colsample_bytree=[0.7, 0.8, 0.9],
)

# The scorer is *negative* MSE, so larger (closer to zero) is better.
grid_search = GridSearchCV(
    estimator=xgbr,
    param_grid=param_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

print("Best parameters:", grid_search.best_params_)
print("Best score (negative MSE):", grid_search.best_score_)

# Score the best candidate found by the search on the held-out split.
best_xgbr = grid_search.best_estimator_
y_pred = best_xgbr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Test MSE:", mse)
print("Test R2:", r2)

# --- Part 2: tune only the learning rate --------------------------------
# All other hyperparameters are pinned to the values below (adjust them as
# needed); the grid then varies the learning rate alone.
learning_rate_grid = {'learning_rate': [0.01, 0.05, 0.1, 0.2, 0.3]}

fixed_params = dict(
    n_estimators=200,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='reg:squarederror',
    random_state=42,
)
xgbr_learning_rate_tuning = xgb.XGBRegressor(**fixed_params)

grid_search_learning_rate = GridSearchCV(
    estimator=xgbr_learning_rate_tuning,
    param_grid=learning_rate_grid,
    cv=3,
    scoring='neg_mean_squared_error',
    verbose=2,
    n_jobs=-1,
)
grid_search_learning_rate.fit(X_train, y_train)

print("Best learning rate parameters:", grid_search_learning_rate.best_params_)
print("Best learning rate score (negative MSE):", grid_search_learning_rate.best_score_)

# Score the learning-rate-only winner on the same held-out split.
best_xgbr_lr = grid_search_learning_rate.best_estimator_
y_pred_lr = best_xgbr_lr.predict(X_test)
mse_lr = mean_squared_error(y_test, y_pred_lr)
r2_lr = r2_score(y_test, y_pred_lr)

print("Test MSE (Learning Rate Tuning):", mse_lr)
print("Test R2 (Learning Rate Tuning):", r2_lr)

28. Train a CatBoost Classifier on an imbalanced dataset and compare performance with class weighting.

In [None]:
import catboost as cb
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
from sklearn.metrics import classification_report, roc_auc_score
from sklearn.utils import class_weight

# Create an imbalanced dataset (requested class proportions: 90% / 10%)
X, y = make_classification(n_samples=1000, n_features=20, n_informative=15,
                           n_redundant=5, random_state=42, weights=[0.9, 0.1])

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- 1) Baseline: CatBoost without any weighting ---
catboost_no_weights = cb.CatBoostClassifier(random_state=42, verbose=0)
catboost_no_weights.fit(X_train, y_train)
y_pred_no_weights = catboost_no_weights.predict(X_test)
y_proba_no_weights = catboost_no_weights.predict_proba(X_test)[:, 1]

print("CatBoost without class weights:")
print(classification_report(y_test, y_pred_no_weights))
print("AUC:", roc_auc_score(y_test, y_proba_no_weights))

# --- 2) Per-sample weights ---
# Despite its name, this array holds one weight per training row (not one
# per class): 'balanced' gives rows of the rarer class a larger weight.
class_weights_calculated = class_weight.compute_sample_weight(
    class_weight='balanced',
    y=y_train
)

# Train CatBoost with sample weights
catboost_with_weights = cb.CatBoostClassifier(random_state=42, verbose=0)
catboost_with_weights.fit(X_train, y_train, sample_weight=class_weights_calculated)
y_pred_with_weights = catboost_with_weights.predict(X_test)
y_proba_with_weights = catboost_with_weights.predict_proba(X_test)[:, 1]

print("\nCatBoost with sample weights:")
print(classification_report(y_test, y_pred_with_weights))
print("AUC:", roc_auc_score(y_test, y_proba_with_weights))

# --- 3) Per-class weights via the class_weights parameter ---
# Weights must be INVERSELY proportional to class frequency so that the
# minority class counts more. Using the raw frequencies would up-weight the
# majority class and make the imbalance worse.
# Formula: n_samples / (n_classes * n_samples_in_class), with n_classes = 2,
# listed in class order [class 0, class 1].
n_train = len(y_train)
class_weights_parameter = [
    n_train / (2 * int((y_train == label).sum()))
    for label in (0, 1)
]

catboost_class_weights_param = cb.CatBoostClassifier(random_state=42, verbose=0, class_weights=class_weights_parameter)
catboost_class_weights_param.fit(X_train, y_train)
y_pred_class_weights_param = catboost_class_weights_param.predict(X_test)
y_proba_class_weights_param = catboost_class_weights_param.predict_proba(X_test)[:, 1]

print("\nCatBoost with class weights as a parameter:")
print(classification_report(y_test, y_pred_class_weights_param))
print("AUC:", roc_auc_score(y_test, y_proba_class_weights_param))

29. Train an AdaBoost Classifier and analyze the effect of different learning rates.

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt
import numpy as np

# --- Data ---------------------------------------------------------------
# Synthetic classification data (20 features: 15 informative, 5 redundant),
# with 20% of the rows held out for evaluation.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Sweep over learning rates ------------------------------------------
learning_rates = [0.01, 0.05, 0.1, 0.5, 1.0, 1.5, 2.0]

# Accuracy per learning rate, in the same order as `learning_rates`.
train_accuracies = []
test_accuracies = []

for lr in learning_rates:
    # Fresh classifier per learning rate; only learning_rate varies.
    adaboost = AdaBoostClassifier(learning_rate=lr, random_state=42)
    adaboost.fit(X_train, y_train)

    y_train_pred = adaboost.predict(X_train)
    y_test_pred = adaboost.predict(X_test)

    train_accuracy = accuracy_score(y_train, y_train_pred)
    test_accuracy = accuracy_score(y_test, y_test_pred)
    train_accuracies.append(train_accuracy)
    test_accuracies.append(test_accuracy)

    print(f"Learning Rate: {lr}")
    print(classification_report(y_test, y_test_pred))
    print(f"Train Accuracy: {train_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}\n")

# --- Plot: train vs. test accuracy per learning rate --------------------
plt.figure(figsize=(10, 6))
plt.plot(learning_rates, train_accuracies, label='Train Accuracy', marker='o')
plt.plot(learning_rates, test_accuracies, label='Test Accuracy', marker='o')
plt.title('AdaBoost Learning Rate Analysis')
plt.xlabel('Learning Rate')
plt.ylabel('Accuracy')
plt.xticks(learning_rates)
plt.legend()
plt.grid(True)
plt.show()

# --- Summary ------------------------------------------------------------
# Pick the learning rate with the highest test accuracy (the first one
# wins in case of a tie, as np.argmax returns the first maximum).
best_lr_index = np.argmax(test_accuracies)
best_lr = learning_rates[best_lr_index]
best_test_accuracy = test_accuracies[best_lr_index]

print(f"Best Learning Rate: {best_lr}")
print(f"Best Test Accuracy: {best_test_accuracy:.4f}")

30. Train an XGBoost Classifier for multi-class classification and evaluate using log-loss.

In [None]:
import numpy as np
import xgboost as xgb
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss

# --- Data ---------------------------------------------------------------
# Digits dataset; the split is stratified on the labels.
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Standardize features: the scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# XGBoost's native training API works on DMatrix objects.
dtrain = xgb.DMatrix(X_train, label=y_train)
dtest = xgb.DMatrix(X_test, label=y_test)

# --- Model --------------------------------------------------------------
params = {
    'objective': 'multi:softprob',   # per-class probabilities as output
    'num_class': len(np.unique(y)),  # number of distinct labels in y
    'eval_metric': 'mlogloss',       # multi-class log loss
    'max_depth': 4,
    'eta': 0.3,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'seed': 42,
}

num_round = 100  # number of boosting rounds
bst = xgb.train(params, dtrain, num_round)

# --- Evaluation ---------------------------------------------------------
# `probs` holds the predicted class probabilities for the test rows.
probs = bst.predict(dtest)
logloss = log_loss(y_test, probs)
print(f'Log Loss: {logloss:.4f}')
