In [1]:
# Import required libraries
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
import pandas as pd

# Load the Breast Cancer dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

# Train a Random Forest Classifier
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Get feature importance
importances = pd.Series(model.feature_importances_, index=X.columns)

# Print the top 5 most important features
top_5_features = importances.sort_values(ascending=False).head(5)
print("Top 5 Most Important Features:\n")
print(top_5_features)


Top 5 Most Important Features:

worst area              0.139357
worst concave points    0.132225
mean concave points     0.107046
worst radius            0.082848
worst perimeter         0.080850
dtype: float64


In [3]:
# Import required libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
data = load_iris()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a single Decision Tree
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)
y_pred_dt = dt.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)

# Train a Bagging Classifier using Decision Trees (use 'estimator' not 'base_estimator')
bagging_model = BaggingClassifier(
    estimator=DecisionTreeClassifier(),  # corrected parameter name
    n_estimators=50,
    random_state=42,
    n_jobs=-1
)
bagging_model.fit(X_train, y_train)
y_pred_bag = bagging_model.predict(X_test)
bagging_accuracy = accuracy_score(y_test, y_pred_bag)

# Print the accuracies
print("Accuracy of Single Decision Tree:", dt_accuracy)
print("Accuracy of Bagging Classifier:", bagging_accuracy)


Accuracy of Single Decision Tree: 1.0
Accuracy of Bagging Classifier: 1.0


In [4]:
# Import required libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the Iris dataset
data = load_iris()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Define the Random Forest Classifier
rf = RandomForestClassifier(random_state=42)

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 150],
    'max_depth': [3, 5, 7, None]
}

# Perform Grid Search with Cross-Validation
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5, n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_

# Train the final model with best parameters
best_model = grid_search.best_estimator_
y_pred = best_model.predict(X_test)

# Calculate accuracy
final_accuracy = accuracy_score(y_test, y_pred)

# Print the results
print("Best Parameters:", best_params)
print("Final Accuracy:", final_accuracy)


Best Parameters: {'max_depth': 3, 'n_estimators': 150}
Final Accuracy: 1.0


In [5]:
# Import required libraries
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# Load the California Housing dataset
data = fetch_california_housing()
X, y = data.data, data.target

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a Bagging Regressor using Decision Trees
bagging_model = BaggingRegressor(
    estimator=DecisionTreeRegressor(),
    n_estimators=50,
    random_state=42,
    n_jobs=-1
)
bagging_model.fit(X_train, y_train)
y_pred_bag = bagging_model.predict(X_test)
mse_bag = mean_squared_error(y_test, y_pred_bag)

# Train a Random Forest Regressor
rf_model = RandomForestRegressor(
    n_estimators=50,
    random_state=42,
    n_jobs=-1
)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
mse_rf = mean_squared_error(y_test, y_pred_rf)

# Print the Mean Squared Errors
print("Mean Squared Error of Bagging Regressor:", mse_bag)
print("Mean Squared Error of Random Forest Regressor:", mse_rf)


Mean Squared Error of Bagging Regressor: 0.25787382250585034
Mean Squared Error of Random Forest Regressor: 0.25772464361712627


In [6]:
# Import required libraries
import pandas as pd
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

# Sample: Load a financial dataset (replace with actual loan dataset)
# For demonstration, we use a synthetic dataset
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=20, n_informative=10,
                           n_redundant=5, n_clusters_per_class=2, weights=[0.7],
                           flip_y=0.01, random_state=42)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)

# Initialize Gradient Boosting Classifier
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42
)

# Cross-Validation
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = cross_val_score(gb_model, X_train, y_train, cv=cv, scoring='roc_auc')

# Train final model
gb_model.fit(X_train, y_train)
y_pred = gb_model.predict(X_test)
roc_auc = roc_auc_score(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Print results
print("Cross-Validation ROC-AUC Scores:", cv_scores)
print("Mean CV ROC-AUC:", cv_scores.mean())
print("Test Accuracy:", accuracy)
print("Test ROC-AUC:", roc_auc)


Cross-Validation ROC-AUC Scores: [0.98441621 0.97290818 0.97986094 0.91350826 0.97351798]
Mean CV ROC-AUC: 0.964842313265341
Test Accuracy: 0.9166666666666666
Test ROC-AUC: 0.8688416846311583
