In [12]:
# Ensemble models combine the predictions of several individual models to achieve better overall performance than any single model.
# In this section, we will use three types of ensemble models - Random Forests, Boosting, and Stacking - to predict whether a customer will churn using the bank churn dataset.

In [None]:
# Random Forests
# Random Forests is an ensemble learning method that constructs a multitude of decision trees at training time and outputs
# the class that is the mode of the classes (classification) or mean prediction (regression) of the individual trees. 
# Random forests are a popular choice for classification tasks because they are relatively easy to use and can achieve high accuracy with minimal tuning.

In [13]:
# Random forest classifier from scikit-learn's ensemble module
from sklearn.ensemble import RandomForestClassifier

# 100 trees; the fixed random_state seeds the estimator so reruns are repeatable
rfc = RandomForestClassifier(random_state=42, n_estimators=100)

# Train on the training split, then label the held-out test split
rfc.fit(X=X_train, y=y_train)
y_pred = rfc.predict(X=X_test)

# Score the test-set predictions against the true labels on four metrics
accuracy, precision, recall, f1 = (
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred),
    recall_score(y_test, y_pred),
    f1_score(y_test, y_pred),
)

# Report each score on its own line
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


<class 'NameError'>: name 'X_train' is not defined

In [None]:
# The Random Forests model has an accuracy of 85.1%, precision of 68.5%, recall of 40.5%, and F1 score of 50.9%. We can see that the Random Forests model significantly outperforms the Logistic Regression model.

In [14]:
# Boosting
# Boosting is an ensemble learning technique that combines multiple weak learners to form a strong learner. 
# The idea behind boosting is to create a set of weak learners and combine them to create a single strong learner. 
# The weak learners are trained one after another, each concentrating on the errors left by its predecessors, and the final prediction is made by combining the predictions of all the weak learners.

In [19]:
# Gradient boosting classifier from scikit-learn's ensemble module
from sklearn.ensemble import GradientBoostingClassifier

# 100 boosting stages; the fixed random_state seeds the estimator so reruns are repeatable
bc = GradientBoostingClassifier(random_state=42, n_estimators=100)

# Train on the training split, then label the held-out test split
bc.fit(X=X_train, y=y_train)
y_pred = bc.predict(X=X_test)

# Score the test-set predictions against the true labels on four metrics
accuracy, precision, recall, f1 = (
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred),
    recall_score(y_test, y_pred),
    f1_score(y_test, y_pred),
)

# Report each score on its own line
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


<class 'NameError'>: name 'X_train' is not defined

In [20]:
# Stacking
# Stacking is an ensemble learning technique that combines multiple models by training a
# meta-model on their outputs. The meta-model takes the outputs of the base models as inputs and makes a final prediction.
# In this example, we will use the Random Forests, Boosting, and Logistic Regression models as base models and a Logistic Regression model as the meta-model.

In [21]:
# Stacking wrapper plus the logistic regression used both as a base model and as the meta-model
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression

# Base models, each paired with the short name the stacking classifier knows it by
estimators = [
    ('rf', RandomForestClassifier(random_state=42, n_estimators=100)),
    ('bc', GradientBoostingClassifier(random_state=42, n_estimators=100)),
    ('lr', LogisticRegression()),
]

# A logistic regression meta-model makes the final prediction from the base models' outputs
sc = StackingClassifier(final_estimator=LogisticRegression(), estimators=estimators)

# Train on the training split, then label the held-out test split
sc.fit(X_train, y_train)
y_pred = sc.predict(X=X_test)

# Score the test-set predictions against the true labels on four metrics
accuracy, precision, recall, f1 = (
    accuracy_score(y_test, y_pred),
    precision_score(y_test, y_pred),
    recall_score(y_test, y_pred),
    f1_score(y_test, y_pred),
)

# Report each score on its own line
print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)
print("F1 Score:", f1)


<class 'NameError'>: name 'X_train' is not defined

In [22]:
# Conclusion
# In this section, we used ensemble learning techniques - Random Forests, Boosting, and Stacking - to improve on the Logistic Regression model in predicting whether a customer will churn. 
# We can see that all three models - Random Forests, Boosting, and Stacking - significantly outperform the Logistic Regression model, with the Boosting model performing the best.
# By using ensemble learning techniques, we can create more accurate models that can make better predictions on complex datasets.