In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegressionCV
from sklearn.svm import SVC
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.ensemble import VotingClassifier

In [2]:
# Read the Telco customer-churn CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='../data/WA_Fn-UseC_-Telco-Customer-Churn.csv')

# Data Cleaning

In [3]:
# Columns removed before any further processing
to_drop = ['customerID']
data = data.drop(columns=to_drop)

In [4]:
# Turn empty / single-space TotalCharges entries into NaN so that the float
# cast and dropna() in the next cell can deal with them.
# np.nan is passed explicitly: `replace(x, None)` is version-dependent in
# pandas (older releases ignore the explicit None and forward-fill the
# previous row's value instead of marking the entry as missing).
data['TotalCharges'] = data['TotalCharges'].replace(['', ' '], np.nan)

In [5]:
# Cast TotalCharges to float, discard rows with any missing value and
# renumber the remaining rows from 0
data = (
    data.astype({'TotalCharges': float})
        .dropna()
        .reset_index(drop=True)
)

# Preprocessing

In [6]:
# Columns holding exactly two distinct values are replaced by integer codes
# (pd.factorize numbers the values in order of first appearance)
binary_columns = [name for name in data.columns
                  if np.unique(data[name]).size == 2]

for name in binary_columns:
    codes, _ = pd.factorize(data[name])
    data[name] = codes

In [7]:
# Multi-valued categorical columns that are expanded into indicator columns
to_be_onehot = ['MultipleLines', 'InternetService', 'OnlineSecurity', 'OnlineBackup',
                'DeviceProtection', 'TechSupport', 'StreamingTV', 'StreamingMovies',
                'Contract', 'PaymentMethod']

# One encoder fitted once on all listed columns: the output columns are grouped
# per input column, in list order, and named "<column>_<category>".
ohe = OneHotEncoder()
data_encoded = ohe.fit_transform(data[to_be_onehot]).toarray()

# Reuse data's own index: pd.concat(axis=1) aligns rows on index labels, so a
# frame with a fresh 0..n-1 index would silently misalign (and introduce NaN
# rows) whenever data's index is not a clean RangeIndex.
data_onehot = pd.DataFrame(data_encoded,
                           columns=ohe.get_feature_names_out(to_be_onehot),
                           index=data.index)

# Swap the original categorical columns for their encoded counterparts
data = pd.concat([data.drop(columns=to_be_onehot), data_onehot], axis=1)

In [8]:
# Feature matrix (every column except the target) and target vector
X = data.drop('Churn', axis=1).values.astype(float)
y = data['Churn'].values

# 70/30 split, stratified on the target so both parts keep the same class ratio.
# random_state pins the shuffle: without it every run produces a different
# split, so the scores reported below would not be reproducible or comparable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42)

# Random Forest
Random Forest is a supervised machine learning algorithm used for classification, regression, and other tasks using decision trees. It is an ensemble learning method that creates a set of decision trees from a randomly selected subset of the training set. The algorithm then collects the votes from different decision trees to decide the final prediction. Random Forest is a powerful algorithm that can handle high-dimensional datasets with many features and can also handle missing data.

# Random Forest in Scikit-Learn
Scikit-Learn is a popular Python library for machine learning. It provides an implementation of the Random Forest algorithm through the RandomForestClassifier and RandomForestRegressor classes. These classes are built on top of Scikit-Learn's decision tree implementation and provide additional functionality for building and tuning random forest models.

To use Random Forest in Scikit-Learn, you can follow these steps:
1. Import the necessary modules: "RandomForestClassifier" or "RandomForestRegressor" from "sklearn.ensemble", "GridSearchCV" from "sklearn.model_selection", and any other necessary modules.

2. Define the parameter grid for the hyperparameters you want to tune. For example, you might want to tune the number of trees "n_estimators", the maximum depth of each tree "max_depth", and the minimum number of samples required to split an internal node "min_samples_split".

3. Create an instance of the "RandomForestClassifier" or "RandomForestRegressor" with default hyperparameters.

4. Create an instance of "GridSearchCV" with the random forest instance, the parameter grid, and any other necessary arguments such as cross-validation folds.

5. Fit the "GridSearchCV" instance to your training data.

6. Access the best estimator using the "best_estimator_" attribute of the "GridSearchCV" instance.

7. Use the best estimator to make predictions on new data.

In [9]:
# Define hyperparameters to search
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

In [10]:
# Create a random forest classifier
rf = RandomForestClassifier()

In [11]:
# Create grid search object
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, cv = 3)

In [None]:
# Fit grid search object to data
grid_search.fit(X_train, y_train)

In [None]:
# Print best hyperparameters
print(grid_search.best_params_)

In [None]:
rf_best = grid_search.best_estimator_

pred = rf_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)

# Gradient Boosting
Gradient Boosting is a machine learning technique used for both regression and classification problems. It is an ensemble method that combines several weak learners into a single strong learner in an iterative fashion. The weak learners are typically decision trees. Gradient Boosting works by iteratively adding decision trees to the model, with each tree correcting the errors of the previous one. The final prediction is the weighted sum of the predictions of all the trees.
# Gradient Boosting in Scikit-Learn
Scikit-Learn provides an implementation of Gradient Boosting for both regression and classification problems through the "GradientBoostingRegressor" and "GradientBoostingClassifier" classes, respectively. These classes are built on top of Scikit-Learn's decision tree implementation and provide additional functionality for building and tuning Gradient Boosting models.

To use Gradient Boosting in Scikit-Learn, you can follow these steps:
1. Import the necessary modules: "GradientBoostingRegressor" or "GradientBoostingClassifier" from "sklearn.ensemble", "GridSearchCV" from "sklearn.model_selection", and any other necessary modules.

2. Define the parameter grid for the hyperparameters you want to tune. For example, you might want to tune the number of trees "n_estimators", the maximum depth of each tree "max_depth", and the learning rate "learning_rate".

3. Create an instance of the "GradientBoostingRegressor" or "GradientBoostingClassifier" with default hyperparameters.

4. Create an instance of "GridSearchCV" with the Gradient Boosting instance, the parameter grid, and any other necessary arguments such as cross-validation folds.

5. Fit the "GridSearchCV" instance to your training data.

6. Access the best estimator using the "best_estimator_" attribute of the "GridSearchCV" instance.

7. Use the best estimator to make predictions on new data.

In [None]:
# Define parameter grid
param_grid = {
    'learning_rate': [0.1, 0.05, 0.01],
    'n_estimators': [100, 500, 1000],
    'max_depth': [3, 5, 7]
}

In [None]:
# Create GradientBoostingClassifier instance
gb = GradientBoostingClassifier()

In [None]:
# Create GridSearchCV instance
grid_search = GridSearchCV(gb, param_grid=param_grid, cv=3)

In [None]:
# Fit GridSearchCV to training data
grid_search.fit(X_train, y_train)

In [None]:
# Print best hyperparameters
print(grid_search.best_params_)

In [None]:
gb_best =  grid_search.best_estimator_

pred = gb_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)

# ExtraTrees Classifier
ExtraTrees Classifier is an ensemble tree-based machine learning approach that uses randomization to reduce variance and computational cost compared to Random Forest. It can be used for classification or regression, in scenarios where computational cost is a concern and where the data is not normally distributed. ExtraTrees Classifier creates a group of unpruned decision trees using the traditional top-down method and aggregates the results from the group of decision trees to output a prediction.

## Differences between Extra Trees and Random Forest
Extra Trees and Random Forest are two similar ensemble methods that build many decision trees during training. However, there are some differences between them. At each node, Extra Trees draws a random split point for each candidate feature and keeps the best of these random splits, while Random Forest searches for the best possible split among a random subset of features. Additionally, by default Extra Trees builds every tree over all observations in the dataset (no bootstrap sampling), but still with a different random subset of features considered at each split.

## Implementing ExtraTrees Classifier in Scikit-Learn
Scikit-Learn provides an implementation of ExtraTrees Classifier through the "ExtraTreesClassifier" class. To use it, you can follow these steps:

1. Import the necessary modules: "ExtraTreesClassifier" from "sklearn.ensemble", "GridSearchCV" from "sklearn.model_selection", and any other necessary modules.

2. Define the parameter grid for the hyperparameters you want to tune. For example, you might want to tune the number of trees "n_estimators", the maximum depth of each tree "max_depth", and the minimum number of samples required to split an internal node "min_samples_split".

3. Create an instance of the "ExtraTreesClassifier" with default hyperparameters.

4. Create an instance of "GridSearchCV" with the "ExtraTreesClassifier" instance, the parameter grid, and any other necessary arguments such as cross-validation folds.

5. Fit the "GridSearchCV" instance to your training data.

6. Access the best estimator using the "best_estimator_" attribute of the "GridSearchCV" instance.

7. Use the best estimator to make predictions on new data.

In [None]:
# Define parameter grid
param_grid = {
    'n_estimators': [100, 500, 1000],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10]
}

In [None]:
# Create ExtraTreesClassifier instance
et = ExtraTreesClassifier()

In [None]:
# Create GridSearchCV instance
grid_search = GridSearchCV(et, param_grid=param_grid, cv=5)

In [None]:
# Fit GridSearchCV to training data
grid_search.fit(X_train, y_train)

In [None]:
et_best = grid_search.best_estimator_

pred = et_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)

# Bagging Classifier
Bagging Classifier is an ensemble meta-estimator that fits base classifiers on random subsets of the original dataset and then aggregates their individual predictions to form a final prediction. It is an easy-to-use and effective method for improving the performance of a single model. Bagging stands for Bootstrap Aggregating, which is an ensemble machine learning technique that combines the predictions of multiple models to improve the overall performance of the system. The Bagging Classifier uses bootstrap resampling to generate multiple different subsets of the training data.
## Implementing Bagging Classifier in Scikit-Learn
Scikit-Learn provides an implementation of Bagging Classifier through the "BaggingClassifier" class. To use it, you can follow these steps:

1. Import the necessary modules: "BaggingClassifier" from "sklearn.ensemble", "GridSearchCV" from "sklearn.model_selection", and any other necessary modules.

2. Define the parameter grid for the hyperparameters you want to tune. For example, you might want to tune the number of base estimators "n_estimators", the fraction of training samples drawn for each base estimator "max_samples", and the fraction of features drawn for each base estimator "max_features".

3. Create an instance of the "BaggingClassifier" with default hyperparameters.

4. Create an instance of "GridSearchCV" with the Bagging Classifier instance, the parameter grid, and any other necessary arguments such as cross-validation folds.

5. Fit the GridSearchCV instance to your training data.

6. Access the best estimator using the "best_estimator_" attribute of the "GridSearchCV" instance.

7. Use the best estimator to make predictions on new data.

In [None]:
# Define parameter grid
param_grid = {
    'n_estimators': [100, 500, 1000],
    'max_samples': [0.5, 1.0],
    'max_features': [0.5, 1.0]
}

In [None]:
# Create BaggingClassifier instance
bc = BaggingClassifier()

In [None]:
# Create GridSearchCV instance
grid_search = GridSearchCV(bc, param_grid=param_grid, cv=3)

In [None]:
# Fit GridSearchCV to training data
grid_search.fit(X_train, y_train)

In [None]:
bc_best = grid_search.best_estimator_

pred = bc_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)

# Logistic Regression
Logistic regression is a type of supervised learning algorithm used for classification tasks. It estimates the probability of an event occurring based on one or more independent variables. The estimated probability is then thresholded (typically at 0.5) to produce a binary output (0 or 1, yes/no, true/false).

Here are the general steps to perform logistic regression using scikit-learn:

1. Import the necessary packages, classes, and functions.
2. Load the data.
3. Transform the data if necessary.
4. Fit the logistic regression model to the data.
5. Evaluate the performance of the model.

In [None]:
# Define parameter grid
param_grid = {
    'Cs': [0.1, 1, 10],
    'penalty': ['l1', 'l2']
}

In [None]:
# Create LogisticRegressionCV instance
lr = LogisticRegressionCV()

In [None]:
# Create GridSearchCV instance
grid_search = GridSearchCV(lr, param_grid=param_grid, cv=3)

In [None]:
# Fit GridSearchCV to training data
grid_search.fit(X_train, y_train)

In [None]:
lr_best = grid_search.best_estimator_

pred = lr_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)

# Support Vector Classifier
Support Vector Classification (SVC) is a type of supervised learning algorithm used for classification tasks. It works by finding the hyperplane that best separates the classes in the feature space. The hyperplane is chosen such that it maximizes the margin between the classes, which is defined as the distance between the hyperplane and the closest data points from each class. The data points that are closest to the hyperplane are known as support vectors.

## Implementation in Scikit-learn
Scikit-learn provides the "SVC" class for fitting the model. The parameters used by this module include:
- "C": Penalty parameter C of the error term.
- "kernel": Specifies the kernel type to be used in the algorithm (linear, polynomial, radial basis function (RBF), sigmoid).
- "degree": Degree of the polynomial kernel function ('poly').
- "gamma": Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
- "coef0": Independent term in kernel function.

SVC can be used for binary classification problems as well as multi-class classification problems. In scikit-learn, multi-class SVC can be performed using either one-vs-one or one-vs-rest methods.
Overall, SVC is a powerful tool for classification tasks and can be easily implemented using scikit-learn's built-in functions and classes.

In [None]:
# Define parameter grid
param_grid = {
    'C': [0.1, 1, 10],
    'kernel': ['linear', 'rbf'],
    'gamma': [0.1, 1, 10]
}


In [None]:
# Create SVC instance
svc = SVC()

In [None]:
# Create GridSearchCV instance
grid_search = GridSearchCV(svc, param_grid=param_grid, cv=3)

In [None]:
# Fit GridSearchCV to training data
grid_search.fit(X_train, y_train)

In [None]:
svc_best = grid_search.best_estimator_

pred = svc_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)

# Ensembling

In [None]:
# Create an instance of the VotingClassifier class
ensemble_clf = VotingClassifier(estimators=[('rf', rf_best), ('gb', gb_best),
                                           ('et', et_best), ('bc', bc_best),
                                           ('svc', svc_best)], voting='hard')

In [None]:
pred = svc_best.predict(X_test)

In [None]:
accuracy_score(y_test, pred)

In [None]:
confusion_matrix(y_test, pred)