<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Read-'clean_churn.csv'" data-toc-modified-id="Read-'clean_churn.csv'-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Read 'clean_churn.csv'</a></span></li><li><span><a href="#Random_Forest-Parameter-Hypertuning-&amp;-Grid-Search" data-toc-modified-id="Random_Forest-Parameter-Hypertuning-&amp;-Grid-Search-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Random_Forest Parameter Hypertuning &amp; Grid Search</a></span></li><li><span><a href="#Best-Random-Forest-Model-Train/Test-Statistics" data-toc-modified-id="Best-Random-Forest-Model-Train/Test-Statistics-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Best Random Forest Model Train/Test Statistics</a></span></li></ul></div>

# Random_Forest_Model Using 'clean_churn.csv'

## Read 'clean_churn.csv'

In [24]:
# Common Python Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# SKLearn Libraries
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score, recall_score


In [25]:
# Single source of truth for the input path: the same value is read from disk
# and reported in the run banner below, so the two can never disagree.
filename = 'clean_churn.csv'
df = pd.read_csv(filename)

# Splitting the dataset features into X and y
# These columns are excluded before the feature matrix is built.
# NOTE(review): 'Unnamed: 0' looks like a leftover index column from an earlier
# CSV export -- confirm against the script that wrote the file.
df = df.drop(columns=['Unnamed: 0', 'T_D_Min', 'T_E_Min', 'T_N_Min', 'T_I_Min'])
x = df.drop(columns=['Churned'])   # features: every remaining column but the target
y = df.Churned                     # target

# Fixed seed so the train/test partition is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=100)

# \033[1m ... \033[0m are ANSI escapes that render the file name in bold.
print('\nFile name of run =\033[1m', filename, '\033[0m \n')

print(55 * '=', '\n')


File name of run =[1m clean_churn.csv [0m 




## Random_Forest Parameter Hypertuning & Grid Search

- A random forest classifier was tuned with [GridSearchCV](https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html) wrapped around a [RandomForestClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html).


- Random forest hyperparameter search ranges:
   - 'n_estimators': list(range(50,150,10)),
   - 'criterion':['gini', 'entropy'],
   - 'max_leaf_nodes': list(range(2,12,1)),
   - 'max_depth': list(range(10,20,1))


- GridSearchCV settings:
    - scoring='accuracy',
    - n_jobs=10,
    - cv=3,
    - verbose=0

In [26]:
import time

start = time.perf_counter()

# Search space: 10 n_estimators x 2 criteria x 10 max_leaf_nodes x 10 max_depth
# = 2,000 candidate settings, each fitted once per CV fold (cv=3), i.e. at
# least 6,000 forest fits. Expect this cell to run for a long time.
# NOTE(review): with max_leaf_nodes <= 11 a tree can be at most 10 levels deep,
# so max_depth values of 10-19 probably never bind -- confirm, and consider
# dropping that axis to cut the search roughly 10x.
grid = [{
    'n_estimators': list(range(50, 150, 10)),
    'criterion': ['gini', 'entropy'],
    'max_leaf_nodes': list(range(2, 12, 1)),
    'max_depth': list(range(10, 20, 1))
}]

# random_state pins the forest's bootstrap/feature sampling so that candidate
# scores (and therefore the selected parameters) are reproducible across runs.
rf_search = GridSearchCV(estimator=RandomForestClassifier(random_state=100,
                                                          verbose=0),
                         param_grid=grid,
                         scoring='accuracy',
                         n_jobs=10,
                         cv=3,
                         verbose=0)

rf_search.fit(x_train, y_train)

# Read the clock once so the reported duration is the one actually measured.
elapsed = time.perf_counter() - start
print(f'\nDetermined grid search in {elapsed:0.4f} seconds.')

KeyboardInterrupt: 

In [None]:
# Pull the winning parameter combination and its score off the fitted search,
# then report both.
best_params = rf_search.best_params_
best_score = rf_search.best_score_

print('\nBest hyperparameters:', best_params)
print('\nAccuracy:', best_score)

## Best Random Forest Model Train/Test Statistics

In [None]:
import time

from sklearn.metrics import classification_report, confusion_matrix

start = time.perf_counter()

# Hyperparameters are hard-coded here rather than read from rf_search, so this
# cell can run without repeating the grid search.
# random_state makes the fitted forest, and every metric below, reproducible.
rf_class = RandomForestClassifier(criterion='entropy',
                                  max_depth=12,
                                  max_leaf_nodes=9,
                                  n_estimators=50,
                                  random_state=100,
                                  verbose=0,
                                  n_jobs=10)

rf_class.fit(x_train, y_train)

y_pred = rf_class.predict(x_test)

# Compute the confusion matrix once and reuse it for both the plot and the
# text output. The 2x2 labels below assume a binary target with both classes
# present in y_test.
cm = confusion_matrix(y_test, y_pred)

conf_matrix = pd.DataFrame(data=cm,
                           columns=['Predicted:0', 'Predicted:1'],
                           index=['Actual:0', 'Actual:1'])

ax = sns.heatmap(conf_matrix, annot=True, fmt='d', cmap="YlGnBu")
ax.set_title('Random forest confusion matrix (test set)')

print(cm)
print(classification_report(y_test, y_pred))


print(f'\nRun time {time.perf_counter() - start:0.4f} seconds.')