# Test IBM Telco Churn model build after training

Here are the steps needed, including the key feature engineering procedures, which mirror those performed during training.

- Import Data from file
- Split the data for test
- Missing Data imputation
- Ordinal label encoding
- Test the data

In [1]:
import unittest
import pandas as pd 
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split # split data into training and testing sets
from sklearn.metrics import balanced_accuracy_score, roc_auc_score, make_scorer # for scoring during cross validation
from sklearn.model_selection import GridSearchCV # cross validation
from sklearn.metrics import confusion_matrix # create a confusion matrix
from sklearn.metrics import classification_report # create classification report
from sklearn.metrics import plot_confusion_matrix # draws a confusion matirx
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.preprocessing import LabelEncoder # label encode for categorical features
from feature_engine.encoding import OrdinalEncoder # For integer encoding using
import pickle
import random
# File name of the pickled model; it is opened and unpickled in the "Load the trained model" cell.
file_name = "xgb_IBM_Churn.pkl"

  import pandas.util.testing as tm
In d:\software\programming\anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The text.latex.preview rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In d:\software\programming\anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The mathtext.fallback_to_cm rcparam was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In d:\software\programming\anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: Support for setting the 'mathtext.fallback_to_cm' rcParam is deprecated since 3.3 and will be removed two minor releases later; use 'mathtext.fallback : 'cm' instead.
In d:\software\programming\anaconda3\lib\site-packages\matplotlib\mpl-data\stylelib\_classic_test.mplstyle: 
The validate_bool_maybe_none function was deprecated in Matplotlib 3.3 and will be removed two minor releases later.
In d:\software\progra

## Importing the data

In [2]:
# Read the customer churn records from the CSV file (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer='Telco_customer_churn.csv')

## Drop Unwanted Columns

In [3]:
# Remove the columns this notebook treats as unwanted. `columns=` names the
# axis explicitly and is equivalent to passing the labels with axis=1.
df.drop(
    columns=['Churn Label', 'Churn Score', 'CLTV', 'Churn Reason'],
    inplace=True,
)

## Drop Constants and Duplicated Columns

In [4]:
# Remove the columns this notebook treats as constant or duplicated.
# `columns=` is equivalent to passing the labels with axis=1.
df.drop(
    columns=['CustomerID', 'Count', 'Country', 'State', 'Lat Long'],
    inplace=True,
)

In [5]:
# Replace every space in the column names with an underscore
# (e.g. 'Total Charges' becomes 'Total_Charges', the name used below).
df.columns = [column_name.replace(' ', '_') for column_name in df.columns]

## Missing Data imputation

In [6]:
# Rows whose Total_Charges is exactly one blank character are treated as
# missing and set to 0 so the column can be converted to numbers afterwards.
df.loc[df['Total_Charges'].eq(' '), 'Total_Charges'] = 0

In [7]:
# Convert Total_Charges to a numeric column; errors='raise' is the default
# behaviour, spelled out here: any value that cannot be parsed stops the cell.
df['Total_Charges'] = pd.to_numeric(df['Total_Charges'], errors='raise')

## Format Data: Split the Data into Dependent and Independent Variables

In [8]:
# Dependent variable: the Churn_Value column, copied so it is independent of df.
y = df['Churn_Value'].copy()
# Independent variables: every other column, likewise copied.
X = df.drop(columns=['Churn_Value']).copy()

## Format Data: Ordinal Label Encoding

In [9]:
# Encode the listed categorical columns with feature_engine's OrdinalEncoder.
# The encoder is fitted with the target y (encoding_method='ordered').
# NOTE(review): the fit uses the full data set, i.e. it happens before the
# train/test split below, so test rows and their labels take part in the fit.
# Confirm this matches how the encoder was fitted when the model was trained.
ordinal_encoder = OrdinalEncoder(
    variables=[
        'City', 'Gender', 'Senior_Citizen', 'Partner', 'Dependents',
        'Phone_Service', 'Multiple_Lines', 'Internet_Service',
        'Online_Security', 'Online_Backup', 'Device_Protection',
        'Tech_Support', 'Streaming_TV', 'Streaming_Movies',
        'Contract', 'Paperless_Billing', 'Payment_Method',
    ],
    encoding_method='ordered',
)
X_encoded = ordinal_encoder.fit_transform(X, y)

## Format Data: Split test data

In [10]:
# Split the encoded features and the target into train and test parts.
# The split is stratified on y and seeded (random_state=101) so that the same
# rows land in the test part on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded,
    y,
    stratify=y,
    random_state=101,
)

## Load the trained model

In [11]:
# Load the trained model from disk. The context manager closes the file handle
# as soon as unpickling finishes; a bare open() call would leave it open.
# NOTE: pickle.load can execute arbitrary code contained in the file, so only
# load model files that come from a trusted source.
with open(file_name, "rb") as model_file:
    xgb_model_loaded = pickle.load(model_file)

In [12]:
# Pick a random row position in the test split. randrange excludes its upper
# bound, so the result is always a valid position for .iloc; randint(0, n) is
# inclusive on both ends and could return n, one past the last row.
data_index = random.randrange(X_test.shape[0])
print(data_index)

1125


In [13]:
# Predict the label for the sampled row. Indexing .iloc with a one-element list
# returns a one-row DataFrame that keeps each column's dtype; converting a row
# Series back with pd.DataFrame(...).T would coerce all columns to one dtype.
predicted = xgb_model_loaded.predict(X_test.iloc[[data_index]])[0]
# True label stored at the same position in the test target.
actual = y_test.iloc[data_index]

In [14]:
# Show the model output next to the true label, one per line.
print(f'Predicted: {predicted}', f'Actual: {actual}', sep='\n')

Predicted: 0
Actual: 0


In [15]:
# Macro-averaged precision, recall and F-score of the loaded model on the
# whole test split (score is sklearn's precision_recall_fscore_support).
precision, recall, fscore, support = score(
    y_true=y_test,
    y_pred=xgb_model_loaded.predict(X_test),
    average='macro',
)

In [16]:
# Show the macro-averaged recall computed on the test split in the previous cell.
print(f'Recall: {recall}')

Recall: 0.8299754425796544


In [58]:
class TestXGB_Model(unittest.TestCase):
    """Checks of the loaded XGBoost churn model against the test split.

    The methods read the notebook-level ``X_test``, ``y_test``,
    ``xgb_model_loaded`` and ``score`` names, so the cells that create them
    must have been run first.

    The original ``Test...`` method names are kept for the cells that call
    them directly. Lowercase ``test_...`` aliases are provided as well because
    ``unittest`` only collects methods whose names start with ``test``.
    """

    # Number of random test rows printed by TestRandomSampleOfData.
    SAMPLE_COUNT = 10
    # Minimum macro-averaged recall accepted by TestAccuracy.
    RECALL_THRESHOLD = 0.8

    def TestRandomSampleOfData(self):
        """Print actual vs. predicted labels for randomly chosen test rows.

        A mismatch is reported on stdout but never raises: this method is a
        spot check of individual predictions, not a pass/fail criterion.
        """
        for loop_number in range(1, self.SAMPLE_COUNT + 1):
            print(f'Loop: {loop_number}')
            # randrange excludes the upper bound. random.randint(0, n) is
            # inclusive and could return n, which is one past the last valid
            # position and makes .iloc raise IndexError.
            data_index = random.randrange(X_test.shape[0])
            print(f'Index: {data_index}')
            # A one-element list keeps the row as a one-row DataFrame with the
            # original column dtypes.
            predicted = xgb_model_loaded.predict(X_test.iloc[[data_index]])[0]
            actual = y_test.iloc[data_index]
            print(f'Actual: {actual}')
            print(f'Predicted: {predicted}')
            # Plain comparison instead of raising and immediately catching an
            # AssertionError; the reported text is unchanged.
            if actual != predicted:
                print('Pridiction not same!')
            print('\n')

    def TestAccuracy(self):
        """Assert that macro-averaged recall on the test split meets the threshold.

        Despite its name, this method checks recall, not accuracy.

        Raises:
            AssertionError: if the recall is below ``RECALL_THRESHOLD``.
        """
        # Only the recall is needed; the other returned metrics are discarded.
        _, recall, _, _ = score(y_test,
                                xgb_model_loaded.predict(X_test),
                                average='macro')
        print(f'Current model recall: {recall}')
        self.assertGreaterEqual(recall, self.RECALL_THRESHOLD, 'Recall test')

    # Aliases that follow unittest's "test" prefix so a test runner finds them.
    test_random_sample_of_data = TestRandomSampleOfData
    test_accuracy = TestAccuracy

In [59]:
# Instantiate the test case directly so its check methods can be called by hand
# in the following cells (no unittest runner is involved).
obj = TestXGB_Model()

In [60]:
# Print actual vs. predicted labels for 10 randomly chosen rows of the test split.
obj.TestRandomSampleOfData()

Loop: 1
Index: 91
Actual: 0
Predicted: 0


Loop: 2
Index: 608
Actual: 1
Predicted: 1


Loop: 3
Index: 1387
Actual: 0
Predicted: 0


Loop: 4
Index: 538
Actual: 0
Predicted: 0


Loop: 5
Index: 889
Actual: 1
Predicted: 1


Loop: 6
Index: 436
Actual: 0
Predicted: 0


Loop: 7
Index: 67
Actual: 0
Predicted: 1
Pridiction not same!


Loop: 8
Index: 1224
Actual: 0
Predicted: 0


Loop: 9
Index: 647
Actual: 0
Predicted: 0


Loop: 10
Index: 83
Actual: 0
Predicted: 0




In [61]:
# Print the macro-averaged recall on the test split and assert it is at least 0.8.
obj.TestAccuracy()

Current model recall: 0.8299754425796544
