In [1]:
# Hand-written customer records used to spot-check the saved model.
# Each row has 11 elements: 10 feature values followed by the true churn
# label (0 or 1) as the last element. Index 1 is the gender string, which is
# label-encoded before prediction.
# NOTE(review): the remaining positions presumably follow the column order of
# the churn CSVs after CustomerID is dropped -- TODO confirm.
samples = [
    [22,'Female',25,14,4,27,'Basic','Monthly',598,9,1],
    [41,'Female',28,28,7,13,'Standard','Monthly',584,20,0],
    [47,'Male',27,10,2,29,'Premium','Annual',757,21,0],
    [35,'Male',9,12,5,17,'Premium','Quarterly',232,18,0],
    [53,'Female',58,24,9,2,'Standard','Annual',533,18,0],
    [30,'Male',41,14,10,10,'Premium','Monthly',500,29,0],
    [47,'Female',37,15,9,28,'Basic','Quarterly',574,14,1],
    [54,'Female',36,11,0,18,'Standard','Monthly',323,16,0],
    [36,'Male',20,5,10,8,'Basic','Monthly',687,8,0],
    [65,'Male',8,4,2,23,'Basic','Annual',995,10,0],
    [46,'Female',42,27,9,21,'Standard','Annual',526,3,1],
    [56,'Male',13,23,5,14,'Basic','Quarterly',187,1,0],
    [31,'Male',2,7,0,25,'Premium','Quarterly',758,24,0],
    [42,'Male',46,27,5,8,'Premium','Quarterly',438,30,0],
    [59,'Male',21,17,2,14,'Premium','Quarterly',663,15,0],
    [35,'Female',1,3,7,3,'Basic','Monthly',677,25,1]
]


In [2]:
import numpy as np
import pickle
import joblib
import pandas as pd

# Load the fitted preprocessors and the trained model from disk.
#   ct         -> transformer used for the one-hot encoding step
#   sc         -> scaler applied after one-hot encoding
#   le         -> encoder for the gender column (index 1)
#   classifier -> model whose .predict() is evaluated below
#
# SECURITY NOTE: unpickling (pickle and joblib alike) can execute arbitrary
# code. Only load these artifacts if they come from a trusted source.
#
# Each file is opened in a `with` block so its handle is closed right after
# loading instead of being left open until garbage collection.
with open("ct.pkl", "rb") as fh:
    ct = pickle.load(fh)
with open("scaler.pkl", "rb") as fh:
    sc = pickle.load(fh)
with open("le_gender.pkl", "rb") as fh:
    le = pickle.load(fh)
classifier = joblib.load("classifier.pkl")


In [3]:
# Spot-check the model on the hand-written samples, one record at a time, and
# print a small table comparing each prediction with the known label.
print("Index | True | Predicted | Match?")
print("------------------------------------")

for row_number, record in enumerate(samples, start=1):
    # Everything but the last element is a feature; the last element is the
    # true churn label.
    *feature_values, true_value = record

    # Single-row 2-D array; dtype=object keeps the mixed strings and numbers
    # as they are.
    features = np.array(feature_values, dtype=object).reshape(1, -1)

    # Preprocess in order: gender encoding -> one-hot encoding -> scaling.
    features[:, 1] = le.transform(features[:, 1])
    features = sc.transform(ct.transform(features))

    # predict() returns one value per input row; there is exactly one row here.
    pred = classifier.predict(features)[0]
    match = pred == true_value

    print(f"{row_number:5} |  {true_value}   |     {pred}     |  {match}")


Index | True | Predicted | Match?
------------------------------------
    1 |  1   |     1.0     |  True
    2 |  0   |     0.0     |  True
    3 |  0   |     0.0     |  True
    4 |  0   |     0.0     |  True
    5 |  0   |     0.0     |  True
    6 |  0   |     1.0     |  False
    7 |  1   |     1.0     |  True
    8 |  0   |     0.0     |  True
    9 |  0   |     0.0     |  True
   10 |  0   |     1.0     |  False
   11 |  1   |     1.0     |  True
   12 |  0   |     0.0     |  True
   13 |  0   |     0.0     |  True
   14 |  0   |     0.0     |  True
   15 |  0   |     0.0     |  True
   16 |  1   |     1.0     |  True


In [4]:

from sklearn.metrics import accuracy_score, confusion_matrix

# -----------------------------
# LOAD DATA
# -----------------------------
# This cell scores the saved model on the training and testing CSVs combined.
# NOTE(review): if the classifier was fitted on the training CSV, the accuracy
# printed here includes rows the model has already seen and is not a held-out
# estimate -- confirm before quoting this number.
train_data = pd.read_csv('../customer_churn_dataset-training-master.csv')
test_data = pd.read_csv('../customer_churn_dataset-testing-master.csv')

# -----------------------------
# CLEAN: DROP MISSING ROWS & CUSTOMER ID
# -----------------------------
# One non-mutating chain (no inplace=True): `data` is always derived from the
# freshly read frames, so re-running the cell cannot act on a half-cleaned frame.
data = (
    pd.concat([train_data, test_data], ignore_index=True)
    .dropna()
    .drop(columns=['CustomerID'])
)

# Separate into features & labels
X = data.iloc[:, :-1].values      # all columns except last
y_true = data.iloc[:, -1].values  # last column is churn label

# -----------------------------
# PREPROCESS
# -----------------------------
# `le`, `ct`, `sc` and `classifier` are the objects loaded earlier in the
# notebook; they are applied in the order: gender encoding -> one-hot -> scaling.

# Encode gender (column index 1)
X[:, 1] = le.transform(X[:, 1])

# One hot encode categorical columns
X = ct.transform(X)

# Scale features
X = sc.transform(X)

# -----------------------------
# PREDICT
# -----------------------------
y_pred = classifier.predict(X)

# -----------------------------
# ACCURACY & CONFUSION MATRIX
# -----------------------------
acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

print("\nOverall Accuracy:", acc)
print("\nConfusion Matrix:\n", cm)



Overall Accuracy: 0.9856256655700842

Confusion Matrix:
 [[217779   6935]
 [   327 280165]]


In [5]:

from sklearn.metrics import accuracy_score, confusion_matrix

# -----------------------------
# LOAD & CLEAN DATA
# -----------------------------
# This cell scores the saved model on the testing CSV only.
# Read, drop rows with missing values, and drop the CustomerID column in one
# non-mutating chain (no inplace=True), so `data` is always rebuilt from the
# file and re-running the cell cannot act on a half-cleaned frame.
data = (
    pd.read_csv('../customer_churn_dataset-testing-master.csv')
    .dropna()
    .drop(columns=['CustomerID'])
)

# Separate into features & labels
X = data.iloc[:, :-1].values      # all columns except last
y_true = data.iloc[:, -1].values  # last column is churn label

# -----------------------------
# PREPROCESS
# -----------------------------
# `le`, `ct`, `sc` and `classifier` are the objects loaded earlier in the
# notebook; they are applied in the order: gender encoding -> one-hot -> scaling.

# Encode gender (column index 1)
X[:, 1] = le.transform(X[:, 1])

# One hot encode categorical columns
X = ct.transform(X)

# Scale features
X = sc.transform(X)

# -----------------------------
# PREDICT
# -----------------------------
y_pred = classifier.predict(X)

# -----------------------------
# ACCURACY & CONFUSION MATRIX
# -----------------------------
acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

print("\nOverall Accuracy:", acc)
print("\nConfusion Matrix:\n", cm)



Overall Accuracy: 0.8920682263025445

Confusion Matrix:
 [[26947  6934]
 [   14 30479]]
