# Problem Statement

Given a customer profiling/churn dataset, generate meaningful customer templates (prototype customer profiles learned by clustering with FuzzyART) that can aid the creation of more personalized marketing campaigns.

Dataset Used: https://www.kaggle.com/blastchar/telco-customer-churn

## Import Packages

In [1]:
import numpy as np
import pandas as pd
from deepART import FuzzyART, FeatureTransformer

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


## Import Data

In [2]:
# Read the Telco churn CSV (expected next to this notebook) and preview the first rows.
DATA_PATH = "telco_customer_churn.csv"
raw_df = pd.read_csv(filepath_or_buffer=DATA_PATH)
raw_df.head(n=5)

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


## Preprocess Data

In [3]:
# --- Numeric clean-up -------------------------------------------------------
raw_df["MonthlyCharges"] = raw_df["MonthlyCharges"].astype(float)
# TotalCharges contains blank strings (' '). `Series.replace(' ', None)` is
# version-dependent: older pandas forward-fills the blank from the previous
# row (silently copying another customer's total), newer pandas inserts None
# and the float conversion then raises. Coerce explicitly instead.
total_charges = pd.to_numeric(raw_df["TotalCharges"], errors="coerce")
# NOTE(review): missing totals are filled with 0.0 so no NaN reaches the
# network; presumably these are brand-new customers with nothing billed yet
# -- confirm against the data (e.g. check `tenure` on the affected rows).
raw_df["TotalCharges"] = total_charges.fillna(0.0)

# --- Feature transformer configuration --------------------------------------
# Binarized columns: the dictionary value is the one treated as positive (encoded as 1).
binary_positive = {"gender": "Male", "PaperlessBilling": "Yes",
                   "PhoneService": "Yes", "Partner": "Yes", "SeniorCitizen": 1,
                   "Dependents": "Yes"}
# Normalized columns: the dictionary value is the normalization factor.
normalizer_factors = {"MonthlyCharges": "max", "TotalCharges": "max"}
# Binned columns: column name -> number of bins.
binning_spec = {"tenure": 6}
# Columns that are neither features to encode nor to one-hot (identifier and label).
passthrough_cols = ["customerID", "Churn"]

feat_transformer = FeatureTransformer()  # initialize FeatureTransformer object
feat_transformer.set_binary(binary_positive)
feat_transformer.set_normalizer(normalizer_factors)
# Every remaining column is one-hot encoded. Deriving the exclusion set from the
# configuration above keeps it in sync when a column changes encoding.
already_handled = set(binary_positive) | set(normalizer_factors) | set(binning_spec) | set(passthrough_cols)
one_hot_cols = [col for col in raw_df.columns if col not in already_handled]
feat_transformer.set_one_hot(one_hot_cols)
feat_transformer.set_binning(binning_spec)

# Transform a copy so `raw_df` keeps its original (cleaned) columns.
preprocessed_df = feat_transformer.fit_transform(raw_df.copy())
preprocessed_df.head()

Unnamed: 0,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No,...,MonthlyCharges,TotalCharges,gender,PaperlessBilling,PhoneService,Partner,SeniorCitizen,Dependents,Churn,customerID
0,0,1,0,1,0,0,1,0,0,0,...,0.251368,0.003437,0,1,0,1,0,0,No,7590-VHVEG
1,1,0,0,1,0,0,0,0,1,1,...,0.479579,0.217564,1,0,1,0,0,0,No,5575-GNVDE
2,1,0,0,1,0,0,0,0,1,0,...,0.453474,0.012453,1,1,1,0,0,0,Yes,3668-QPYBK
3,0,1,0,1,0,0,0,0,1,1,...,0.356211,0.211951,1,0,0,0,0,0,No,7795-CFOCW
4,1,0,0,0,1,0,1,0,0,1,...,0.595368,0.017462,0,1,1,0,0,0,Yes,9237-HQITU


In [4]:
# Summary statistics of the binned-tenure indicator columns (names containing "tenure_").
preprocessed_df.filter(like="tenure_").describe()

Unnamed: 0,tenure_-0.07200000000000001-12.0,tenure_12.0-24.0,tenure_24.0-36.0,tenure_36.0-48.0,tenure_48.0-60.0,tenure_60.0-72.0
count,7043.0,7043.0,7043.0,7043.0,7043.0,7043.0
mean,0.310379,0.145393,0.118131,0.108193,0.118131,0.199773
std,0.462682,0.352521,0.322787,0.310645,0.322787,0.399858
min,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0
75%,1.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0


## Train FuzzyART

In [5]:
# Feature matrix: every preprocessed column except the identifier and the churn label.
X = preprocessed_df.drop(["customerID", "Churn"], axis=1).values

# Hyper-parameters.
convergence = 3  # stop once more than this many epochs added no new node (5 appears to be an earlier setting)
neurons = 20
rho = 0.233
beta = 0.8
alpha = 0.1
network = FuzzyART(
    X.shape[-1],
    neurons,
    rho=rho,
    alpha=alpha,
    beta=beta,
    comp=True,
    fastEncode=0,
)

# Convergence tracking variables.
prev_active = 0    # highest `network.active` seen so far
converge = 0       # number of epochs in which `network.active` did not grow
results = []
active_nodes = []  # `network.active` recorded after every epoch

while True:
    # One epoch: present every sample to the network
    # (presumably `predict` learns by default, since the prediction cell passes learn=False -- confirm).
    for I in X:
        Z, k = network.predict(I.ravel())

    active_nodes.append(network.active)

    if prev_active < network.active:
        # The network grew during this epoch; remember the new size and keep training.
        prev_active = network.active
    else:
        # No growth; note that the counter is cumulative, it is never reset after a growth epoch.
        converge += 1
        if converge > convergence:
            break

## Obtain Predictions

In [6]:
# Predict: assign every sample to a category with learning switched off (learn=False).
pred = []
for I in X:
    Z, k = network.predict(I.ravel(), learn=False)
    # `k is None` means the pattern was not recognized; record the sentinel -1.
    # (Identity comparison is the correct test for None.)
    pred.append(-1 if k is None else k)
print("Unique Values and Counts...\n\n")
print(pd.Series(pred).value_counts())

Unique Values and Counts...


2     2196
9     1523
15    1104
12     826
10     537
13     454
14     288
3       71
8       33
16       8
1        3
dtype: int64


## Extract Raw Customer Templates

In [7]:
# Collect the weight vector of every category that was actually assigned to a sample.
feature_columns = preprocessed_df.drop(["customerID", "Churn"], axis=1).columns

# Build the id list once, in sorted order, so rows and `template_index` are
# guaranteed to line up. -1 is the "unrecognized pattern" sentinel used in the
# prediction cell, not a category: indexing with it would silently return the
# last neuron's weights, so it is excluded.
template_ids = sorted(set(pred) - {-1})

# Stack all templates in one call; the leading empty (0, n_features) block keeps
# the result well-shaped (and float) even when no category was assigned.
templates_matrix = np.vstack(
    [np.empty((0, X.shape[-1]))] + [network.z[0, :, idx] for idx in template_ids]
)

raw_templates_df = pd.DataFrame(templates_matrix, columns=feature_columns)
raw_templates_df.insert(0, "template_index", template_ids)
raw_templates_df.head()

Unnamed: 0,template_index,MultipleLines_No,MultipleLines_No phone service,MultipleLines_Yes,InternetService_DSL,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,...,tenure_48.0-60.0,tenure_60.0-72.0,MonthlyCharges,TotalCharges,gender,PaperlessBilling,PhoneService,Partner,SeniorCitizen,Dependents
0,1,3.2768e-11,5.764608e-42,1.7592190000000001e-31,5.764608e-42,7.205759e-40,0.008,2.882304e-41,0.008,1.441152e-40,...,2.81475e-34,1.407375e-33,0.1616,0.002231,1.6777220000000003e-17,1.441152e-40,1.0,8.388608000000001e-17,5.764608e-42,1.6777220000000003e-17
1,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.342686,0.005295,0.0,0.0,3.7414440000000002e-118,0.0,0.0,0.0
2,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.375579,0.005256,0.0,0.0,1.0,0.0,0.0,0.0
3,8,2.923003e-113,0.04,1.169201e-114,0.04,5.846007e-114,5.846007e-114,0.04,5.846007e-114,5.846007e-114,...,1.169201e-114,1.169201e-114,0.180002,0.0027,6.044629e-56,5.902958e-49,2.923003e-113,2.177807e-94,2.923003e-113,2.923003e-113
4,9,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0,...,0.0,0.0,0.153685,0.002165,0.0,0.0,1.0,0.0,0.0,0.0


## Restore Raw Customer Templates to Original Form

In [8]:
# Map the learned templates back to the original (human-readable) feature values.
# NOTE(review): many template weights are ~0 and the restored categorical columns
# are nearly identical across templates -- verify that inverse_transform handles
# near-zero one-hot weights as intended.
customer_templates_df = feat_transformer.inverse_transform(raw_templates_df)
customer_templates_df

Unnamed: 0,template_index,MonthlyCharges,TotalCharges,gender,PaperlessBilling,PhoneService,Partner,SeniorCitizen,Dependents,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaymentMethod,tenure
0,1,19.190004,19.378241,Female,No,Yes,No,0,No,No,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Two year,Mailed check,-0.07200000000000001-12.0
1,2,40.69391,45.98249,Female,No,No,No,0,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
2,3,44.600054,45.647424,Female,No,Yes,No,0,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
3,8,21.37528,23.450272,Female,No,No,No,0,No,No phone service,DSL,No,No,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
4,9,18.25007,18.800034,Female,No,Yes,No,0,No,No phone service,No,No internet service,No internet service,No internet service,No internet service,No internet service,No internet service,Month-to-month,Electronic check,-0.07200000000000001-12.0
5,10,32.9628,819.31255,Female,No,No,No,0,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
6,12,24.124663,24.2001,Female,No,No,No,0,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
7,13,23.900219,214.789283,Female,No,No,No,0,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
8,14,40.921264,664.40621,Female,No,No,No,0,No,No phone service,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
9,15,35.804288,1317.086134,Female,No,No,No,0,No,Yes,DSL,No,Yes,No,No,No,No,Month-to-month,Electronic check,-0.07200000000000001-12.0
