In [None]:
import tensorflow as tf
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [None]:
filepath = 'telco_customer_churn.csv'

# Read the CSV and discard the customerID column in a single chained expression.
df = pd.read_csv(filepath).drop(columns=['customerID'])

# Preview five randomly chosen rows.
df.sample(5)

In [None]:
# Display the dtype of every column as loaded from the CSV.
df.dtypes

In [None]:
# Columns handled as categorical features (order kept as originally declared).
features_cat = [
    'gender',
    'SeniorCitizen',
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'InternetService',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'Contract',
    'PaperlessBilling',
    'PaymentMethod',
]

# Columns handled as numeric features.
features_num = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]

In [None]:
# Categorical features: convert each listed column to the pandas 'category' dtype.
for col in features_cat:
    df[col] = df[col].astype('category')

# Numeric features: parse to float64. Values that cannot be parsed become NaN
# (errors='coerce') so they show up in the missing-value check instead of
# raising. This replaces a bare `except:` fallback that also swallowed
# unrelated errors such as KeyboardInterrupt.
for col in features_num:
    df[col] = pd.to_numeric(df[col], errors='coerce').astype('float')

df.dtypes

In [None]:
# Count the missing entries of every column in one pass, then print one
# "<column>: <count>" line per column.
null_counts = df.isna().sum()
for name, n_missing in null_counts.items():
    print(f'{name}: {n_missing}')

In [None]:
# Drop every row that contains at least one missing value.
df = df.dropna()

In [None]:
# Class balance: the distinct Churn labels and the number of rows carrying each.
np.unique(df['Churn'], return_counts=True)

In [None]:
from FeatureEncoder import FeatureEncoder
from sklearn.preprocessing import LabelEncoder

# Target: the Churn column, mapped to integer class codes by LabelEncoder.
labelEnc = LabelEncoder()
y = labelEnc.fit_transform(df['Churn'].values)

# Predictors: every remaining column, passed through the project's FeatureEncoder.
X = df.drop(columns=['Churn'])
args = dict(dataframe=X, features_cat=features_cat, features_num=features_num)

feat_enc = FeatureEncoder(**args)
feat_enc.encode()
X = feat_enc.get_encoded_features()

In [None]:
from sklearn.model_selection import train_test_split

# 70 % of the rows go to training; the remaining 30 % is halved into
# validation and test. Both splits use random_state=0 for repeatability.
X_train, X_tmp, y_train, y_tmp = train_test_split(X, y, train_size=0.7, random_state=0)
X_vald, X_test, y_vald, y_test = train_test_split(X_tmp, y_tmp, train_size=0.5, random_state=0)

# Shapes of the three feature partitions (train, validation, test).
for split_X in (X_train, X_vald, X_test):
    print(split_X.shape)

# Label distribution inside each partition, in the same order.
for split_y in (y_train, y_vald, y_test):
    print(np.unique(split_y, return_counts=True))

In [None]:
def array_to_dataset(data, target, shuffle=True, batch_size=64):
    """Wrap features and labels in a batched ``tf.data.Dataset``.

    Args:
        data: Feature container accepted by ``Dataset.from_tensor_slices``;
            sliced along its first axis.
        target: Labels aligned with ``data`` along the first axis.
        shuffle: When True, elements are reshuffled on every pass over the
            dataset before batching.
        batch_size: Number of elements per batch.

    Returns:
        A dataset yielding ``(data_batch, target_batch)`` pairs.
    """
    ds = tf.data.Dataset.from_tensor_slices((data, target))
    if shuffle:
        # A buffer smaller than the dataset only mixes elements inside a
        # sliding window. Use the full element count so every epoch is a
        # uniform permutation (max(..., 1) keeps an empty dataset valid).
        n_elements = int(ds.cardinality().numpy())
        ds = ds.shuffle(buffer_size=max(n_elements, 1))
    # Batch both variants the same way and let tf.data pick the prefetch depth;
    # prefetch counts batches, not samples, so reusing batch_size was excessive.
    return ds.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Only the training split is shuffled; validation and test keep their row order.
train_ds = array_to_dataset(X_train, y_train, shuffle=True)
vald_ds = array_to_dataset(X_vald, y_vald, shuffle=False)
test_ds = array_to_dataset(X_test, y_test, shuffle=False)

In [None]:
import IterativeFeatureExclusion as IFE

# Model dimensions are derived from the training split.
n_features = X_train.shape[1]
_, counts = np.unique(y_train, return_counts=True)
n_classes = len(counts)
hidden_size = 32
r = 5.6498  # TODO(review): document what this IFENet parameter controls and where the value comes from

print(f'Number of classes: {n_classes}')
model = IFE.IFENetClassifier(n_features=n_features, n_classes=n_classes, hidden_size=hidden_size, r=r)

# NOTE(review): BinaryCrossentropy with default arguments expects probabilities
# (from_logits=False); presumably the classifier emits one sigmoid output per
# sample when n_classes == 2 — confirm against IFENetClassifier.
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

checkpoint_path = 'checkpoints/ifeNet_telco.weights.h5'

callbacks = [
    # Stop once val_loss has not improved for 10 consecutive epochs.
    tf.keras.callbacks.EarlyStopping(patience=10, monitor='val_loss'),
    # save_best_only=True makes the `monitor` setting effective: the file is
    # rewritten only when val_accuracy improves. Without it the checkpoint is
    # overwritten every epoch, so reloading it would restore the last epoch
    # rather than the best one.
    tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, save_weights_only=True,
                                       monitor='val_accuracy', mode='max', save_best_only=True),
]

epochs = 50
batch_size = 64
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
# Train on train_ds for up to `epochs` epochs, evaluating on vald_ds and
# applying the configured callbacks; the returned History object is kept in `history`.
history = model.fit(train_ds, validation_data=vald_ds, epochs=epochs, callbacks=callbacks)

In [None]:
# Load the weights stored at checkpoint_path back into the model.
model.load_weights(checkpoint_path)

In [None]:
# Gather rounded predictions and true labels batch by batch, then join them once.
pred_batches = []
label_batches = []

for data, label in test_ds:
    pred_batches.append(np.round(model(data)).ravel())
    label_batches.append(label.numpy().ravel())

# The leading empty float64 array keeps the result dtype (float64) and the
# empty-dataset result identical to repeatedly appending onto np.empty((0,)).
y_pred = np.concatenate([np.empty((0,))] + pred_batches)
y_test = np.concatenate([np.empty((0,))] + label_batches)

In [None]:
# Print accuracy, the confusion matrix and the per-class report, in that order.
for metric_fn in (accuracy_score, confusion_matrix, classification_report):
    print(metric_fn(y_test, y_pred))

In [None]:
# Pass the model's `input_scores` attribute to the encoder and keep the result.
# NOTE(review): presumably this maps scores of the encoded inputs back onto the
# original feature names — confirm against FeatureEncoder.get_feature_scores.
feat_scores = feat_enc.get_feature_scores(input_scores = model.input_scores)