# Churn Modeling 
Binary classification using embeddings of categorical features.

#### Dataset:
The dataset used for this experiment is provided in https://github.com/sharmaroshan/Churn-Modelling-Dataset

Balance of Data: 
{1: 0.2037, 0: 0.7963}

### Pipelines:
1. Package categorical features together and separate those from numerical features.
2. Package each categorical feature separately, and keep them separate from the numerical features.

### Modeling:
Both TensorFlow and PyTorch were used, each reaching roughly 85% accuracy on the test data. The aim was to compare the performance of these two packages on the same dataset. 

It would be nice to compare with the performance of tree-based models.



In [75]:
import pandas as pd

# Churn dataset CSV; the first CSV column is used as the DataFrame index.
url = 'https://raw.githubusercontent.com/sharmaroshan/Churn-Modelling-Dataset/master/Churn_Modelling.csv'
df = pd.read_csv(url, index_col=0)
print(df.shape)
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() appended a stray "None" line to the output.
df.info()
df.head()

(10000, 13)
<class 'pandas.core.frame.DataFrame'>
Int64Index: 10000 entries, 1 to 10000
Data columns (total 13 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CustomerId       10000 non-null  int64  
 1   Surname          10000 non-null  object 
 2   CreditScore      10000 non-null  int64  
 3   Geography        10000 non-null  object 
 4   Gender           10000 non-null  object 
 5   Age              10000 non-null  int64  
 6   Tenure           10000 non-null  int64  
 7   Balance          10000 non-null  float64
 8   NumOfProducts    10000 non-null  int64  
 9   HasCrCard        10000 non-null  int64  
 10  IsActiveMember   10000 non-null  int64  
 11  EstimatedSalary  10000 non-null  float64
 12  Exited           10000 non-null  int64  
dtypes: float64(2), int64(8), object(3)
memory usage: 1.1+ MB
None


Unnamed: 0_level_0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
RowNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [93]:
from collections import Counter
# Tally the target labels, then express each tally as a fraction of all rows.
c = Counter(df['Exited'])
precentages = {label: count / len(df) for label, count in c.items()}
print(c)
print(f'percentages: {precentages}')

Counter({0: 7963, 1: 2037})
percentages: {1: 0.2037, 0: 0.7963}


In [77]:
# Feature groups and the prediction target.
categorical_columns = ['Geography', 'Gender', 'HasCrCard', 'IsActiveMember']
numerical_columns = ['CreditScore', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'EstimatedSalary']
outputs = ['Exited']

cat_count = len(categorical_columns)
num_count = len(numerical_columns)

print(cat_count + num_count)

# For every categorical column: record (number of levels, embedding width) and
# replace the column in df by its integer category codes.
# Embedding width is half the number of levels, rounded up, capped at 50.
categorical_embedding_sizes = {}
for name in categorical_columns:
    as_category = df[name].astype('category')
    n_levels = len(as_category.cat.categories)
    categorical_embedding_sizes[name] = (n_levels, min(50, (n_levels + 1) // 2))
    df[name] = as_category.cat.codes.values

print(categorical_embedding_sizes)

10
{'Geography': (3, 2), 'Gender': (2, 1), 'HasCrCard': (2, 1), 'IsActiveMember': (2, 1)}


In [78]:
from sklearn.preprocessing import MinMaxScaler

# Rescale the numerical columns of df in place with a min-max scaler.
mns = MinMaxScaler()
# NOTE(review): the scaler is fitted on every row of df, while the
# train/test/eval split only happens in a later cell. The held-out rows
# therefore contribute to the min/max used for scaling (data leakage).
# Fitting on the training rows only would require splitting first.
df[numerical_columns] = mns.fit_transform(df[numerical_columns])
df.head()

Unnamed: 0_level_0,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
RowNumber,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,15634602,Hargrave,0.538,0,0,0.324324,0.2,0.0,0.0,1,1,0.506735,1
2,15647311,Hill,0.516,2,0,0.310811,0.1,0.334031,0.0,0,1,0.562709,0
3,15619304,Onio,0.304,0,0,0.324324,0.8,0.636357,0.666667,1,0,0.569654,1
4,15701354,Boni,0.698,0,0,0.283784,0.1,0.0,0.333333,0,0,0.46912,0
5,15737888,Mitchell,1.0,2,0,0.337838,0.2,0.500246,0.0,1,1,0.3954,0


In [79]:
# Model inputs (categorical codes first, then scaled numericals) and target.
# astype() returns a new DataFrame, so the float cast of the categorical codes
# no longer writes into a slice of df (the original column-by-column
# assignment triggered pandas' SettingWithCopyWarning).
X = df[categorical_columns + numerical_columns].astype(
    {name: float for name in categorical_columns}
)
y = df[outputs]

X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 10000 entries, 1 to 10000
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   Geography        10000 non-null  float64
 1   Gender           10000 non-null  float64
 2   HasCrCard        10000 non-null  float64
 3   IsActiveMember   10000 non-null  float64
 4   CreditScore      10000 non-null  float64
 5   Age              10000 non-null  float64
 6   Tenure           10000 non-null  float64
 7   Balance          10000 non-null  float64
 8   NumOfProducts    10000 non-null  float64
 9   EstimatedSalary  10000 non-null  float64
dtypes: float64(10)
memory usage: 859.4 KB


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [80]:
from sklearn.model_selection import train_test_split

# First hold out 20% of the rows as the test set ...
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# ... then carve 10% of the remaining rows off as the evaluation set.
X_train, X_eval, y_train, y_eval = train_test_split(
    X_train, y_train, test_size=0.1, random_state=42
)

# Flattened Input

In [38]:
## Numpy array construction
import numpy as np

# Build one (rows, n_categorical) and one (rows, n_numerical) array per split,
# with columns in the order of categorical_columns / numerical_columns.
# Labels are flattened with np.asarray(...).reshape(-1) instead of `.values`,
# so the cell still works if y_* has already been rebound to an array or a
# framework tensor by an earlier run of this or a later cell.
X_train_cat = X_train[categorical_columns].to_numpy()
X_train_num = X_train[numerical_columns].to_numpy()
y_train = np.asarray(y_train).reshape(-1)
print(X_train_cat.shape,X_train_num.shape,y_train.shape)

X_test_cat = X_test[categorical_columns].to_numpy()
X_test_num = X_test[numerical_columns].to_numpy()
y_test = np.asarray(y_test).reshape(-1)
print(X_test_cat.shape,X_test_num.shape,y_test.shape)

X_eval_cat = X_eval[categorical_columns].to_numpy()
X_eval_num = X_eval[numerical_columns].to_numpy()
y_eval = np.asarray(y_eval).reshape(-1)
print(X_eval_cat.shape,X_eval_num.shape,y_eval.shape)

(7200, 4) (7200, 6) (7200,)
(2000, 4) (2000, 6) (2000,)
(800, 4) (800, 6) (800,)


In [39]:
from collections import Counter
# Class tallies of the training labels, followed by the label vector itself.
c = Counter(y_train)
print(c, y_train, sep='\n')

Counter({0: 5709, 1: 1491})
[1 0 0 ... 0 0 0]


## Tensorflow Section

In [29]:
## Tensorflow tensor construction
import tensorflow as tf

# Categorical code matrices become int64 tensors (they index embeddings).
X_train_cat, X_test_cat, X_eval_cat = [
    tf.convert_to_tensor(codes, dtype=tf.int64)
    for codes in (X_train_cat, X_test_cat, X_eval_cat)
]

# Numerical feature matrices become float64 tensors.
X_train_num, X_test_num, X_eval_num = [
    tf.convert_to_tensor(values, dtype=tf.float64)
    for values in (X_train_num, X_test_num, X_eval_num)
]

# Labels keep whatever dtype they already have.
y_train, y_test, y_eval = [
    tf.convert_to_tensor(labels) for labels in (y_train, y_test, y_eval)
]

In [37]:
# Two-input Keras model: one tensor holding all categorical codes and one
# holding all numerical features.
# `shape` is passed as a real tuple: `(cat_count)` without the trailing comma
# is just the int cat_count, which Keras documents as a shape *tuple* and which
# is not accepted uniformly across Keras versions.
cat_input = tf.keras.Input(shape=(cat_count,))
num_input = tf.keras.Input(shape=(num_count,))

# One embedding per categorical column, fed by that column of cat_input.
emb_layers = []
for i,col in enumerate(categorical_columns):
  em_size = categorical_embedding_sizes[col]
  emb_layers.append(tf.keras.layers.Embedding(em_size[0],em_size[1])(cat_input[:,i]))

# Concatenate all embeddings, then append the numerical features.
merge = tf.keras.layers.Concatenate(axis=1)(emb_layers)
x2 = tf.keras.layers.concatenate([merge,num_input])
x3 = tf.keras.layers.Dense(5,activation='relu')(x2)
x7 = tf.keras.layers.Dense(2,activation='softmax')(x3)  # two class probabilities
out_layer = x7
model = tf.keras.Model(inputs=[cat_input,num_input], outputs=out_layer)
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None".
model.summary()

Model: "model_7"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_30 (InputLayer)          [(None, 4)]          0           []                               
                                                                                                  
 tf.__operators__.getitem_8 (Sl  (None,)             0           ['input_30[0][0]']               
 icingOpLambda)                                                                                   
                                                                                                  
 tf.__operators__.getitem_9 (Sl  (None,)             0           ['input_30[0][0]']               
 icingOpLambda)                                                                                   
                                                                                            

In [38]:
## Test input/output shapes
# Push a single evaluation row through the untrained model to confirm that the
# input and output shapes line up.
print('input:', X_eval_cat[:1], X_eval_num[:1], 'output:', sep='\n')
print(model([X_eval_cat[:1],X_eval_num[:1]]))

input:
tf.Tensor([[0 0 1 1]], shape=(1, 4), dtype=int64)
tf.Tensor([[0.902      0.18918919 0.4        0.29960587 0.         0.18945269]], shape=(1, 6), dtype=float64)
output:
tf.Tensor([[0.2678597 0.7321403]], shape=(1, 2), dtype=float32)


In [39]:
# The model already ends in a softmax, hence from_logits=False.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
)
# The held-out evaluation split is now reported after every epoch. The
# previously commented-out validation_data passed X_eval, which does not match
# this model's two inputs (categorical codes, numerical features).
model.fit(
    [X_train_cat,X_train_num], y_train,
    epochs=50, batch_size=8,
    validation_data=([X_eval_cat,X_eval_num], y_eval),
)
_, accuracy = model.evaluate([X_test_cat,X_test_num], y_test, verbose=0)
print('Accuracy: %.2f' % (accuracy*100))

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 86.05


In [40]:
# Accuracy on the training and test splits.
_, train_acc = model.evaluate([X_train_cat,X_train_num], y_train)
_, test_acc = model.evaluate([X_test_cat,X_test_num], y_test)
# Predicted class = index of the largest probability in each row. A single
# vectorised argmax replaces the per-row list(x).index(max(x)) scan; both pick
# the first maximum on ties.
y_pred = np.argmax(model.predict([X_test_cat,X_test_num]), axis=1)

print('First Model Acc:')
print("train acc", train_acc)
print(" test acc", test_acc)
print(tf.math.confusion_matrix(y_test,y_pred))

First Model Acc:
train acc 0.8550000190734863
 test acc 0.8604999780654907
tf.Tensor(
[[1562   45]
 [ 234  159]], shape=(2, 2), dtype=int32)


## Torch Section

In [40]:
import torch

# Convert every split to torch tensors: int64 codes for the embeddings,
# float32 numerical features, and labels in their existing dtype.
# Inputs go through np.asarray first so the conversion does not depend on
# whether these names currently hold NumPy arrays or tensors left behind by
# the TensorFlow section (or by a previous run of this cell).
X_train_cat = torch.tensor(np.asarray(X_train_cat), dtype=torch.int64)
X_train_num = torch.tensor(np.asarray(X_train_num), dtype=torch.float)
y_train = torch.tensor(np.asarray(y_train))
print(X_train_cat.shape,X_train_num.shape,y_train.shape)

X_test_cat = torch.tensor(np.asarray(X_test_cat), dtype=torch.int64)
X_test_num = torch.tensor(np.asarray(X_test_num), dtype=torch.float)
y_test = torch.tensor(np.asarray(y_test))
print(X_test_cat.shape,X_test_num.shape,y_test.shape)

X_eval_cat = torch.tensor(np.asarray(X_eval_cat), dtype=torch.int64)
X_eval_num = torch.tensor(np.asarray(X_eval_num), dtype=torch.float)
y_eval = torch.tensor(np.asarray(y_eval))
print(X_eval_cat.shape,X_eval_num.shape,y_eval.shape)

torch.Size([7200, 4]) torch.Size([7200, 6]) torch.Size([7200])
torch.Size([2000, 4]) torch.Size([2000, 6]) torch.Size([2000])
torch.Size([800, 4]) torch.Size([800, 6]) torch.Size([800])


In [79]:
import torch.nn as nn

class churn_model(nn.Module):
  """Embedding + MLP classifier taking one categorical and one numerical tensor.

  Each categorical column gets its own embedding table. The embedded columns
  are concatenated with the numerical features and passed through a stack of
  Linear/ReLU layers ending in a 2-way softmax.
  """

  def __init__(self, emb_sizes,cat_count,num_count):
    """Build the embedding tables and the dense stack.

    Args:
      emb_sizes: mapping whose values are (number of levels, embedding width)
        pairs; one embedding is created per entry, in iteration order.
      cat_count: accepted for signature compatibility; not used here.
      num_count: number of numerical features appended to the embeddings.
    """
    super().__init__()
    sizes = list(emb_sizes.values())
    self.embeddings = nn.ModuleList(
        [nn.Embedding(size[0], size[1]) for size in sizes]
    )
    embedded_width = sum(size[1] for size in sizes)
    self.linears = nn.Sequential(
        nn.Linear(embedded_width + num_count, 32),
        nn.ReLU(),
        nn.Linear(32, 16),
        nn.ReLU(),
        nn.Linear(16, 8),
        nn.ReLU(),
        nn.Linear(8, 4),
        nn.ReLU(),
        nn.Linear(4, 2),
        nn.Softmax(dim=1),
    )

  def forward(self, x_categorical,x_numerical):
    """Return class probabilities for a batch.

    Column i of x_categorical is looked up in the i-th embedding table; the
    results are concatenated with x_numerical along dim 1.
    """
    embedded = [
        table(x_categorical[:, idx]) for idx, table in enumerate(self.embeddings)
    ]
    features = torch.cat([torch.cat(embedded, 1), x_numerical], 1)
    return self.linears(features)

In [80]:
## Crossentropy loss

def cel(x, y, eps=1e-12):
    """Mean cross-entropy of the true class, computed from class probabilities.

    Args:
        x: 2-D float tensor of class probabilities, one row per sample
            (for example the output of a softmax over dim 1).
        y: 1-D int64 tensor holding the true class index of each row.
        eps: lower bound applied to the probabilities before taking the log.
            A probability of exactly 0 would otherwise give log(0) = -inf,
            i.e. an infinite loss and non-finite gradients.

    Returns:
        0-dim tensor: the mean of -log(p[true class]) over the batch.
    """
    log_prob = -1.0 * torch.log(torch.clamp(x, min=eps))
    # Pick, for every row, the negative log-probability of its true class.
    loss = log_prob.gather(1, y.unsqueeze(1))
    return loss.mean()


## Testing the CEL
# Reference implementation: nn.CrossEntropyLoss works on raw scores, whereas
# cel works on probabilities, so the scores are passed through a softmax first.
criterion = nn.CrossEntropyLoss()

batch_size = 5
nb_classes = 10
x = torch.randn(batch_size, nb_classes, requires_grad=True)
y = torch.randint(0, nb_classes, (batch_size,))


loss_reference = criterion(x, y)
loss = cel(nn.Softmax(dim=1)(x), y)

print(loss_reference - loss)
# Enforce the agreement instead of only printing it; differences of the order
# of float32 round-off (~1e-7) are expected and tolerated.
assert torch.allclose(loss_reference, loss, atol=1e-6), "cel disagrees with nn.CrossEntropyLoss"

tensor(0., grad_fn=<SubBackward0>)


In [81]:
## Accuracy

def accu(x,y):
  """Return the fraction of rows of x whose arg-max (dim 1) equals the label in y."""
  hits = (x.argmax(dim=1) == y).float().sum()
  return hits / x.shape[0]

## Testing accu function

# Random probabilities and random labels, just to eyeball accu's output.
batch_size = 5
nb_classes = 2
x = torch.randn(batch_size, nb_classes, requires_grad=True)
x = nn.Softmax(dim=1)(x)
y = torch.randint(0, nb_classes, (batch_size,))

print(x, x.shape[0], y, sep='\n')

res = accu(x,y)
print(res)

tensor([[0.3875, 0.6125],
        [0.5510, 0.4490],
        [0.9869, 0.0131],
        [0.4042, 0.5958],
        [0.6600, 0.3400]], grad_fn=<SoftmaxBackward0>)
5
tensor([1, 1, 1, 0, 0])
tensor(0.4000)


In [83]:
from tqdm.notebook import tqdm_notebook

epochs = 300
batch_size = 8  # not used below: every epoch is one full-batch update

model = churn_model(categorical_embedding_sizes,cat_count,num_count)

loss_function = cel
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

# Full-batch training: one forward/backward pass over the whole training set
# per epoch. A disabled mini-batch variant used to sit here inside a string
# literal; it sliced the permutation with the epoch index `i` rather than the
# batch offset, so it has been dropped rather than kept as a template.
for i in range(epochs):
    # run model
    y_pred = model(X_train_cat, X_train_num)
    single_loss = loss_function(y_pred, y_train)
    # backpropagation
    optimizer.zero_grad()
    single_loss.backward()
    optimizer.step()

    # outputs
    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
    # Evaluation needs no gradients; no_grad avoids building an autograd graph
    # for the evaluation pass on every epoch.
    with torch.no_grad():
        y_eval_pred = model(X_eval_cat,X_eval_num)
        eval_loss = loss_function(y_eval_pred,y_eval)
        accuracy = accu(y_eval_pred,y_eval)
    print(f"{' ':5} epoch: {i:3} eval loss: {eval_loss:10.4f}")
    print(f"{' ':12} eval accuracy: {accuracy:10.4}")
      

epoch:   0 loss: 0.77101052
      epoch:   0 eval loss:     0.7621
             eval accuracy:     0.1912
epoch:   1 loss: 0.75879395
      epoch:   1 eval loss:     0.7506
             eval accuracy:     0.1912
epoch:   2 loss: 0.74780732
      epoch:   2 eval loss:     0.7398
             eval accuracy:     0.1912
epoch:   3 loss: 0.73760408
      epoch:   3 eval loss:     0.7303
             eval accuracy:     0.1912
epoch:   4 loss: 0.72850853
      epoch:   4 eval loss:     0.7227
             eval accuracy:     0.1912
epoch:   5 loss: 0.72121722
      epoch:   5 eval loss:     0.7150
             eval accuracy:     0.1912
epoch:   6 loss: 0.71393228
      epoch:   6 eval loss:     0.7076
             eval accuracy:     0.1912
epoch:   7 loss: 0.70687950
      epoch:   7 eval loss:     0.7002
             eval accuracy:     0.1912
epoch:   8 loss: 0.69994462
      epoch:   8 eval loss:     0.6927
             eval accuracy:     0.3812
epoch:   9 loss: 0.69280612
      epoch:   9 e

In [84]:
## Confusion matrix
import scipy

def confusion_matrix(x,y,num_classes = 2):
  """Count (predicted class, true class) pairs.

  Args:
    x: 2-D tensor of per-class scores; the predicted class of a row is its
      arg-max along axis 1.
    y: 1-D tensor of true class indices.
    num_classes: side length of the returned matrix. Samples whose predicted
      or true class falls outside [0, num_classes) are ignored.

  Returns:
    (num_classes, num_classes) int64 NumPy array with rows indexed by the
    PREDICTED class and columns by the TRUE class. Note that this is the
    transpose of the layout documented for tf.math.confusion_matrix.
  """
  # Import the submodule explicitly: a bare `import scipy` is not guaranteed
  # to expose `scipy.sparse` on every SciPy version.
  from scipy import sparse

  pred = np.argmax(x.detach().numpy(), axis=1)
  true = y.detach().numpy()

  # Drop out-of-range samples, including negative labels, which would
  # otherwise be rejected by coo_matrix as negative indices.
  keep = (pred < num_classes) & (true >= 0) & (true < num_classes)
  pred, true = pred[keep], true[keep]

  # One unit of weight per sample; converting the COO matrix to dense sums
  # duplicate (row, col) entries, which yields the counts.
  ones = np.ones(true.shape[0], dtype=np.int64)
  return sparse.coo_matrix(
      (ones, (pred, true)), shape=(num_classes, num_classes), dtype=np.int64
  ).toarray()

## Confusion matrix test
# Random 0/1 "scores" and labels, just to exercise confusion_matrix.
x = torch.from_numpy(np.random.randint(2, size=(5,2)))
y = torch.from_numpy(np.random.randint(2, size=5))

print(x,y)
print(confusion_matrix(x,y))
  

tensor([[0, 1],
        [1, 0],
        [0, 0],
        [0, 1],
        [0, 0]]) tensor([1, 0, 0, 1, 1])
[[2 1]
 [0 2]]


In [85]:
## Model performance on Test data

# Score the trained model on the held-out test split.
y_test_pred = model(X_test_cat,X_test_num)
test_accuracy = accu(y_test_pred,y_test)
test_cm = confusion_matrix(y_test_pred,y_test)

print(f"Test Accuracy:{test_accuracy:2.4}")
print("Confusion Matrix:", test_cm, sep="\n")

Test Accuracy:0.8575
Confusion Matrix:
[[1551  229]
 [  56  164]]


# Divided Input

In [81]:
import numpy as np

# "Divided" layout: each split becomes a list holding one 1-D array per
# categorical column followed by a single 2-D array of numerical features.
# NOTE: X_train / X_test / X_eval are rebound from DataFrames to lists here,
# so this cell cannot be re-run without re-running the split cell first.
# Labels are flattened with np.asarray(...).reshape(-1) rather than `.values`,
# because y_* may already have been rebound to an array or a framework tensor
# by the "Flattened Input" section.
X_train_cat = [X_train[col].values for col in categorical_columns]
X_train_num = np.stack([X_train[col].values for col in numerical_columns],1)
X_train = X_train_cat + [X_train_num]
y_train = np.asarray(y_train).reshape(-1)
print([ten.shape for ten in X_train],y_train.shape)

X_test_cat = [X_test[col].values for col in categorical_columns]
X_test_num = np.stack([X_test[col].values for col in numerical_columns],1)
X_test = X_test_cat + [X_test_num]
y_test = np.asarray(y_test).reshape(-1)
print([ten.shape for ten in X_test],y_test.shape)

X_eval_cat = [X_eval[col].values for col in categorical_columns]
X_eval_num = np.stack([X_eval[col].values for col in numerical_columns],1)
X_eval = X_eval_cat + [X_eval_num]
y_eval = np.asarray(y_eval).reshape(-1)
print([ten.shape for ten in X_eval],y_eval.shape)

[(7200,), (7200,), (7200,), (7200,), (7200, 6)] (7200,)
[(2000,), (2000,), (2000,), (2000,), (2000, 6)] (2000,)
[(800,), (800,), (800,), (800,), (800, 6)] (800,)


## Tensorflow Section

In [9]:
## Tensorflow tensor construction
import tensorflow as tf

# Each list holds the categorical vectors first (cat_count of them), then the
# numerical matrix, so the position decides the dtype: int64 codes for the
# embeddings, float64 for the numerical block. cat_count replaces the
# hard-coded 4 so the cell follows the length of categorical_columns.
X_train = [tf.convert_to_tensor(ten,dtype = (tf.int64 if i < cat_count else tf.float64)) for i,ten in enumerate(X_train)]
y_train = tf.convert_to_tensor(y_train)

X_test = [tf.convert_to_tensor(ten,dtype = (tf.int64 if i < cat_count else tf.float64)) for i,ten in enumerate(X_test)]
y_test = tf.convert_to_tensor(y_test)

X_eval = [tf.convert_to_tensor(ten,dtype = (tf.int64 if i < cat_count else tf.float64)) for i,ten in enumerate(X_eval)]
X_eval_num = tf.convert_to_tensor(X_eval_num,dtype = tf.float64)
y_eval = tf.convert_to_tensor(y_eval)

In [22]:
# One Keras input per element of X_train: width 1 for each 1-D categorical
# vector, and the column count for the 2-D numerical block.
# `shape` is passed as a tuple; the original passed a bare int, which Keras
# documents as a shape *tuple* and which is not accepted uniformly across
# Keras versions.
in_layer = [
    tf.keras.Input(shape=((1,) if len(ten.shape) == 1 else (ten.shape[1],)))
    for ten in X_train
]

# Embed every categorical input and flatten away the length-1 sequence axis.
emb_layers = []
for i,col in enumerate(categorical_columns):
  em_size = categorical_embedding_sizes[col]
  emb_layer = tf.keras.layers.Embedding(em_size[0],em_size[1])(in_layer[i])
  emb_layers.append(tf.keras.layers.Flatten()(emb_layer))

# Embeddings followed by the numerical input (the last element of in_layer).
merge = tf.keras.layers.Concatenate(axis=1)(emb_layers + in_layer[-1:])
x2 = tf.keras.layers.Dense(3,activation='relu')(merge)
x7 = tf.keras.layers.Dense(2,activation='softmax')(x2)  # two class probabilities
out_layer = x7
model2 = tf.keras.Model(inputs=in_layer, outputs=out_layer)
# summary() prints the table itself and returns None; wrapping it in print()
# only appended a stray "None".
model2.summary()

Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_21 (InputLayer)          [(None, 1)]          0           []                               
                                                                                                  
 input_22 (InputLayer)          [(None, 1)]          0           []                               
                                                                                                  
 input_23 (InputLayer)          [(None, 1)]          0           []                               
                                                                                                  
 input_24 (InputLayer)          [(None, 1)]          0           []                               
                                                                                            

In [23]:
## Test input/output shapes
# Feed the first 12 evaluation rows through the untrained model to confirm the
# input and output shapes line up.
x = [ten[:12] for ten in X_eval]
print('input:', x, 'output:', sep='\n')
print(model2(x))

input:
[<tf.Tensor: shape=(12,), dtype=int64, numpy=array([0, 1, 2, 0, 2, 1, 0, 2, 0, 1, 0, 0])>, <tf.Tensor: shape=(12,), dtype=int64, numpy=array([0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1])>, <tf.Tensor: shape=(12,), dtype=int64, numpy=array([1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1])>, <tf.Tensor: shape=(12,), dtype=int64, numpy=array([1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1])>, <tf.Tensor: shape=(12, 6), dtype=float64, numpy=
array([[0.902     , 0.18918919, 0.4       , 0.29960587, 0.        ,
        0.18945269],
       [0.832     , 0.13513514, 0.4       , 0.36148852, 0.        ,
        0.10793841],
       [0.956     , 0.13513514, 0.8       , 0.53713781, 0.        ,
        0.39675934],
       [0.896     , 0.24324324, 0.1       , 0.        , 0.33333333,
        0.79523855],
       [0.522     , 0.21621622, 0.4       , 0.        , 0.33333333,
        0.85477663],
       [0.502     , 0.48648649, 0.1       , 0.52228365, 0.33333333,
        0.99834494],
       [0.696     , 0.52702703, 0.6       , 0.543

In [24]:
# `learning_rate` replaces the deprecated `lr` keyword (the original call
# emitted a warning from the Adam constructor). The model already ends in a
# softmax, hence from_logits=False.
model2.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
    metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")],
)
# Report the held-out evaluation split after every epoch; it was previously
# built but left unused (validation_data was commented out).
model2.fit(X_train, y_train, epochs=50, batch_size=8, validation_data=(X_eval, y_eval))
_, accuracy = model2.evaluate(X_test, y_test, verbose=0)
print('Accuracy: %.2f' % (accuracy*100))

Epoch 1/50


  super(Adam, self).__init__(name, **kwargs)


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Accuracy: 86.25


In [25]:
# Accuracy on the training and test splits.
_, train_acc = model2.evaluate(X_train, y_train)
_, test_acc = model2.evaluate(X_test, y_test)
# Predicted class = index of the largest probability in each row. A single
# vectorised argmax replaces the per-row list(x).index(max(x)) scan; both pick
# the first maximum on ties.
y_pred = np.argmax(model2.predict(X_test), axis=1)

# These are the results of model2, so the heading no longer says "First Model".
print('Second Model Acc:')
print("train acc", train_acc)
print(" test acc", test_acc)
print(tf.math.confusion_matrix(y_test,y_pred))

First Model Acc:
train acc 0.8561111092567444
 test acc 0.862500011920929
tf.Tensor(
[[1558   49]
 [ 226  167]], shape=(2, 2), dtype=int32)


## Pytorch Section

In [82]:
import torch

# Each list holds the categorical vectors first (cat_count of them), then the
# numerical matrix: int64 codes for the embeddings, float32 for the numerical
# block. cat_count replaces the hard-coded 4.
# Elements go through np.asarray first so the conversion does not depend on
# whether the lists currently hold NumPy arrays or tensors left behind by the
# TensorFlow section.
X_train = [torch.tensor(np.asarray(ten), dtype=(torch.int64 if i < cat_count else torch.float)) for i,ten in enumerate(X_train)]
y_train = torch.tensor(np.asarray(y_train))
print([ten.shape for ten in X_train],y_train.shape)

X_test = [torch.tensor(np.asarray(ten), dtype=(torch.int64 if i < cat_count else torch.float)) for i,ten in enumerate(X_test)]
y_test = torch.tensor(np.asarray(y_test))
print([ten.shape for ten in X_test],y_test.shape)

X_eval = [torch.tensor(np.asarray(ten), dtype=(torch.int64 if i < cat_count else torch.float)) for i,ten in enumerate(X_eval)]
y_eval = torch.tensor(np.asarray(y_eval))
print([ten.shape for ten in X_eval],y_eval.shape)

[torch.Size([7200]), torch.Size([7200]), torch.Size([7200]), torch.Size([7200]), torch.Size([7200, 6])] torch.Size([7200])
[torch.Size([2000]), torch.Size([2000]), torch.Size([2000]), torch.Size([2000]), torch.Size([2000, 6])] torch.Size([2000])
[torch.Size([800]), torch.Size([800]), torch.Size([800]), torch.Size([800]), torch.Size([800, 6])] torch.Size([800])


In [83]:
import torch.nn as nn

class churn_model(nn.Module):
  """Embedding + MLP classifier taking one sequence of per-feature tensors.

  Each categorical column gets its own embedding table. The embedded columns
  are concatenated with the remaining (numerical) tensors and passed through a
  stack of Linear/ReLU layers ending in a 2-way softmax.
  """

  def __init__(self, emb_sizes,cat_count,num_count):
    """Build the embedding tables and the dense stack.

    Args:
      emb_sizes: mapping whose values are (number of levels, embedding width)
        pairs; one embedding is created per entry, in iteration order.
      cat_count: accepted for signature compatibility; not used here.
      num_count: number of numerical features appended to the embeddings.
    """
    super().__init__()
    eml = []
    new_cat_size = 0
    for col,emb_size in emb_sizes.items():
      eml.append(nn.Embedding(emb_size[0],emb_size[1]))
      new_cat_size += emb_size[1]
    self.embeddings = nn.ModuleList(eml)
    self.linears = nn.Sequential(
        nn.Linear(new_cat_size + num_count,32),
        nn.ReLU(),
        nn.Linear(32,16),
        nn.ReLU(),
        nn.Linear(16,8),
        nn.ReLU(),
        nn.Linear(8,2),
        nn.Softmax(dim=1),
    )

  def forward(self, x):
    """Return class probabilities for a batch.

    Args:
      x: sequence (list or tuple) of tensors. The first len(self.embeddings)
        entries are 1-D index tensors, one per categorical column; every
        remaining entry is concatenated as-is along dim 1.
    """
    n_emb = len(self.embeddings)
    embeddings = [e(x[i]) for i,e in enumerate(self.embeddings)]
    # list(...) lets callers pass a tuple as well as a list; the original
    # `embeddings + x[n:]` raised TypeError for tuples (list + tuple).
    x = torch.cat(embeddings + list(x[n_emb:]), 1)
    x = self.linears(x)
    return x

In [84]:
## Test input/output shapes
# Feed a single evaluation row through a freshly initialised model to confirm
# the input and output shapes line up.
x = [ten[:1] for ten in X_eval]
print('input:', x, 'output:', sep='\n')
model2 = churn_model(categorical_embedding_sizes,cat_count,num_count)
print(model2(x))

input:
[tensor([0]), tensor([0]), tensor([1]), tensor([1]), tensor([[0.9020, 0.1892, 0.4000, 0.2996, 0.0000, 0.1895]])]
output:
tensor([[0.4890, 0.5110]], grad_fn=<SoftmaxBackward0>)


In [85]:
## Crossentropy loss

def cel(x, y, eps=1e-12):
    """Mean cross-entropy of the true class, computed from class probabilities.

    Args:
        x: 2-D float tensor of class probabilities, one row per sample
            (for example the output of a softmax over dim 1).
        y: 1-D int64 tensor holding the true class index of each row.
        eps: lower bound applied to the probabilities before taking the log.
            A probability of exactly 0 would otherwise give log(0) = -inf,
            i.e. an infinite loss and non-finite gradients.

    Returns:
        0-dim tensor: the mean of -log(p[true class]) over the batch.
    """
    log_prob = -1.0 * torch.log(torch.clamp(x, min=eps))
    # Pick, for every row, the negative log-probability of its true class.
    loss = log_prob.gather(1, y.unsqueeze(1))
    return loss.mean()


## Testing the CEL
# Reference implementation: nn.CrossEntropyLoss works on raw scores, whereas
# cel works on probabilities, so the scores are passed through a softmax first.
criterion = nn.CrossEntropyLoss()

batch_size = 5
nb_classes = 10
x = torch.randn(batch_size, nb_classes, requires_grad=True)
y = torch.randint(0, nb_classes, (batch_size,))


loss_reference = criterion(x, y)
loss = cel(nn.Softmax(dim=1)(x), y)

print(loss_reference - loss)
# Enforce the agreement instead of only printing it; differences of the order
# of float32 round-off (~1e-7) are expected and tolerated.
assert torch.allclose(loss_reference, loss, atol=1e-6), "cel disagrees with nn.CrossEntropyLoss"

tensor(2.3842e-07, grad_fn=<SubBackward0>)


In [86]:
## Accuracy

def accu(x,y):
  """Return the fraction of rows of x whose arg-max (dim 1) equals the label in y."""
  hits = (x.argmax(dim=1) == y).float().sum()
  return hits / x.shape[0]

## Testing accu function

# Random probabilities and random labels, just to eyeball accu's output.
batch_size = 5
nb_classes = 2
x = torch.randn(batch_size, nb_classes, requires_grad=True)
x = nn.Softmax(dim=1)(x)
y = torch.randint(0, nb_classes, (batch_size,))

print(x, x.shape[0], y, sep='\n')

res = accu(x,y)
print(res)

tensor([[0.4309, 0.5691],
        [0.6378, 0.3622],
        [0.1065, 0.8935],
        [0.3350, 0.6650],
        [0.7413, 0.2587]], grad_fn=<SoftmaxBackward0>)
5
tensor([1, 0, 1, 1, 1])
tensor(0.8000)


In [87]:
from tqdm.notebook import tqdm_notebook

epochs = 300
batch_size = 8  # not used below: every epoch is one full-batch update

model = churn_model(categorical_embedding_sizes,cat_count,num_count)

loss_function = cel
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

# Full-batch training: one forward/backward pass over the whole training set
# per epoch. A disabled mini-batch variant used to sit here inside a string
# literal; it sliced the permutation with the epoch index `i` rather than the
# batch offset, so it has been dropped rather than kept as a template.
for i in range(epochs):
    # run model
    y_pred = model(X_train)
    single_loss = loss_function(y_pred, y_train)
    # backpropagation
    optimizer.zero_grad()
    single_loss.backward()
    optimizer.step()

    # outputs
    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
    # Evaluation needs no gradients; no_grad avoids building an autograd graph
    # for the evaluation pass on every epoch.
    with torch.no_grad():
        y_eval_pred = model(X_eval)
        eval_loss = loss_function(y_eval_pred,y_eval)
        accuracy = accu(y_eval_pred,y_eval)
    print(f"{' ':5} epoch: {i:3} eval loss: {eval_loss:10.4f}")
    print(f"{' ':12} eval accuracy: {accuracy:10.4}")


epoch:   0 loss: 0.75785184
      epoch:   0 eval loss:     0.7474
             eval accuracy:     0.1912
epoch:   1 loss: 0.74482232
      epoch:   1 eval loss:     0.7364
             eval accuracy:     0.1912
epoch:   2 loss: 0.73429126
      epoch:   2 eval loss:     0.7261
             eval accuracy:     0.1912
epoch:   3 loss: 0.72456557
      epoch:   3 eval loss:     0.7160
             eval accuracy:     0.1912
epoch:   4 loss: 0.71508974
      epoch:   4 eval loss:     0.7052
             eval accuracy:     0.2013
epoch:   5 loss: 0.70490259
      epoch:   5 eval loss:     0.6933
             eval accuracy:     0.4613
epoch:   6 loss: 0.69370401
      epoch:   6 eval loss:     0.6803
             eval accuracy:     0.8012
epoch:   7 loss: 0.68148059
      epoch:   7 eval loss:     0.6660
             eval accuracy:     0.8087
epoch:   8 loss: 0.66802055
      epoch:   8 eval loss:     0.6506
             eval accuracy:     0.8087
epoch:   9 loss: 0.65346301
      epoch:   9 e

In [88]:
## Confusion matrix
import scipy

def confusion_matrix(x,y,num_classes = 2):
  """Count (predicted class, true class) pairs.

  Args:
    x: 2-D tensor of per-class scores; the predicted class of a row is its
      arg-max along axis 1.
    y: 1-D tensor of true class indices.
    num_classes: side length of the returned matrix. Samples whose predicted
      or true class falls outside [0, num_classes) are ignored.

  Returns:
    (num_classes, num_classes) int64 NumPy array with rows indexed by the
    PREDICTED class and columns by the TRUE class. Note that this is the
    transpose of the layout documented for tf.math.confusion_matrix.
  """
  # Import the submodule explicitly: a bare `import scipy` is not guaranteed
  # to expose `scipy.sparse` on every SciPy version.
  from scipy import sparse

  pred = np.argmax(x.detach().numpy(), axis=1)
  true = y.detach().numpy()

  # Drop out-of-range samples, including negative labels, which would
  # otherwise be rejected by coo_matrix as negative indices.
  keep = (pred < num_classes) & (true >= 0) & (true < num_classes)
  pred, true = pred[keep], true[keep]

  # One unit of weight per sample; converting the COO matrix to dense sums
  # duplicate (row, col) entries, which yields the counts.
  ones = np.ones(true.shape[0], dtype=np.int64)
  return sparse.coo_matrix(
      (ones, (pred, true)), shape=(num_classes, num_classes), dtype=np.int64
  ).toarray()

## Confusion matrix test
# Random 0/1 "scores" and labels, just to exercise confusion_matrix.
x = torch.from_numpy(np.random.randint(2, size=(5,2)))
y = torch.from_numpy(np.random.randint(2, size=5))

print(x,y)
print(confusion_matrix(x,y))

tensor([[0, 1],
        [0, 1],
        [0, 0],
        [0, 1],
        [0, 0]]) tensor([1, 1, 1, 0, 1])
[[0 2]
 [1 2]]


In [89]:
## Model performance on Test data

# Score the trained model on the held-out test split.
y_test_pred = model(X_test)
test_accuracy = accu(y_test_pred,y_test)
test_cm = confusion_matrix(y_test_pred,y_test)

print(f"Test Accuracy:{test_accuracy:2.4}")
print("Confusion Matrix:", test_cm, sep="\n")

Test Accuracy:0.8625
Confusion Matrix:
[[1534  202]
 [  73  191]]
