In [1]:
#Importing libraries
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from torch.autograd import Variable
%matplotlib inline
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.manifold import TSNE
from sklearn.linear_model import LogisticRegression

# One seed shared by NumPy and PyTorch so batch shuffling and weight
# initialisation are repeatable between runs.
SEED = 1234
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f6c2b2e1890>

In [2]:
# Mount Google Drive at /content/drive so the notebook can read and write
# files stored there. This import only exists inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Project folder on the mounted Google Drive. It holds the input data and is
# also used later as the destination for the saved model weights.
dl_path = '/content/drive/MyDrive/Fellowship.ai/Dockers_Outreach/Churn_Analysis/Deep_Learning/'
data_csv_path = dl_path + 'marketing_data/data.csv'

# Never truncate the column list when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

df = pd.read_csv(data_csv_path, low_memory=False)

In [4]:
# DataFrame.info() writes its summary to stdout and returns None, so it is
# called directly; wrapping it in print() only appends a stray "None" line.
df.info()
# First rows, rendered by the notebook as the cell's rich output.
df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 64000 entries, 0 to 63999
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   recency        64000 non-null  int64  
 1   history        64000 non-null  float64
 2   used_discount  64000 non-null  int64  
 3   used_bogo      64000 non-null  int64  
 4   zip_code       64000 non-null  object 
 5   is_referral    64000 non-null  int64  
 6   channel        64000 non-null  object 
 7   offer          64000 non-null  object 
 8   conversion     64000 non-null  int64  
dtypes: float64(1), int64(5), object(3)
memory usage: 4.4+ MB
None


Unnamed: 0,recency,history,used_discount,used_bogo,zip_code,is_referral,channel,offer,conversion
0,10,142.44,1,0,Surburban,0,Phone,Buy One Get One,0
1,6,329.08,1,1,Rural,1,Web,No Offer,0
2,7,180.65,0,1,Surburban,1,Web,Buy One Get One,0
3,9,675.83,1,0,Rural,1,Web,Discount,0
4,2,45.34,1,0,Urban,0,Web,Buy One Get One,0


In [5]:
# Churn label: 1 when `recency` is greater than 6, else 0.
# (presumably `recency` counts months since the last purchase, i.e. "no
# purchase in the last 6 months" -- confirm against the data dictionary)
# Vectorised comparison instead of a per-row Python lambda.
df['churn'] = (df['recency'] > 6).astype('int64')

# Treatment flag: 1 when the customer received any offer, 0 for 'No Offer'.
df['treatment'] = (df['offer'] != 'No Offer').astype('int64')

df.treatment.describe()

count    64000.000000
mean         0.667094
std          0.471257
min          0.000000
25%          0.000000
50%          1.000000
75%          1.000000
max          1.000000
Name: treatment, dtype: float64

In [6]:
def offer_conversion(df):
    """Return 1 if the record received an offer and converted, else 0.

    Despite its name, ``df`` is a single record (e.g. one row passed by
    ``DataFrame.apply(..., axis=1)`` or a dict) exposing the keys
    ``'offer'`` and ``'conversion'``.
    """
    received_offer = df['offer'] != 'No Offer'
    converted = df['conversion'] == 1
    return int(received_offer and converted)

def no_offer_conversion(df):
    """Return 1 if the record converted without receiving an offer, else 0.

    Despite its name, ``df`` is a single record (e.g. one row passed by
    ``DataFrame.apply(..., axis=1)`` or a dict) exposing the keys
    ``'offer'`` and ``'conversion'``.
    """
    without_offer = df['offer'] == 'No Offer'
    converted = df['conversion'] == 1
    return int(without_offer and converted)

In [7]:
#df['convert_with_offer'] = df.apply(offer_conversion, axis=1)
#df['convert_no_offer'] = df.apply(no_offer_conversion, axis=1)

In [8]:
# Summary of the non-numeric columns, one row per column.
df.describe(exclude='number').transpose()

Unnamed: 0,count,unique,top,freq
zip_code,64000,3,Surburban,28776
channel,64000,3,Web,28217
offer,64000,3,Buy One Get One,21387


In [9]:
# Summary statistics of the numeric columns, one row per column.
df.describe(include='number').transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
recency,64000.0,5.763734,3.507592,1.0,2.0,6.0,9.0,12.0
history,64000.0,242.085656,256.158608,29.99,64.66,158.11,325.6575,3345.93
used_discount,64000.0,0.551031,0.497393,0.0,0.0,1.0,1.0,1.0
used_bogo,64000.0,0.549719,0.497526,0.0,0.0,1.0,1.0,1.0
is_referral,64000.0,0.50225,0.499999,0.0,0.0,1.0,1.0,1.0
conversion,64000.0,0.146781,0.35389,0.0,0.0,0.0,0.0,1.0
churn,64000.0,0.428359,0.494845,0.0,0.0,0.0,1.0,1.0
treatment,64000.0,0.667094,0.471257,0.0,0.0,1.0,1.0,1.0


In [10]:
#Defining columns
categorical_columns = list(df.select_dtypes(exclude=['float', 'int']).columns.difference(['Churn']))
numerical_columns = list(df.columns.difference(categorical_columns))
outputs = ['churn']

In [12]:
#Processing columns

#Numerical
numerical_data = np.stack([df[col].values for col in numerical_columns], 1)
numerical_data = torch.tensor(numerical_data, dtype=torch.float)

#Categorical
for category in categorical_columns:
    df[category] = df[category].astype('category')
    
ch = df['channel'].cat.codes.values
off = df['offer'].cat.codes.values
zc = df['zip_code'].cat.codes.values

categorical_data = np.stack([ch, off, zc], 1)
categorical_data = torch.tensor(categorical_data, dtype=torch.int64)

#Outputs
df[outputs] = df[outputs].astype(int)

outputs = torch.tensor(df[outputs].values).flatten()
outputs = outputs.long()

In [13]:
#Dividing the data
total_records = numerical_data.shape[0]
train_records = int(total_records * .6)
valid_records = int(total_records * .2)
test_records = int(total_records * .2)
numerical_train_data = numerical_data[:train_records]
numerical_valid_data = numerical_data[train_records:train_records+valid_records]
numerical_test_data = numerical_data[train_records+valid_records:total_records]
categorical_train_data = categorical_data[:train_records]
categorical_valid_data = categorical_data[train_records:train_records+valid_records]
categorical_test_data = categorical_data[train_records+valid_records:total_records]
train_outputs = outputs[:train_records]
valid_outputs = outputs[train_records:train_records+valid_records]
test_outputs = outputs[train_records+valid_records:total_records]

In [14]:
from torch.nn.parameter import Parameter
from torch.nn import init
import math

#Creating the Neural Network
class Model(nn.Module):

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(8, 100)
        self.relu = nn.ReLU(inplace=True)
        self.bn1 = nn.BatchNorm1d(100)
        
        self.weights1 = Parameter(torch.Tensor(1, 112))
        init.kaiming_uniform_(self.weights1, a=math.sqrt(4))
        
        self.weights2 = Parameter(torch.Tensor(1, 112))
        init.kaiming_uniform_(self.weights2, a=math.sqrt(4))
        
        self.bias1 = Parameter(torch.Tensor(1))
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weights1)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias1, -bound, bound)
        
        self.bias2 = Parameter(torch.Tensor(1))
        fan_in, _ = init._calculate_fan_in_and_fan_out(self.weights2)
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias2, -bound, bound)
        
        self.layer1_1 = nn.Embedding(3, 4)
        self.bn1_1 = nn.BatchNorm1d(4)
        self.layer1_2 = nn.Embedding(3, 4)
        self.bn1_2 = nn.BatchNorm1d(4)
        self.layer1_3 = nn.Embedding(3, 4)
        self.bn1_3 = nn.BatchNorm1d(4)
        
        self.layer2 = nn.Linear(112, 112)
        self.bn2 = nn.BatchNorm1d(112)
        
    def forward(self, x_numerical, x_categorical):
        x1 = self.layer1(x_numerical)
        x1 = self.relu(x1)
        x1 = self.bn1(x1)
        
        x1_embedding = self.layer1_1(x_categorical[:,0])
        x1_embedding = self.relu(x1_embedding)
        x1_embedding = self.bn1_1(x1_embedding)
        
        x2_embedding = self.layer1_2(x_categorical[:,1])
        x2_embedding = self.relu(x2_embedding)
        x2_embedding = self.bn1_2(x2_embedding)
        
        x3_embedding = self.layer1_3(x_categorical[:,2])
        x3_embedding = self.relu(x3_embedding)
        x3_embedding = self.bn1_3(x3_embedding)
        
        x_embedding = torch.cat([x1_embedding,x2_embedding,x3_embedding], 1)
                
        x1 = torch.cat([x1 , x_embedding], 1)
        
        x2 = self.layer2(x1)        
        emb = self.relu(x2)
        x2 = self.bn2(emb)
        
        x2_weights1 = torch.mm(x2, self.weights1.t()) + self.bias1
        x2_weights2 = torch.mm(x2, self.weights2.t()) + self.bias2
        
        x3 = torch.cat([x2_weights1, x2_weights2], 1)
                
        return emb, self.weights1, self.weights2, x3

In [15]:
# Build the network with its default constructor arguments; the weights are
# freshly (randomly) initialised each time this cell runs.
model = Model()

In [16]:
# Numeric summary again (same call as the earlier describe cell), shown next
# to the training configuration for reference.
df.describe(include='number').transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
recency,64000.0,5.763734,3.507592,1.0,2.0,6.0,9.0,12.0
history,64000.0,242.085656,256.158608,29.99,64.66,158.11,325.6575,3345.93
used_discount,64000.0,0.551031,0.497393,0.0,0.0,1.0,1.0,1.0
used_bogo,64000.0,0.549719,0.497526,0.0,0.0,1.0,1.0,1.0
is_referral,64000.0,0.50225,0.499999,0.0,0.0,1.0,1.0,1.0
conversion,64000.0,0.146781,0.35389,0.0,0.0,0.0,0.0,1.0
churn,64000.0,0.428359,0.494845,0.0,0.0,0.0,1.0,1.0
treatment,64000.0,0.667094,0.471257,0.0,0.0,1.0,1.0,1.0


In [17]:
#===============================================================================================
# Class weights for the loss, given as [loyal, churn] = [1 - churn_percentage, churn_percentage].
# churn_percentage=0.7 means a churn:loyal weight ratio of 7:3.
# churn_percentage=0.5 weights both classes equally (unweighted model).
#===============================================================================================
## Our data contains about 43% churned customers
# NOTE(review): using the churn share as the churn weight gives the rarer
# class the smaller weight -- confirm that this direction is intended.
churn_percentage = 0.43

#Defining loss function
class_weights = torch.tensor([1 - churn_percentage, churn_percentage], dtype=torch.float)
loss_function = nn.CrossEntropyLoss(weight=class_weights)
# Unweighted alternative: loss_function = nn.CrossEntropyLoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
# 29 June 2020: added a learning-rate scheduler (the rate is multiplied by
# 0.9 on every scheduler.step() call).
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

In [18]:
#Training the data
# Mini-batch training with a validation pass after every epoch. The weights
# of the epoch with the lowest mean validation loss are checkpointed to the
# working directory.
epochs = 2
batch_size = 400
mean_losses_train = []   # one detached scalar tensor per epoch
mean_losses_valid = []   # one scalar tensor per epoch
best_loss_valid = np.inf
best_epoch = None        # 1-based epoch of the best checkpoint so far

for i in range(1, epochs + 1):
    model.train()
    aggregated_losses_train = []
    aggregated_losses_valid = []

    # Fresh random order each epoch. Only full batches are used, so up to
    # batch_size - 1 trailing samples are skipped per epoch.
    idxs = np.random.permutation(train_records)
    for j in range(train_records // batch_size):
        start_train = j * batch_size
        end_train = start_train + batch_size
        idxs_batch = idxs[start_train:end_train]

        train, weights1, weights2, train_embed = model(numerical_train_data[idxs_batch], categorical_train_data[idxs_batch])
        train_loss = loss_function(train_embed, train_outputs[idxs_batch])
        # Keep only the value: a detached copy does not hold on to the
        # autograd graph of the batch.
        aggregated_losses_train.append(train_loss.detach())

        print(f'iteration: {j:3} loss: {train_loss.item():10.8f}')

        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()

    # Computed once per epoch instead of re-stacking the list on every batch.
    mean_loss_train = torch.mean(torch.stack(aggregated_losses_train))
    print(f'epoch: {i:3} mean loss training: {mean_loss_train.item():10.8f}')
    mean_losses_train.append(mean_loss_train)

    # Validation pass: eval mode and no gradient tracking.
    model.eval()
    with torch.no_grad():
        for k in range(valid_records // batch_size):
            start_valid = k * batch_size
            end_valid = start_valid + batch_size
            valid, weights1, weights2, valid_embed = model(numerical_valid_data[start_valid:end_valid], categorical_valid_data[start_valid:end_valid])
            valid_loss = loss_function(valid_embed, valid_outputs[start_valid:end_valid])
            aggregated_losses_valid.append(valid_loss)
    mean_loss_valid = torch.mean(torch.stack(aggregated_losses_valid))
    print(f'epoch: {i:3} mean loss validation: {mean_loss_valid:.8f}')

    # Compare plain Python floats; best_loss_valid starts as np.inf and is a
    # tensor after the first improvement.
    if mean_loss_valid.item() < float(best_loss_valid):
        best_loss_valid = mean_loss_valid
        torch.save(model.state_dict(), "model_train_cre_{}.pth".format(churn_percentage))
        best_epoch = i

    mean_losses_valid.append(mean_loss_valid)
    # Decay the learning rate once per epoch.
    scheduler.step()

iteration:   0 loss: 0.97195375
iteration:   1 loss: 0.78738350
iteration:   2 loss: 0.72690475
iteration:   3 loss: 0.68053591
iteration:   4 loss: 0.60701221
iteration:   5 loss: 0.58993936
iteration:   6 loss: 0.55632275
iteration:   7 loss: 0.52437007
iteration:   8 loss: 0.50128090
iteration:   9 loss: 0.48120534
iteration:  10 loss: 0.44732088
iteration:  11 loss: 0.43904611
iteration:  12 loss: 0.40768781
iteration:  13 loss: 0.39202169
iteration:  14 loss: 0.37582752
iteration:  15 loss: 0.34398428
iteration:  16 loss: 0.33396608
iteration:  17 loss: 0.29109889
iteration:  18 loss: 0.29396838
iteration:  19 loss: 0.26861793
iteration:  20 loss: 0.26129377
iteration:  21 loss: 0.23506103
iteration:  22 loss: 0.21828464
iteration:  23 loss: 0.20861980
iteration:  24 loss: 0.19762470
iteration:  25 loss: 0.18368812
iteration:  26 loss: 0.17103617
iteration:  27 loss: 0.15923019
iteration:  28 loss: 0.16168413
iteration:  29 loss: 0.14645535
iteration:  30 loss: 0.15544160
iteratio

In [19]:
# Persist the model's current weights to the project folder on Drive.
# NOTE(review): this writes whatever weights are in memory now (the state
# after the last epoch), whereas the training loop's own checkpoint file in
# the working directory holds the best-validation epoch -- confirm which one
# is meant to be kept.
torch.save(model.state_dict(), dl_path+"model_train_cre_{}.pth".format(churn_percentage))

In [20]:
#Creating predictions
# Score the whole validation split in a single forward pass.
# Eval mode is set explicitly so the BatchNorm layers use their running
# statistics regardless of which cell was run last.
model.eval()
with torch.no_grad():
    valid, valid_weights1, valid_weights2, valid_embed = model(numerical_valid_data, categorical_valid_data)
    valid_loss = loss_function(valid_embed, valid_outputs)
    total_valid_loss = valid_loss
print(f'Loss: {total_valid_loss:.8f}')

Loss: 0.00164740


In [21]:
from sklearn.metrics import f1_score

# Predicted class = index of the larger of the two logits in each row.
# torch.argmax is used because valid_embed is a torch tensor; calling
# np.argmax on it only works through NumPy's duck-typing fallback.
valid_val = torch.argmax(valid_embed, dim=1)

# In the confusion matrix, rows are true classes and columns are predictions.
print(confusion_matrix(valid_outputs, valid_val))
print(classification_report(valid_outputs, valid_val))
print("Accuracy: ", accuracy_score(valid_outputs, valid_val))
print("F1 Score: ", f1_score(valid_outputs, valid_val, average='macro'))

[[7279    0]
 [   0 5521]]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      7279
           1       1.00      1.00      1.00      5521

    accuracy                           1.00     12800
   macro avg       1.00      1.00      1.00     12800
weighted avg       1.00      1.00      1.00     12800

Accuracy:  1.0
F1 Score:  1.0
