In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [30]:
df = pd.read_csv('data_for_churn_analysis.csv')

In [11]:
df.shape

(104143, 18)

In [4]:
df.head()

Unnamed: 0,device,first_payment_amount,age,city,number_of_cards,payments_initiated,payments_failed,payments_completed,payments_completed_amount_first_7days,reward_purchase_count_first_7days,coins_redeemed_first_7days,is_referral,visits_feature_1,visits_feature_2,given_permission_1,given_permission_2,user_id,is_churned
0,samsung,0,20.0,Ahmedabad,2.0,1.0,0.0,1.0,0.0,,0.0,False,3.0,0.0,1,0,269438,0
1,xiaomi,0,20.0,Surat,1.0,1.0,0.0,1.0,0.0,,0.0,True,0.0,0.0,1,0,139521,0
2,xiaomi,0,20.0,Kullu,1.0,2.0,1.0,1.0,0.0,0.0,0.0,True,0.0,0.0,0,1,307352,0
3,oneplus,0,20.0,Mumbai,2.0,4.0,1.0,2.0,322.0,2.0,20.0,False,0.0,0.0,1,1,456424,0
4,apple,0,20.0,Pune,2.0,1.0,0.0,1.0,0.0,0.0,0.0,False,0.0,1.0,1,1,398779,0


In [10]:
df['is_churned'].value_counts(normalize=True)

is_churned
0    0.713192
1    0.286808
Name: proportion, dtype: float64

In [12]:
df.columns

Index(['device', 'first_payment_amount', 'age', 'city', 'number_of_cards',
       'payments_initiated', 'payments_failed', 'payments_completed',
       'payments_completed_amount_first_7days',
       'reward_purchase_count_first_7days', 'coins_redeemed_first_7days',
       'is_referral', 'visits_feature_1', 'visits_feature_2',
       'given_permission_1', 'given_permission_2', 'user_id', 'is_churned'],
      dtype='object')

## **Deep neural networks (DNNs) for propensity score matching**

1. **Impact of Referrals on Customer Acquisition and Retention**:
   - Research Question: Do customers acquired through referrals (`is_referral`) exhibit different behaviors and retention rates compared to non-referred customers?
   - Treatment: Customer acquisition through referrals
   - Outcome: Customer behavior (e.g., `payments_initiated`, `payments_completed`, `visits_feature_1`, `visits_feature_2`) and churn (`is_churned`)
   - Potential Confounders: `device`, `age`, `city`, `number_of_cards`, `payments_failed`, `payments_completed_amount_first_7days`, `reward_purchase_count_first_7days`, `coins_redeemed_first_7days`, `given_permission_1`, `given_permission_2`

In [57]:
# Packages

from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn

### **Preprocessing**

In [33]:
# Preprocessing
obj_cols = X.select_dtypes(include='object').columns
le = LabelEncoder()
for col in obj_cols:
    [col] = le.fit_transform(df[col])

df['is_referral'] = df['is_referral'].astype('int')


In [44]:
# Dealing with missing values
missing_cols = df.columns[df.isna().any()].tolist()
for col in missing_cols:
    df[col] = df[col].fillna(df[col].mean())

### **Data Split**

In [49]:
confounders = [
    'device',
    'age',
    'city',
    'number_of_cards',
    'payments_failed',
    'payments_completed_amount_first_7days',
    'reward_purchase_count_first_7days',
    'coins_redeemed_first_7days',
    'given_permission_1',
    'given_permission_2',
    'is_referral' # treatment
]

# 
y = df['is_churned']
X = df[confounders]

In [52]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [62]:
# converting to pytorch tensors
X_train = torch.tensor(X_train.to_numpy(), dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_val = torch.tensor(X_val.to_numpy(), dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.long)

: 

In [61]:
X_train.to_numpy()

array([[ 25.,  34., 438., ...,   1.,   1.,   1.],
       [ 67.,  31.,   5., ...,   1.,   0.,   0.],
       [ 69.,  39., 438., ...,   1.,   0.,   0.],
       ...,
       [ 42.,  67., 259., ...,   1.,   0.,   1.],
       [  2.,  22.,  63., ...,   1.,   1.,   0.],
       [ 54.,  26., 105., ...,   1.,   1.,   0.]])