# Library Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from torch import nn, optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader as DL
from torch.nn.utils import weight_norm as WN
import torch.nn.functional as F

from sklearn.model_selection import StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.impute import SimpleImputer

from time import time
import random as r

# Helper Functions

In [2]:
def breaker():
    print("\n" + 30*"-" +"\n")
    
def head(x=None, no_of_ele=5):
    breaker()
    print(x[:no_of_ele])
    breaker()
    
def getCol(x):
    return [col for col in x.columns]

def getObj(x):
    s = (x.dtypes == "object")
    return list(s[s].index)

def preprocess(x=None, *args):
    df = x.copy()
    df[args[0]]  = df[args[0]].map({'Female' : 0, 'Male' : 1})
    df[args[1]]  = df[args[1]].map({'No' : 0, 'Yes' : 1})
    df[args[2]]  = df[args[2]].map({'No' : 0, 'Yes' : 1})
    df[args[3]]  = df[args[3]].map({'No' : 0, 'Yes' : 1})
    df[args[4]]  = df[args[4]].map({'No' : 0, 'No phone service' : 1, 'Yes' : 2})
    df[args[5]]  = df[args[5]].map({'DSL' : 0, 'Fiber optic' : 1, 'No' : 2})
    df[args[6]]  = df[args[6]].map({'No' : 0, 'No internet service' : 1, 'Yes' : 2})
    df[args[7]]  = df[args[7]].map({'No' : 0, 'No internet service' : 1, 'Yes' : 2})
    df[args[8]]  = df[args[8]].map({'No' : 0, 'No internet service' : 1, 'Yes' : 2})
    df[args[9]]  = df[args[9]].map({'No' : 0, 'No internet service' : 1, 'Yes' : 2})
    df[args[10]] = df[args[10]].map({'No' : 0, 'No internet service' : 1, 'Yes' : 2})
    df[args[11]] = df[args[11]].map({'No' : 0, 'No internet service' : 1, 'Yes' : 2})
    df[args[12]] = df[args[12]].map({'Month-to-month' : 0, 'One year' : 1, 'Two year' : 2})
    df[args[13]] = df[args[13]].map({'No' : 0, 'Yes' : 1})
    df[args[14]] = df[args[14]].map({'Bank transfer (automatic)' : 0,
                                     'Credit card (automatic)' : 1,
                                     'Electronic check' : 2,
                                     'Mailed check' : 3})
    return df
    
si_mean = SimpleImputer(missing_values=np.nan, strategy="mean")
si_mf   = SimpleImputer(missing_values=np.nan, strategy="most_frequent")
sc_X    = StandardScaler()
mms_X   = MinMaxScaler(feature_range=(0, 1)) 

# Data Handling

In [3]:
tr_Set = pd.read_csv("../input/labdata-churn-challenge-2020/train.csv")
ts_Set = pd.read_csv("../input/labdata-churn-challenge-2020/test.csv")

breaker()
print("Train Set Shape :", repr(tr_Set.shape))
breaker()
print("Test Set Shape :", repr(ts_Set.shape))
breaker()
#print("Train Set Columns\n")
#for name in getCol(tr_Set):
#    print(name)
#breaker()
#print("Test Set Columns\n")
#for name in getCol(ts_Set):
#    print(name)
#breaker()
tr_Proc = preprocess(tr_Set, 'gender', 'Partner', 'Dependents', 'PhoneService',
                     'MultipleLines', 'InternetService', 'OnlineSecurity',
                     'OnlineBackup', 'DeviceProtection','TechSupport',
                     'StreamingTV', 'StreamingMovies', 'Contract', 
                     'PaperlessBilling', 'PaymentMethod')

ts_Proc = preprocess(ts_Set, 'gender', 'Partner', 'Dependents', 'PhoneService',
                     'MultipleLines', 'InternetService', 'OnlineSecurity',
                     'OnlineBackup', 'DeviceProtection','TechSupport',
                     'StreamingTV', 'StreamingMovies', 'Contract', 
                     'PaperlessBilling', 'PaymentMethod')

tr_Proc["TotalCharges"] = pd.to_numeric(tr_Proc["TotalCharges"], errors="coerce")
ts_Proc["TotalCharges"] = pd.to_numeric(ts_Proc["TotalCharges"], errors="coerce")

X = tr_Proc.iloc[:, 1:-1].copy().values
y = tr_Proc.iloc[:, -1].copy().values

X_test = ts_Proc.iloc[:, 1:].copy().values

X, X_test, y = X.astype(float), X_test.astype(float), y.astype(float)

X = si_mf.fit_transform(X)
X_test = si_mf.transform(X_test)

########## Scaling ##########
X = sc_X.fit_transform(X)
X_test = sc_X.transform(X_test)

#X = mms_X.fit_transform(X)
#X_test = mms_X.transform(X_test)

num_features = X.shape[1]


------------------------------

Train Set Shape : (5634, 21)

------------------------------

Test Set Shape : (1409, 20)

------------------------------



**Dataset Template**

In [4]:
class DS(Dataset):
    def __init__(this, X=None, y=None, mode="train"):
        this.mode = mode
        this.X = X
        if mode == "train":
            this.y = y
            
    def __len__(this):
        return this.X.shape[0]
    
    def __getitem__(this, idx):
        if this.mode == "train":
            return torch.FloatTensor(this.X[idx]) , torch.FloatTensor(this.y[idx])
        else:
            return torch.FloatTensor(this.X[idx])

# ANN

**Config**

In [5]:
class CFG():
    tr_batch_size = 512
    va_batch_size = 512
    ts_batch_size = 512
    
    epochs  = 50
    n_folds = 4
    
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    
    IL = num_features
    OL = 1
    HL_1 = [64]
    HL_2 = [128, 64]
    
cfg = CFG()
    
ts_data_setup = DS(X_test, None, "test")
ts_data = DL(ts_data_setup, batch_size=cfg.ts_batch_size, shuffle=False)

**Setup**

In [6]:
class MLP(nn.Module):
    def __init__(this, IL=None, HL=None, OL=None):
        super(MLP, this).__init__()
        
        this.DP1 = nn.Dropout(p=0.2)
        this.DP2 = nn.Dropout(p=0.5)
        this.HL  = HL
        
        if len(HL) == 1:
            this.BN1 = nn.BatchNorm1d(IL)
            this.FC1 = WN(nn.Linear(IL, HL[0]))

            this.BN2 = nn.BatchNorm1d(HL[0])
            this.FC2 = WN(nn.Linear(HL[0], OL))
            
        elif len(HL) == 2:
            this.BN1 = nn.BatchNorm1d(IL)
            this.FC1 = WN(nn.Linear(IL, HL[0]))

            this.BN2 = nn.BatchNorm1d(HL[0])
            this.FC2 = WN(nn.Linear(HL[0], HL[1]))
            
            this.BN3 = nn.BatchNorm1d(HL[1])
            this.FC3 = WN(nn.Linear(HL[1], OL))
            
        else:
            raise NotImplementedError("Only Supports Networks of Depth 1 and 2")
        
    def getOptimizer(this, lr=1e-3, wd=0):
        return optim.Adam(this.parameters(), lr=lr, weight_decay=wd)
    
    def forward(this, x):
        if len(this.HL) == 1:
            x = this.BN1(x)
            x = this.DP1(x)
            x = F.relu(this.FC1(x))
            x = this.BN2(x)
            x = this.DP2(x)
            x = torch.sigmoid(this.FC2(x))
            return x
        else:
            x = this.BN1(x)
            x = this.DP1(x)
            x = F.relu(this.FC1(x))
            x = this.BN2(x)
            x = this.DP2(x)
            x = F.relu(this.FC2(x))
            x = this.BN3(x)
            x = this.DP2(x)
            x = torch.sigmoid(this.FC3(x))
            return x

**ANN Helpers**

In [7]:
def train_fn(X=None, y=None, n_folds=None, HL_Used=None):
    breaker()
    print("Training ...")
    breaker()
    
    LP = []
    names = []
    bestLoss = {"train" : np.inf, "valid" : np.inf}
    fold = 0
    
    start_time = time()
    for tr_idx, va_idx in KFold(n_splits=n_folds, shuffle=True, random_state=0).split(X, y):
        print("Processing Fold {fold} ...".format(fold=fold+1))
        
        X_train, X_valid, y_train, y_valid = X[tr_idx], X[va_idx], y[tr_idx], y[va_idx]
        
        tr_data_setup = DS(X_train, y_train.reshape(-1,1))
        va_data_setup = DS(X_valid, y_valid.reshape(-1,1))
        
        DLS = {"train" : DL(tr_data_setup, batch_size=cfg.tr_batch_size, shuffle=True, generator=torch.manual_seed(0)), #, drop_last=True),
               "valid" : DL(va_data_setup, batch_size=cfg.va_batch_size, shuffle=False) #drop_last=True)
              }
        
        torch.manual_seed(0)
        model = MLP(cfg.IL, HL_Used, cfg.OL)
        model.to(cfg.device)
        
        optimizer = model.getOptimizer()
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, eps=1e-6, verbose=True)
        
        for e in range(cfg.epochs):
            epochLoss = {"train" : 0, "valid" : 0}
            for phase in ["train", "valid"]:
                if phase == "train":
                    model.train()
                else:
                    model.eval()
                lossPerPass = 0
                
                for f, l in DLS[phase]:
                    f, l = f.to(cfg.device), l.to(cfg.device)
                    
                    optimizer.zero_grad()
                    with torch.set_grad_enabled(phase == "train"):
                        output = model(f)
                        loss   = nn.BCELoss()(output, l)
                        if phase == "train":
                            loss.backward()
                            optimizer.step()
                    lossPerPass += (loss.item() / l.shape[0])
                epochLoss[phase] = lossPerPass
            LP.append(epochLoss)
            scheduler.step(epochLoss["valid"])
            #name = "./Model_Fold_{fold}".format(fold=fold)
            #names.append(name)
            #torch.save(model.state_dict(), name)
            if epochLoss["valid"] < bestLoss["valid"]:
                bestLoss = epochLoss
                name = "./Model_Fold_{fold}".format(fold=fold)
                names.append(name)
                torch.save(model.state_dict(), name)
        fold += 1
        
    breaker()
    print("Time Taken to Train {fold} folds for {e} epochs : {:.2f} minutes".format((time()-start_time)/60, fold=n_folds, e=cfg.epochs))
    breaker()
    print("Best Loss :", repr(bestLoss))
    breaker()
    print("Training Complete")
    breaker()
    return LP, names, model
    
def eval_fn(model=None, names=None, dataloader=None, num_obs_test=None):
    y_pred = np.zeros((num_obs_test, 1))
    
    for name in names:
        Pred = torch.zeros(cfg.ts_batch_size, 1).to(cfg.device)
        
        model.load_state_dict(torch.load(name))
        model.eval()
        
        for X in dataloader:
            X = X.to(cfg.device)
            with torch.no_grad():
                Prob = model(X)
            Pred = torch.cat((Pred, Prob), dim=0)
        Pred = Pred[cfg.ts_batch_size:].cpu().numpy()
        y_pred = np.add(y_pred, Pred)
    y_pred = np.divide(y_pred, len(names))
    
    y_pred[np.argwhere(y_pred > 0.5)]  = 1
    y_pred[np.argwhere(y_pred <= 0.5)] = 0
    return y_pred.astype(int)

# Training

In [8]:
LP, Names, Network = train_fn(X=X, y=y, n_folds=cfg.n_folds, HL_Used=cfg.HL_2)


------------------------------

Training ...

------------------------------

Processing Fold 1 ...
Processing Fold 2 ...
Epoch    46: reducing learning rate of group 0 to 1.0000e-04.
Epoch    50: reducing learning rate of group 0 to 1.0000e-05.
Processing Fold 3 ...
Epoch    48: reducing learning rate of group 0 to 1.0000e-04.
Processing Fold 4 ...

------------------------------

Time Taken to Train 4 folds for 50 epochs : 0.60 minutes

------------------------------

Best Loss : {'train': 0.010628409043871329, 'valid': 0.002645076979435497}

------------------------------

Training Complete

------------------------------



# Submission

In [9]:
ss = pd.read_csv("../input/labdata-churn-challenge-2020/sample_submission.csv")

y_pred = eval_fn(Network, set(Names), ts_data, ts_data_setup.__len__())

ss["Churn"] = y_pred
ss.to_csv("./submission.csv", index=False)