In [1]:
import warnings
warnings.simplefilter("ignore")
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np

from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

from sklearn import metrics
from sklearn.metrics import f1_score

In [2]:
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

In [3]:
df = df_train.drop("ID", axis=1)
df = pd.get_dummies(df, drop_first=True)

X = df.drop('Is_Churn', axis=1)
Y = df['Is_Churn']

# Adding samples to make all the categorical label values same
oversample = SMOTE()
X, Y = oversample.fit_resample(X, Y)

Y.value_counts()

0    5113
1    5113
Name: Is_Churn, dtype: int64

In [4]:
# Feature Scaling on training data
scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)
X

Unnamed: 0,Age,Balance,Vintage,Transaction_Status,Credit_Card,Gender_Male,Income_5L - 10L,Income_Less than 5L,Income_More than 15L,Product_Holdings_2,Product_Holdings_3+,Credit_Category_Good,Credit_Category_Poor
0,-0.669476,-0.505161,1.350891,-0.813937,-1.190826,-0.886856,1.972431,-0.458764,-0.413312,-0.793991,-0.165928,-0.448420,-0.793009
1,1.172757,0.096530,-0.101097,1.228596,0.839753,-0.886856,-0.506989,2.179772,-0.413312,-0.793991,-0.165928,-0.448420,1.261020
2,-0.777843,-0.238633,-0.101097,1.228596,-1.190826,-0.886856,-0.506989,-0.458764,2.419481,1.259461,-0.165928,-0.448420,1.261020
3,0.089090,1.095187,-1.553084,1.228596,0.839753,-0.886856,-0.506989,-0.458764,2.419481,1.259461,-0.165928,-0.448420,1.261020
4,-0.344376,0.130388,-0.827091,1.228596,0.839753,-0.886856,-0.506989,-0.458764,2.419481,-0.793991,-0.165928,2.230052,-0.793009
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10221,0.847657,-0.582524,-0.827091,-0.813937,0.839753,-0.886856,-0.506989,-0.458764,-0.413312,-0.793991,-0.165928,-0.448420,1.261020
10222,1.714591,0.003089,-1.553084,1.228596,-1.190826,-0.886856,-0.506989,-0.458764,-0.413312,-0.793991,-0.165928,-0.448420,-0.793009
10223,1.064391,0.570429,0.624897,-0.813937,0.839753,-0.886856,-0.506989,-0.458764,2.419481,-0.793991,-0.165928,-0.448420,-0.793009
10224,-0.344376,-0.648876,-0.827091,1.228596,-1.190826,-0.886856,-0.506989,-0.458764,2.419481,-0.793991,-0.165928,-0.448420,1.261020


In [5]:
!pip install tensorflow

import tensorflow
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense



In [6]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.25, random_state=604)

model = Sequential()
model.add(Dense(11, activation='relu', input_dim=13)) # input layer 1
model.add(Dense(22)) # input layer 2
model.add(Dense(1, activation='sigmoid')) # output layer

In [7]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 11)                154       
                                                                 
 dense_1 (Dense)             (None, 22)                264       
                                                                 
 dense_2 (Dense)             (None, 1)                 23        
                                                                 
Total params: 441
Trainable params: 441
Non-trainable params: 0
_________________________________________________________________


In [8]:
model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])

In [9]:
model.fit(X_train, Y_train, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1fb87a5ef10>

In [10]:
model.layers[0].get_weights()

[array([[-0.41867968, -0.2088456 ,  0.41580066,  0.32489428, -0.85634947,
          0.1902173 ,  0.34635553, -0.2527707 ,  0.33032504,  0.07830936,
          0.06591579],
        [ 0.01744212,  0.02739776,  0.15533419,  0.04424138,  0.05434725,
         -0.0251063 ,  0.04840262, -0.307188  , -0.16336168,  0.46482995,
          0.2764602 ],
        [-0.04220733,  0.2407377 ,  0.4497325 ,  0.18048151, -0.23096342,
         -0.13078482, -0.04210841, -0.04820601,  0.05594381,  0.14947982,
         -0.24639797],
        [ 0.22009088, -0.01586326, -0.2969543 , -0.10353024,  0.09972384,
         -0.19025828, -0.47240025,  0.03832972, -0.38963675, -0.50747174,
         -0.29150194],
        [-0.18679139,  0.11095969, -0.15633708,  0.06523973,  0.30526474,
         -0.10245388,  0.21317713,  0.09691777, -0.41070485, -0.15663548,
          0.28430238],
        [ 0.34123543,  0.10587692, -0.22440183,  0.05282485, -0.08694829,
         -0.43326715, -0.31712586,  0.08037245, -0.15801525, -0.1499607

In [11]:
model.layers[1].get_weights()

[array([[ 0.14679681,  0.1806424 ,  0.35612684, -0.2323486 ,  0.02895206,
          0.2649392 ,  0.52069956, -0.16095582, -0.16829072,  0.00409852,
         -0.26252168, -0.43793783,  0.42099616, -0.00455776, -0.0035033 ,
          0.5171965 , -0.445955  , -0.15411353, -0.08810164,  0.16347614,
         -0.02949967,  0.21306747],
        [-0.09802327,  0.24458879,  0.365676  ,  0.14679521, -0.3024353 ,
         -0.29133528, -0.00919707, -0.11136722, -0.2679541 ,  0.010818  ,
         -0.26996964,  0.3106006 ,  0.48614043,  0.15847796,  0.191485  ,
         -0.02437024, -0.36154753,  0.4303241 , -0.16639209, -0.10251638,
          0.43747684,  0.23521267],
        [-0.24911703, -0.29302913,  0.14020187,  0.17009784,  0.30815634,
         -0.44109744,  0.18299198, -0.37899038, -0.08127011,  0.3109512 ,
         -0.1115253 ,  0.26562083,  0.12785691, -0.3305369 , -0.14003451,
          0.17097625, -0.10869582,  0.10160226,  0.04543408, -0.25478122,
          0.01602786, -0.3132443 ],
    

In [12]:
model.layers[2].get_weights()

[array([[-0.23254417],
        [ 0.24154651],
        [-0.16965853],
        [-0.00715057],
        [ 0.06675591],
        [-0.0749981 ],
        [-0.426205  ],
        [-0.19180548],
        [ 0.2506088 ],
        [-0.22594403],
        [-0.42687657],
        [ 0.03675992],
        [-0.06704158],
        [ 0.35808074],
        [-0.16352133],
        [-0.3393746 ],
        [ 0.52180177],
        [-0.3877133 ],
        [ 0.35117498],
        [ 0.18681052],
        [-0.12097418],
        [ 0.4209587 ]], dtype=float32),
 array([-0.05063106], dtype=float32)]

In [25]:
Y_log = model.predict(X_test)
Y_pred = np.where(Y_log>0.5, 1, 0)

In [26]:
f_one_score = (f1_score(Y_test, Y_pred, average='macro'))*100
print("F1 Score:", f_one_score)

F1 Score: 76.4566952738236


In [27]:
# Data Preprocessing and Feature Engineering on Testing data

X = df_test.drop("ID", axis=1)
X = pd.get_dummies(X, drop_first=True)

# Feature Scaling

scaler = StandardScaler()
X = pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

Predicted_Churn_log = model.predict(X)
Predicted_Churn = np.where(Predicted_Churn_log>0.5, 1, 0)

# Checking the predicted churn details and storing in dataframe format
predicted_output = pd.DataFrame()
predicted_output['ID'] = df_test["ID"]
predicted_output['Is_Churn'] = Predicted_Churn
predicted_output

Unnamed: 0,ID,Is_Churn
0,55480787,0
1,9aededf2,0
2,a5034a09,0
3,b3256702,0
4,dc28adb5,0
...,...,...
2846,19e40adf,1
2847,52d5bc8d,1
2848,f708121b,1
2849,f008715d,1


In [28]:
predicted_output.to_csv("sample_submission9.csv", index=False)