In [1]:
# 1. Read Dataset
# 2. Extract valuable features
# 3. Data preprocessing
#   3.1 Remove strings from data
#   3.2 Informations are equal to each other
# 4. Divide dataset to train and test set
# 5. Create Neural Network
# 6. Compile and Fit
# 7. Let's predict
# 8. Congrats!

import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

import torch
from torch import nn
from torch.nn import functional as F

In [2]:
dataframe = pd.read_csv("data/Churn_Modelling.csv")
dataframe.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
dataframe = dataframe.iloc[:, 3:]
dataframe.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
x = dataframe.iloc[:, :-1].values
y = dataframe.iloc[:, -1].values
x

array([[619, 'France', 'Female', ..., 1, 1, 101348.88],
       [608, 'Spain', 'Female', ..., 0, 1, 112542.58],
       [502, 'France', 'Female', ..., 1, 0, 113931.57],
       ...,
       [709, 'France', 'Female', ..., 0, 1, 42085.58],
       [772, 'Germany', 'Male', ..., 1, 0, 92888.52],
       [792, 'France', 'Female', ..., 1, 0, 38190.78]], dtype=object)

In [5]:
from sklearn.preprocessing import LabelEncoder
gender_encoder = LabelEncoder()
country_encoder = LabelEncoder()
x[:, 1] = country_encoder.fit_transform(x[:, 1])
x[:, 2] = gender_encoder.fit_transform(x[:, 2])
x

array([[619, 0, 0, ..., 1, 1, 101348.88],
       [608, 2, 0, ..., 0, 1, 112542.58],
       [502, 0, 0, ..., 1, 0, 113931.57],
       ...,
       [709, 0, 0, ..., 0, 1, 42085.58],
       [772, 1, 1, ..., 1, 0, 92888.52],
       [792, 0, 0, ..., 1, 0, 38190.78]], dtype=object)

In [6]:
sc = StandardScaler()
x = sc.fit_transform(x)
x


array([[-0.32622142, -0.90188624, -1.09598752, ...,  0.64609167,
         0.97024255,  0.02188649],
       [-0.44003595,  1.51506738, -1.09598752, ..., -1.54776799,
         0.97024255,  0.21653375],
       [-1.53679418, -0.90188624, -1.09598752, ...,  0.64609167,
        -1.03067011,  0.2406869 ],
       ...,
       [ 0.60498839, -0.90188624, -1.09598752, ..., -1.54776799,
         0.97024255, -1.00864308],
       [ 1.25683526,  0.30659057,  0.91241915, ...,  0.64609167,
        -1.03067011, -0.12523071],
       [ 1.46377078, -0.90188624, -1.09598752, ...,  0.64609167,
        -1.03067011, -1.07636976]])

In [7]:
x.shape

(10000, 10)

In [8]:
import torch
from torch import nn
from torch.nn import functional as F

class NeuralNetwork(nn.Module):
    
    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(10, 32)
        self.hidden_layer1 = nn.Linear(32, 64)
        self.hidden_layer2 = nn.Linear(64, 128)
        self.hidden_layer3 = nn.Linear(128, 32)
        self.output_layer = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.2)
    
    def forward(self, x):
        x = self.input_layer(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.hidden_layer1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.hidden_layer2(x)
        x = F.relu(x)
        x = self.hidden_layer3(x)
        x = F.relu(x)
        x = self.output_layer(x)
        x = F.sigmoid(x)      
        
        return x

In [9]:
x = torch.from_numpy(x)
y = torch.from_numpy(y).view(-1, 1)

In [10]:
dataset = torch.utils.data.TensorDataset(x, y)
train_set, test_set = torch.utils.data.random_split(dataset, [int(0.8*len(dataset)), int(0.2*len(dataset))])

In [11]:
train_dataloader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)
test_dataloader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=True)


In [12]:
from torch.utils import tensorboard
model = NeuralNetwork()
loss_fn = nn.MSELoss()
optim = torch.optim.Adam(model.parameters(), lr=1e-3)

nb_epoch = 50 
summary = tensorboard.SummaryWriter()
for n in range(nb_epoch):
    for i, data in enumerate(train_dataloader):
        input_data, label = data
        input_data = torch.autograd.Variable(input_data).float()
        label = torch.autograd.Variable(label).float()
        y_pred = model(input_data)
        optim.zero_grad()
        loss = loss_fn(label, y_pred)
        loss.backward()
        optim.step()
        summary.add_scalar("loss/train", loss.item(), i + n*len(train_dataloader))
    test_loss = 0
    with torch.no_grad():
        for i, data in enumerate(test_dataloader):
            input_data, label = data
            input_data = torch.autograd.Variable(input_data).float()
            label = torch.autograd.Variable(label).float()
            y_pred = model(input_data)
            loss = loss_fn(label, y_pred)
            test_loss += loss.item()
    summary.add_scalar("loss/test", test_loss/len(test_dataloader), n)
    summary.flush()
    print(f"Epoch: {n} training loss: {loss.item()} test loss: {test_loss}")



Epoch: 0 training loss: 0.10496843606233597 test loss: 8.519651595503092
Epoch: 1 training loss: 0.1356079876422882 test loss: 7.816850334405899
Epoch: 2 training loss: 0.2406378984451294 test loss: 7.6637129709124565
Epoch: 3 training loss: 0.1845950484275818 test loss: 7.670262131839991
Epoch: 4 training loss: 0.14439287781715393 test loss: 7.6792586743831635
Epoch: 5 training loss: 0.13145475089550018 test loss: 7.080974765121937
Epoch: 6 training loss: 0.1455693542957306 test loss: 7.284711740911007
Epoch: 7 training loss: 0.0563155822455883 test loss: 7.2380488105118275
Epoch: 8 training loss: 0.06447229534387589 test loss: 7.059233363717794
Epoch: 9 training loss: 0.1352655291557312 test loss: 7.099497094750404
Epoch: 10 training loss: 0.2300598919391632 test loss: 7.080623704940081
Epoch: 11 training loss: 0.18517839908599854 test loss: 7.224716249853373
Epoch: 12 training loss: 0.12983399629592896 test loss: 7.0947884656488895
Epoch: 13 training loss: 0.02836485765874386 test l

In [13]:
with torch.no_grad():
    correct = 0
    for i, data in enumerate(test_dataloader):
        input_data, label = data
        input_data = torch.autograd.Variable(input_data).float()
        label = torch.autograd.Variable(label).float()
        y_pred = model(input_data)
        loss = loss_fn(label, y_pred)
        y_pred = torch.round(y_pred.data)
        correct += (y_pred == label).sum()

    accuracy = correct/float(len(test_dataloader)*32)
    print(f"Accuracy: {accuracy.item()}")

Accuracy: 0.8417658805847168


In [14]:
y_pred

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])

In [15]:
label

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]])