In [35]:
# 1. Read Dataset
# 2. Extract valuable features
# 3. Data preprocessing
#   3.1 Remove strings from data
#   3.2 Informations are equal to each other
# 4. Divide dataset to train and test set
# 5. Create Neural Network
# 6. Compile and Fit
# 7. Let's predict
# 8. Congrats!

In [36]:
import pandas as pd
from torch.utils import tensorboard

In [37]:
dataframe = pd.read_csv("Churn_Modelling.csv")
dataframe.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [38]:
dataframe = dataframe.iloc[:, 3:]
dataframe.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [39]:
x = dataframe.iloc[:, :-1].values
y = dataframe.iloc[:, -1].values

In [40]:
from sklearn.preprocessing import LabelEncoder
gender_encoder = LabelEncoder()
country_encoder = LabelEncoder()
x[:, 2] = gender_encoder.fit_transform(x[:, 2])
x[:, 1] = country_encoder.fit_transform(x[:, 1])
x[:, 1]

array([0, 2, 0, ..., 0, 1, 0], dtype=object)

In [41]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
x = sc.fit_transform(x)
x

array([[-0.32622142, -0.90188624, -1.09598752, ...,  0.64609167,
         0.97024255,  0.02188649],
       [-0.44003595,  1.51506738, -1.09598752, ..., -1.54776799,
         0.97024255,  0.21653375],
       [-1.53679418, -0.90188624, -1.09598752, ...,  0.64609167,
        -1.03067011,  0.2406869 ],
       ...,
       [ 0.60498839, -0.90188624, -1.09598752, ..., -1.54776799,
         0.97024255, -1.00864308],
       [ 1.25683526,  0.30659057,  0.91241915, ...,  0.64609167,
        -1.03067011, -0.12523071],
       [ 1.46377078, -0.90188624, -1.09598752, ...,  0.64609167,
        -1.03067011, -1.07636976]])

In [54]:
import torch
from torch import nn
from torch.nn import functional as F

class ArtificialNeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.input_layer = nn.Linear(10, 32)
        self.hidden_layer1 = nn.Linear(32, 64)
        self.hidden_layer2 = nn.Linear(64, 128)
        self.hidden_layer3 = nn.Linear(128, 32)
        self.output_layer = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.2)
        
    def forward(self, x):
        x = self.input_layer(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.hidden_layer1(x)
        x = F.relu(x)
        x = self.dropout(x)
        x = self.hidden_layer2(x)
        x = F.relu(x)
        x = self.hidden_layer3(x)
        x = F.relu(x)
        x = self.output_layer(x)
        y = F.sigmoid(x)
        
        return y
    

In [43]:
x = torch.from_numpy(x)
y = torch.from_numpy(y).view(-1, 1)
tensord_dataset = torch.utils.data.TensorDataset(x, y)


In [44]:
train_set, test_set = torch.utils.data.random_split(tensord_dataset, [int(0.8*x.shape[0]), int(0.2*x.shape[0])])
train_loader = torch.utils.data.DataLoader(train_set, batch_size=32, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=32, shuffle=True)

In [55]:
model = ArtificialNeuralNetwork()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

nb_epoch = 50
summary = tensorboard.SummaryWriter()
for n in range(nb_epoch):
    for i, data in enumerate(train_loader):
        input_data, label = data
        input_data = torch.autograd.Variable(input_data).float()
        label = torch.autograd.Variable(label).float()
        y_pred = model(input_data)
        
        loss = criterion(label, y_pred)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        summary.add_scalar("loss/train", loss.item(), i + n*len(train_loader))
    
    test_loss = 0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
                input_data, label = data
                input_data = torch.autograd.Variable(input_data).float()
                label = torch.autograd.Variable(label).float()
                y_pred = model(input_data)
                loss = criterion(y_pred, label)
                test_loss += loss.item()
        summary.add_scalar("loss/test", test_loss/len(test_loader), n)
    print(f"Epoch: {n} Training loss: {loss.item()} Test loss: {test_loss}")
    summary.flush()

Epoch: 0 Training loss: 0.14330829679965973 Test loss: 9.000027775764465
Epoch: 1 Training loss: 0.27556881308555603 Test loss: 8.741972055286169
Epoch: 2 Training loss: 0.11106398701667786 Test loss: 8.549344558268785
Epoch: 3 Training loss: 0.15933574736118317 Test loss: 7.938890028744936
Epoch: 4 Training loss: 0.09078722447156906 Test loss: 7.672054821625352
Epoch: 5 Training loss: 0.12701161205768585 Test loss: 7.566258877515793
Epoch: 6 Training loss: 0.11383941024541855 Test loss: 7.4150767251849174
Epoch: 7 Training loss: 0.11966245621442795 Test loss: 7.426450416445732
Epoch: 8 Training loss: 0.12747353315353394 Test loss: 7.369545686990023
Epoch: 9 Training loss: 0.03354477509856224 Test loss: 7.484863739460707
Epoch: 10 Training loss: 0.08031602203845978 Test loss: 7.2364862114191055
Epoch: 11 Training loss: 0.1529054492712021 Test loss: 7.3479766510427
Epoch: 12 Training loss: 0.12289813905954361 Test loss: 7.154394589364529
Epoch: 13 Training loss: 0.07467209547758102 Test

In [56]:
model.eval()
correct = 0
batch_size=32
with torch.no_grad():
    for i, data in enumerate(test_loader):
            input_data, label = data
            input_data = torch.autograd.Variable(input_data).float()
            label = torch.autograd.Variable(label).float()
            y_pred = model(input_data)
            loss = criterion(y_pred, label)
            y_pred = torch.round(y_pred.data)
            correct += (y_pred == label).sum() 
    accuracy = correct/float(len(test_loader)*batch_size)
    print(f"Accuracy = {accuracy.item()}")

Accuracy = 0.8373016119003296


In [17]:
print(y_pred)
print(label)

tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.]])
