In [97]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.autograd import Variable

In [98]:
dataset = pd.read_csv('/content/drive/My Drive/toy/Churn_Modelling.csv')

In [99]:
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [100]:
X = dataset.iloc[:, 3:-1].values
y = dataset.iloc[:, -1].values

In [101]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
X[:, 2] = le.fit_transform(X[:, 2])

In [102]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [1])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [103]:
# Normalization
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

In [104]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [105]:
y_train

array([0, 0, 0, ..., 0, 0, 1])

In [106]:
y = torch.from_numpy(y_train)
y.shape

torch.Size([8000])

In [107]:
# class Net(nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.fc1 = nn.Linear(12, 512)           # obviamente tem que bater com as features, e a input
#         self.fc2 = nn.Linear(512, 512)          # 512 escolhemos aleatoriamente, sao as hidden features
#         self.fc3 = nn.Linear(512, 2)
#         self.dropout = nn.Dropout(0.2)
        
#     def forward(self, x):
#         x = F.relu(self.fc1(x))
#         x = self.dropout(x)
#         x = F.relu(self.fc2(x))
#         x = self.dropout(x)
#         x = self.fc3(x)
#         return x
# model = Net()
# model

In [108]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(12,512),
        self.fc2 = nn.Linear(512, 2),
        
    def forward(self, x):
            x = F.tanh(self.fc1(x))
            x = self.fc2(x)
            x = F.softmax(x)
            return x
    model = Net()
    model

model = nn.Sequential(
            nn.Linear(12, 512),
            nn.Tanh(),
            nn.Linear(512, 2),
            nn.Softmax(dim=1))

In [144]:
a = np.array([[1,2],[3,4]])
a

array([[1, 2],
       [3, 4]])

In [145]:
t = torch.tensor([[1.,-2.],[3.,4.]])
t

tensor([[ 1., -2.],
        [ 3.,  4.]])

In [148]:
t_v = t.view(-1)
t_v = t_v.unsqueeze(1)
t_v

tensor([[ 1.],
        [-2.],
        [ 3.],
        [ 4.]])

In [109]:
[param.shape for param in model.parameters()]

[torch.Size([512, 12]),
 torch.Size([512]),
 torch.Size([2, 512]),
 torch.Size([2])]

In [110]:
for name, param in model.named_parameters():
  print(name, param.shape)

0.weight torch.Size([512, 12])
0.bias torch.Size([512])
2.weight torch.Size([2, 512])
2.bias torch.Size([2])


In [111]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)


batch_size = 64
n_epochs = 200
batch_no = len(X_train) // batch_size

train_loss = 0
train_loss_min = np.Inf
for epoch in range(n_epochs):
    for i in range(batch_no):
        start = i*batch_size
        end = start+batch_size
        x_var = Variable(torch.FloatTensor(X_train[start:end]))
        y_var = Variable(torch.LongTensor(y_train[start:end])) 
        
        optimizer.zero_grad()
        output = model(x_var)
        loss = criterion(output,y_var)
        loss.backward()
        optimizer.step()
        
        values, labels = torch.max(output, 1)
        num_right = np.sum(labels.data.numpy() == y_train[start:end])
        train_loss += loss.item()*batch_size
    
    train_loss = train_loss / len(X_train)
    if train_loss <= train_loss_min:
        print("Validation loss decreased ({:6f} ===> {:6f}). Saving the model...".format(train_loss_min,train_loss))
        torch.save(model.state_dict(), "model.pt")
        train_loss_min = train_loss
    
    if epoch % 200 == 0:
        print('')
        print("Epoch: {} \tTrain Loss: {} \tTrain Accuracy: {}".format(epoch+1, train_loss,num_right / len(y_train[start:end]) ))
print('Training Ended! ')

Validation loss decreased (   inf ===> 0.474386). Saving the model...

Epoch: 1 	Train Loss: 0.47438588666915893 	Train Accuracy: 0.84375
Validation loss decreased (0.474386 ===> 0.474386). Saving the model...
Validation loss decreased (0.474386 ===> 0.474356). Saving the model...
Validation loss decreased (0.474356 ===> 0.474326). Saving the model...
Validation loss decreased (0.474326 ===> 0.474297). Saving the model...
Validation loss decreased (0.474297 ===> 0.474267). Saving the model...
Validation loss decreased (0.474267 ===> 0.474238). Saving the model...
Validation loss decreased (0.474238 ===> 0.474208). Saving the model...
Validation loss decreased (0.474208 ===> 0.474179). Saving the model...
Validation loss decreased (0.474179 ===> 0.474150). Saving the model...
Validation loss decreased (0.474150 ===> 0.474121). Saving the model...
Validation loss decreased (0.474121 ===> 0.474091). Saving the model...
Validation loss decreased (0.474091 ===> 0.474062). Saving the model..

In [154]:
X_test_var = Variable(torch.FloatTensor(X_test), requires_grad=False) 
with torch.no_grad():
    test_result = model(X_test_var)
values, labels = torch.max(test_result, dim=1)

In [155]:
torch.max(test_result, dim=1)

torch.return_types.max(values=tensor([1.0000, 0.8358, 1.0000,  ..., 0.9945, 1.0000, 0.9909]), indices=tensor([0, 0, 0,  ..., 0, 0, 0]))

In [114]:
torch.max(test_result, 1)

torch.return_types.max(values=tensor([1.0000, 0.8358, 1.0000,  ..., 0.9945, 1.0000, 0.9909]), indices=tensor([0, 0, 0,  ..., 0, 0, 0]))

In [115]:
values

tensor([1.0000, 0.8358, 1.0000,  ..., 0.9945, 1.0000, 0.9909])

In [116]:
labels

tensor([0, 0, 0,  ..., 0, 0, 0])

In [117]:
from sklearn import metrics
metrics.accuracy_score(y_test, labels)

0.851

In [118]:
x_var = Variable(torch.FloatTensor(X_train[start:end]))
x_var.dtype

torch.float32