In [1]:
import torch
import torchvision
import torch.nn as nn
import pandas as pd
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split

In [2]:
# Load the semicolon-separated bank marketing data into a dataframe
bank = pd.read_csv('bank.csv', sep=';')

In [3]:
# Preview the first rows to sanity-check that the columns parsed as expected
bank.head()

Unnamed: 0,age,job,marital,education,default,housing,loan,contact,month,day_of_week,...,campaign,pdays,previous,poutcome,emp.var.rate,cons.price.idx,cons.conf.idx,euribor3m,nr.employed,y
0,56,housemaid,married,basic.4y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
1,57,services,married,high.school,unknown,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
2,37,services,married,high.school,no,yes,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
3,40,admin.,married,basic.6y,no,no,no,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no
4,56,services,married,high.school,no,no,yes,telephone,may,mon,...,1,999,0,nonexistent,1.1,93.994,-36.4,4.857,5191.0,no


In [4]:
# Row count of the dataset; reused later to size the train/validation split
num_rows = bank.shape[0]
print(num_rows)

41188


In [5]:
# Column count of the dataset (features plus the target)
num_cols = bank.shape[1]
print(num_cols)

21


In [6]:
# Every column except the target 'y' is used as a model input
input_cols = [name for name in bank.columns if name != 'y']
print(input_cols)

['age', 'job', 'marital', 'education', 'default', 'housing', 'loan', 'contact', 'month', 'day_of_week', 'duration', 'campaign', 'pdays', 'previous', 'poutcome', 'emp.var.rate', 'cons.price.idx', 'cons.conf.idx', 'euribor3m', 'nr.employed']


In [7]:
# Columns that are not numeric, i.e. the ones that must be integer-encoded before
# they can be fed to the model. Uses the public `select_dtypes` API rather than the
# private `DataFrame._get_numeric_data` helper, which may change between pandas
# releases. `Index.difference` keeps the result sorted, as before.
categorical_cols = bank.columns.difference(bank.select_dtypes(include='number').columns)
print(categorical_cols)

Index(['contact', 'day_of_week', 'default', 'education', 'housing', 'job',
       'loan', 'marital', 'month', 'poutcome', 'y'],
      dtype='object')


In [8]:
# Target column(s); kept as a list so the extracted target array stays 2-D
output_cols = ['y']

In [9]:
def dataframe_to_arrays(dataframe):
    """Turn a dataframe into numpy arrays of model inputs and targets.

    Each column named in the notebook-level ``categorical_cols`` is replaced
    by its integer category codes on a deep copy, so the caller's dataframe
    is left untouched. The columns listed in ``input_cols`` and
    ``output_cols`` are then extracted.

    Returns:
        A tuple ``(inputs_array, targets_array)`` of numpy arrays.
    """
    encoded = dataframe.copy(deep=True)
    for name in categorical_cols:
        # Map each distinct value of the column to an integer code
        as_category = encoded[name].astype('category')
        encoded[name] = as_category.cat.codes
    return encoded[input_cols].to_numpy(), encoded[output_cols].to_numpy()

In [10]:
# Encode the whole dataframe; the trailing tuple expression displays both arrays
inputs_array, targets_array = dataframe_to_arrays(bank)
inputs_array, targets_array

(array([[ 5.6000e+01,  3.0000e+00,  1.0000e+00, ..., -3.6400e+01,
          4.8570e+00,  5.1910e+03],
        [ 5.7000e+01,  7.0000e+00,  1.0000e+00, ..., -3.6400e+01,
          4.8570e+00,  5.1910e+03],
        [ 3.7000e+01,  7.0000e+00,  1.0000e+00, ..., -3.6400e+01,
          4.8570e+00,  5.1910e+03],
        ...,
        [ 5.6000e+01,  5.0000e+00,  1.0000e+00, ..., -5.0800e+01,
          1.0280e+00,  4.9636e+03],
        [ 4.4000e+01,  9.0000e+00,  1.0000e+00, ..., -5.0800e+01,
          1.0280e+00,  4.9636e+03],
        [ 7.4000e+01,  5.0000e+00,  1.0000e+00, ..., -5.0800e+01,
          1.0280e+00,  4.9636e+03]]), array([[0],
        [0],
        [0],
        ...,
        [0],
        [1],
        [0]], dtype=int8))

In [11]:
# Features become a float32 tensor; labels become an int64 tensor with the
# trailing size-1 column dimension removed, as F.cross_entropy expects class indices
inputs = torch.tensor(inputs_array, dtype=torch.float32)
targets = torch.tensor(targets_array, dtype=torch.long).squeeze(dim=1)

In [12]:
# Inspect the label tensor
targets

tensor([0, 0, 0,  ..., 0, 1, 0])

In [13]:
# Pair every feature row with its label so they are indexed and batched together
dataset = TensorDataset(inputs, targets)

In [14]:
val_percent = 0.2 # between 0.1 and 0.2
val_size = int(num_rows * val_percent)
train_size = num_rows - val_size

# Seed the split explicitly: without a generator the train/validation partition
# changes on every kernel restart, so the metrics below would not be reproducible.
train_ds, val_ds = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [15]:
# Number of samples per mini-batch
batch_size = 16

In [16]:
# Training batches are reshuffled every epoch; validation batches keep dataset order
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size=batch_size)

In [17]:
# One model input per feature column
input_size = len(input_cols)
# Two output logits, one per class of the encoded target
output_size = 2

In [18]:
class InsuranceModel(nn.Module):
    """Single linear layer classifier trained with cross-entropy loss.

    Args:
        in_features: Number of input features per sample. When omitted, the
            notebook-level ``input_size`` is used.
        out_features: Number of output logits (classes). When omitted, the
            notebook-level ``output_size`` is used.
    """

    def __init__(self, in_features=None, out_features=None):
        super().__init__()
        # The notebook globals are only consulted when a size is not given,
        # so `InsuranceModel()` keeps working exactly as before.
        if in_features is None:
            in_features = input_size
        if out_features is None:
            out_features = output_size
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, xb):
        """Return the raw class logits for a batch of inputs."""
        return self.linear(xb)

    def training_step(self, batch):
        """Compute the cross-entropy loss for one ``(inputs, targets)`` batch.

        The returned loss is attached to the autograd graph so the caller can
        call ``backward()`` on it.
        """
        inputs, targets = batch
        # Generate predictions
        out = self(inputs)
        # Calculate loss
        return F.cross_entropy(out, targets)

    def validation_step(self, batch):
        """Compute loss and accuracy for one validation batch.

        Relies on the notebook-level ``accuracy`` helper, which must be
        defined before this method is first called.

        Returns:
            A dict with detached tensors under ``'val_loss'`` and ``'val_acc'``.
        """
        inputs, targets = batch
        # Nothing is back-propagated from validation, so skip graph construction
        with torch.no_grad():
            out = self(inputs)
            loss = F.cross_entropy(out, targets)
            acc = accuracy(out, targets)
        return {'val_loss': loss.detach(), 'val_acc': acc.detach()}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation metrics into plain Python floats.

        Args:
            outputs: List of dicts as returned by ``validation_step``.

        Returns:
            A dict with the mean of the batch losses under ``'val_loss'`` and
            the mean of the batch accuracies under ``'val_acc'``.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()
        batch_acc = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_acc).mean()
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result, num_epochs):
        """Print the validation metrics of an epoch (1-based in the message).

        Every epoch is reported: the earlier guard ``(epoch+1) % 1 == 0`` was
        always true, so it has been dropped. ``num_epochs`` is kept in the
        signature for existing callers.
        """
        print("Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}".format(epoch+1, result['val_loss'], result['val_acc']))

In [54]:
# Build a fresh, untrained model
model = InsuranceModel()

In [55]:
# Show the module structure
model

InsuranceModel(
  (linear): Linear(in_features=20, out_features=2, bias=True)
)

In [56]:
# Inspect the initial weight matrix and bias of the linear layer
list(model.parameters())

[Parameter containing:
 tensor([[ 0.1438, -0.1912, -0.0986, -0.1772,  0.0086, -0.1881, -0.2169,  0.2165,
          -0.1047, -0.0874, -0.1592, -0.1944, -0.0077, -0.1448, -0.0631, -0.1910,
           0.1691, -0.1612, -0.1256, -0.0547],
         [-0.0393,  0.0072,  0.1105, -0.1076,  0.1928, -0.1725,  0.1789, -0.0759,
          -0.0256, -0.1091,  0.2078,  0.1255, -0.1993, -0.2208, -0.1688,  0.0491,
          -0.0232,  0.0275, -0.1015,  0.0337]], requires_grad=True),
 Parameter containing:
 tensor([ 0.0944, -0.0428], requires_grad=True)]

In [57]:
def accuracy(outputs, labels):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        outputs: 2-D tensor of per-class scores, one row per sample.
        labels: 1-D tensor of true class indices.

    Returns:
        A 0-dim tensor holding ``correct / number_of_samples``.
    """
    preds = torch.argmax(outputs, dim=1)
    num_correct = (preds == labels).sum().item()
    return torch.tensor(num_correct / len(preds))

def evaluate(model, val_loader):
    """Run the model over every validation batch and aggregate the metrics.

    Evaluation runs with gradient tracking disabled and with the model in
    eval mode. The model's previous train/eval mode is restored afterwards,
    even if a batch raises, so calling this in the middle of training does
    not change how later training steps behave.

    Args:
        model: An ``nn.Module`` providing ``validation_step(batch)`` and
            ``validation_epoch_end(outputs)``.
        val_loader: Iterable of validation batches.

    Returns:
        Whatever ``model.validation_epoch_end`` returns for the collected
        per-batch results.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = [model.validation_step(batch) for batch in val_loader]
    finally:
        model.train(was_training)
    return model.validation_epoch_end(outputs)

def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
    """Train ``model`` for ``epochs`` passes and validate after each pass.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to the optimizer.
        model: Object providing ``parameters()``, ``training_step(batch)``
            and ``epoch_end(epoch, result, num_epochs)``.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate``.
        opt_func: Optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        A list with one validation result per epoch, in order.
    """
    optimizer = opt_func(model.parameters(), lr)
    history = []
    for epoch_idx in range(epochs):
        # Training phase: one optimizer update per batch
        for batch in train_loader:
            batch_loss = model.training_step(batch)
            batch_loss.backward()
            optimizer.step()
            # Clear gradients so they do not accumulate into the next batch
            optimizer.zero_grad()
        # Validation phase
        epoch_result = evaluate(model, val_loader)
        model.epoch_end(epoch_idx, epoch_result, epochs)
        history.append(epoch_result)
    return history

In [58]:
# Baseline: validation metrics of the untrained model, for comparison with the
# per-epoch results printed during training
result = evaluate(model, val_loader)
print(result)

{'val_loss': 282.8543395996094, 'val_acc': 0.11322815716266632}


In [59]:
# Training hyperparameters for this run
epochs, lr = 5, 1e-3
# Train with Adam instead of fit's default SGD optimizer
history1 = fit(
    epochs,
    lr,
    model,
    train_loader,
    val_loader,
    opt_func=torch.optim.Adam,
)

Epoch [1], val_loss: 0.5734, val_acc: 0.8307
Epoch [2], val_loss: 0.3146, val_acc: 0.8858
Epoch [3], val_loss: 0.5437, val_acc: 0.8386
Epoch [4], val_loss: 0.4278, val_acc: 0.8486
Epoch [5], val_loss: 0.3034, val_acc: 0.9024


In [63]:
def predict_single(input, target, model):
    """Print the true label and the model's predicted class for one sample.

    Args:
        input: Feature tensor of a single sample, without a batch dimension.
        target: Ground-truth label; printed as given.
        model: Callable mapping a batch of inputs to per-class logits.

    Returns:
        None. The target and the predicted class index are printed.
    """
    # The model works on batches, so wrap the sample in a batch of size one
    inputs = input.unsqueeze(0)
    # Inference only: no autograd graph is needed
    with torch.no_grad():
        predictions = model(inputs)
        # Normalise over the class dimension; passing `dim` explicitly avoids
        # the implicit-dimension deprecation warning
        probs = F.softmax(predictions, dim=1)
    # argmax returns the first maximal index, so an exact tie still resolves
    # to class 0, matching the previous `probs[0] >= probs[1]` check
    p = torch.argmax(probs, dim=1).item()

    print("Target:", target)
    print("Prediction:", p)

In [64]:
# Spot-check the trained model on one validation sample.
# NOTE(review): the module-level name `input` shadows Python's builtin `input`
# for the rest of the session.
input, target = val_ds[8]
predict_single(input, target, model)

  """


Target: tensor(1)
Prediction: 1


In [65]:
# Spot-check a second validation sample.
# NOTE(review): the module-level name `input` shadows Python's builtin `input`
# for the rest of the session.
input, target = val_ds[0]
predict_single(input, target, model)

  """


Target: tensor(0)
Prediction: 0
