References:
1. https://github.com/jf20541/LogisticRegressionPyTorch/blob/main/src/pytorchmodel.py
2. https://pytorch.org/tutorials/beginner/basics/

In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

random_state = 1
torch.manual_seed(random_state)

  from .autonotebook import tqdm as notebook_tqdm


<torch._C.Generator at 0x11eb66770>

In [2]:
df = pd.read_csv('../data/cleaned.csv', index_col=0)
df.head()

Unnamed: 0,Duration,Credit Amount,Installment rate,Residence,Age,Number of credits,Maintenance,Target,Account Status_<0,Account Status_<200,...,Housing_own,Housing_rent,Job_management/ highly qualified employee,Job_skilled employee / official,Job_unemployed/ unskilled - non-resident,Job_unskilled - resident,Telephone_none,Telephone_yes,Foreign_no,Foreign_yes
0,6,1169,4,4,67,2,1,1,1,0,...,1,0,0,1,0,0,0,1,0,1
1,48,5951,2,2,22,1,1,0,0,1,...,1,0,0,1,0,0,1,0,0,1
2,12,2096,2,3,49,1,2,1,0,0,...,1,0,0,0,0,1,1,0,0,1
3,42,7882,2,4,45,1,2,1,1,0,...,0,0,0,1,0,0,1,0,0,1
4,24,4870,3,4,53,2,2,0,1,0,...,0,0,0,1,0,0,1,0,0,1


In [3]:
Xs = df.loc[:, df.columns != 'Target']
ys = df['Target']

class CustomDataset(Dataset):
    def __init__(self, Xs, ys):
        self.Xs = torch.tensor(Xs.values)
        temp = torch.tensor(ys.values)
        self.ys = temp.reshape(temp.shape[0], 1)

    def __len__(self):
        return len(self.Xs)

    def __getitem__(self, idx):
        return self.Xs[idx], self.ys[idx]

In [4]:
class LogisticRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))

In [5]:
model = LogisticRegression(Xs.shape[1], 1)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

all_data = CustomDataset(Xs, ys)

train_dataloader = DataLoader(all_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(all_data, batch_size=64, shuffle=True)

In [6]:
def train_loop(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X.float())
        loss = loss_fn(pred, y.float())

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")


def test_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X.float())
            test_loss += loss_fn(pred, y.float()).item()
            correct += ((pred > 0.5).float() == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [7]:
epochs = 10
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)
print("Done!")

Epoch 1
-------------------------------
loss: 67.876816  [    0/ 1000]
Test Error: 
 Accuracy: 30.0%, Avg loss: 56.625912 

Epoch 2
-------------------------------
loss: 47.026093  [    0/ 1000]
Test Error: 
 Accuracy: 30.0%, Avg loss: 45.265449 

Epoch 3
-------------------------------
loss: 41.177120  [    0/ 1000]
Test Error: 
 Accuracy: 30.0%, Avg loss: 10.212309 

Epoch 4
-------------------------------
loss: 7.752886  [    0/ 1000]
Test Error: 
 Accuracy: 70.0%, Avg loss: 19.526373 

Epoch 5
-------------------------------
loss: 18.927238  [    0/ 1000]
Test Error: 
 Accuracy: 70.0%, Avg loss: 15.874580 

Epoch 6
-------------------------------
loss: 18.764656  [    0/ 1000]
Test Error: 
 Accuracy: 58.4%, Avg loss: 0.984820 

Epoch 7
-------------------------------
loss: 1.117746  [    0/ 1000]
Test Error: 
 Accuracy: 64.0%, Avg loss: 1.224223 

Epoch 8
-------------------------------
loss: 0.796649  [    0/ 1000]
Test Error: 
 Accuracy: 51.1%, Avg loss: 0.871791 

Epoch 9
------