References:
1. https://github.com/jf20541/LogisticRegressionPyTorch/blob/main/src/pytorchmodel.py
2. https://pytorch.org/tutorials/beginner/basics/
3. https://stackoverflow.com/questions/42704283/l1-l2-regularization-in-pytorch
4. https://gist.github.com/tuelwer/0b52817e9b6251d940fd8e2921ec5e20

TODOs

1. Add regularization
2. Explore LBFGS in PyTorch
3. Implement fairness variant

In [1]:
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset

# Presumably intended as the RNG seed: the only line that would consume it is
# the commented-out torch.manual_seed call below, so it is unused in this cell.
random_state = 1
# torch.manual_seed(random_state)

In [2]:
# Load the cleaned dataset, taking the first CSV column as the row index,
# then preview the first rows as the cell output.
csv_path = '../data/cleaned.csv'
df = pd.read_csv(csv_path, index_col=0)
df.head()

Unnamed: 0,Duration,Credit Amount,Installment rate,Residence,Age,Number of credits,Maintenance,Target,Account Status_<0,Account Status_<200,...,Housing_own,Housing_rent,Job_management/ highly qualified employee,Job_skilled employee / official,Job_unemployed/ unskilled - non-resident,Job_unskilled - resident,Telephone_none,Telephone_yes,Foreign_no,Foreign_yes
0,6,1169,4,4,67,2,1,1,1,0,...,1,0,0,1,0,0,0,1,0,1
1,48,5951,2,2,22,1,1,0,0,1,...,1,0,0,1,0,0,1,0,0,1
2,12,2096,2,3,49,1,2,1,0,0,...,1,0,0,0,0,1,1,0,0,1
3,42,7882,2,4,45,1,2,1,1,0,...,0,0,0,1,0,0,1,0,0,1
4,24,4870,3,4,53,2,2,0,1,0,...,0,0,0,1,0,0,1,0,0,1


In [3]:
# Features: every column except 'Target'. Targets: the 'Target' column,
# shaped as a single-column 2-D tensor (one row per sample).
Xs = torch.tensor(df.drop(columns='Target').values)
ys = torch.tensor(df['Target'].values).reshape(-1, 1)

# class CustomDataset(Dataset):
#     def __init__(self, Xs, ys):
#         self.Xs = torch.tensor(Xs.values)
#         temp = torch.tensor(ys.values)
#         self.ys = temp.reshape(temp.shape[0], 1)

#     def __len__(self):
#         return len(self.Xs)

#     def __getitem__(self, idx):
#         return self.Xs[idx], self.ys[idx]

In [4]:
class LogisticRegression(nn.Module):
    """A single linear layer whose output is passed through a sigmoid."""

    def __init__(self, input_dim, output_dim):
        """Build the linear layer mapping ``input_dim`` inputs to ``output_dim`` outputs."""
        super().__init__()
        self.linear = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, x):
        """Return the element-wise sigmoid of the linear layer applied to ``x``."""
        logits = self.linear(x)
        return logits.sigmoid()

In [5]:
# Binary cross-entropy on probabilities: LogisticRegression.forward already
# applies the sigmoid, so its output is passed to this loss unchanged.
loss_fn = nn.BCELoss()
# Multiplier for the summed parameter norms that test_loop and run_training
# add on top of the data loss.
l2_lambda = 0.01


def test_loop(Xs, ys, model):
    """
    Score the model on (Xs, ys) and print its accuracy and penalized loss.

    Returns True when the accuracy is 75% or lower, i.e. the model failed
    to find a reasonable solution.
    """
    n_samples = Xs.shape[0]

    with torch.no_grad():
        probs = model(Xs.float())
        data_loss = loss_fn(probs, ys.float()).item()

        # l2 regularization: sum of the 2-norms of every parameter tensor
        penalty = torch.tensor(0.)
        for param in model.parameters():
            penalty += param.norm(2)
        test_loss = data_loss + l2_lambda * penalty

        # number of samples whose 0.5-thresholded prediction equals the target
        hits = ((probs > 0.5).float() == ys).type(torch.float).sum().item()

    correct = hits / n_samples

    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

    return correct <= 0.75

In [6]:
def run_training(epochs=20):
    """
    Fit a new LogisticRegression on the module-level ``Xs``/``ys`` with L-BFGS.

    The minimized objective is ``loss_fn`` on the model's predictions plus
    ``l2_lambda`` times the sum of the 2-norms of all parameter tensors.

    Args:
        epochs: number of ``optimizer.step`` calls to perform (default 20).

    Returns:
        The trained LogisticRegression instance.
    """
    # Cast once up front: L-BFGS may re-evaluate the closure several times per
    # step, so converting inside it would repeat the same work on every call.
    inputs = Xs.float()
    targets = ys.float()

    model = LogisticRegression(inputs.shape[1], 1)
    optimizer = torch.optim.LBFGS(model.parameters(), lr=0.1, line_search_fn='strong_wolfe')

    def closure():
        """Recompute the penalized loss and its gradients for the optimizer."""
        optimizer.zero_grad()

        # Compute prediction and data loss
        pred = model(inputs)

        # l2 regularization: sum of the 2-norms of every parameter tensor
        penalty = sum(w.norm(2) for w in model.parameters())
        loss = loss_fn(pred, targets) + l2_lambda * penalty

        loss.backward()

        return loss

    for _ in range(epochs):
        optimizer.step(closure)

    return model
    
# Retrain with a newly constructed model until test_loop stops reporting
# failure (it returns True when accuracy is too low). The number of attempts
# is capped so the cell cannot loop forever if no run ever passes the check.
max_attempts = 20
for _ in range(max_attempts):
    model = run_training()
    if not test_loop(Xs, ys, model):
        break
else:
    raise RuntimeError(
        f"No model passed the accuracy check after {max_attempts} training attempts"
    )

Test Error: 
 Accuracy: 70.0%, Avg loss: 0.670875 

Test Error: 
 Accuracy: 75.7%, Avg loss: 0.500879 

