In [1]:
# default_exp core

# The foundations

> Let's try to replicate the standard usage of MNL frameworks

We will try to implement a basic MNL package to compare against biogeme/others... let's start from this [blog post](https://aaronkub.com/2020/02/12/logistic-regression-with-pytorch.html)

In [2]:
#export
import random

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset

from fastprogress import progress_bar, master_bar
from fastcore.all import *

grab some data

In [3]:
data = pd.read_csv("./data/Iris.csv").drop("Id", axis=1)

X_numpy = data.drop("Species", axis=1).values

target_map = {
    val: index for index, val in enumerate(data.Species.unique())
}
y_numpy = data.Species.map(target_map).values

X = torch.tensor(X_numpy, dtype=torch.float32)
y = torch.tensor(y_numpy)

In [4]:
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [5]:
target_map

{'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

In [6]:
#export
def prepare_data(data, x_cols=None, target_col=None):
    "This is far from optimal, as we shu=ould be reading values lazily"
    target_col = ifnone(target_col, list(data.columns)[-1])
    x_cols = [col for col in ifnone(x_cols, list(data.columns)) if col!=target_col]
    print(f'{x_cols=},\n{target_col=}')
    X_numpy = data.loc[:, x_cols].values
    target_map = {
        val: index for index, val in enumerate(data.loc[:,target_col].unique())
    }
    y_numpy = data.loc[:,target_col].map(target_map).values
    
    X = torch.tensor(X_numpy, dtype=torch.float32)
    y = torch.tensor(y_numpy)
    
    return X, y

In [7]:
X, y = prepare_data(data)

x_cols=['SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm'],
target_col='Species'


In [8]:
X[0:5], y[0:5]

(tensor([[5.1000, 3.5000, 1.4000, 0.2000],
         [4.9000, 3.0000, 1.4000, 0.2000],
         [4.7000, 3.2000, 1.3000, 0.2000],
         [4.6000, 3.1000, 1.5000, 0.2000],
         [5.0000, 3.6000, 1.4000, 0.2000]]),
 tensor([0, 0, 0, 0, 0]))

In [9]:
len(X), len(y)

(150, 150)

## Train/Test split

In [10]:
random.shuffle(range_of(X))

In [11]:
#export
def train_valid_split(X, y, pct=0.2, shuffle=True):
    assert len(X) == len(y), "X and y don't have the same number of elements"
    indices = range_of(X)
    if shuffle:
        random.shuffle(indices)
    n = len(X)
    n_train = int(n * (1-0.2))
    X_train, y_train = X[indices[:n_train]], y[indices[:n_train]]
    X_valid, y_valid = X[indices[n_train:]], y[indices[n_train:]]
    return X_train, y_train, X_valid, y_valid

In [12]:
_X = L([0,1,2,3,4,5,6,7,8,9])
_y = L('a,b,c,d,e,f,g,h,i,j'.split(','))

test_eq(train_valid_split(_X,_y, shuffle=False)[0], [0,1,2,3,4,5,6,7])
test_eq(train_valid_split(_X,_y, shuffle=False)[3], ['i','j'])

## Model

In [13]:
#export
class LinearMNL(nn.Module):
    
    def __init__(self, in_dim=4, out_dim=5, bias=False):
        super().__init__()
        store_attr()
        self.linear = nn.Linear(in_dim, out_dim, bias=bias)
        
    def forward(self, x):
        return self.linear(x)

In [14]:
model = LinearMNL(4,3)
x = torch.rand(10,4)

model(x).shape

torch.Size([10, 3])

In [15]:
loss_func = torch.nn.CrossEntropyLoss()

In [16]:
#export
class DataLoaders:
    """
    A class to store dataloaders (train/valid/test....)"""
    def __init__(self, train_dl, valid_dl=None):
        store_attr()
        
    def one_batch(self, dl=None):
        dl = ifnone(dl, self.dl_train)
        return next(iter(dl))
    
    @delegates(DataLoader, but='batch_size')
    @classmethod
    def from_datasets(cls, train_ds, valid_ds=None, batch_size=1, **kwargs):
        train_dl = DataLoader(train_ds, batch_size=batch_size, **kwargs)
        if valid_ds is not None:
            valid_dl = DataLoader(valid_ds, batch_size=2*batch_size, **kwargs)
        else:
            valid_dl = None
        return cls(train_dl, valid_dl)
    
    @delegates(DataLoader, but='batch_size')
    @classmethod
    def from_Xy(cls, X, y, pct=None, batch_size=1, **kwargs):
        if pct is not None:
            X_train, y_train, X_valid, y_valid = train_valid_split(X, y, pct)
        else:
            X_train, y_train, X_valid, y_valid = X, y, None, None
        train_ds = TensorDataset(X_train, y_train)
        if X_valid is not None:
            valid_ds = TensorDataset(X_valid, y_valid)
        else:
            valid_ds = None
        return cls.from_datasets(train_ds, valid_ds, batch_size)
        

In [17]:
dls = DataLoaders.from_Xy(X, y, pct=0.2, batch_size=8)

In [61]:
#export
class Learner:
    "A wrapper around dls, model and optimizer"
    def __init__(self, dls, model, loss_func=torch.nn.CrossEntropyLoss()):
        store_attr()
        

    def train_one_epoch(self):
        for batch, (x, y) in enumerate(self.dls.train_dl):
            pred = self.model(x)  # 1
            loss = self.loss_func(pred, y)

            #backprop
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()
        return loss
    
    def validate(self, dl=None):
        dl = ifnone(dl, self.dls.valid_dl)
        if (dl is None):
            return 'No validation data'
        val_loss, accu = 0, 0
        with torch.no_grad():
            for batch, (x, y) in enumerate(dl):
                pred = self.model(x)
                val_loss += self.loss_func(pred, y).item()
                accu += (pred.argmax(1) == y).type(torch.float).sum().item()
        return val_loss, accu / len(dl.dataset)
    
    def fit(self, n_epochs=10, lr=0.01, wd=0.01):
        
        self.optimizer = torch.optim.SGD(
            self.model.parameters(), 
            lr=lr, 
            weight_decay=wd
        )
        
        for epoch in progress_bar(range_of(n_epochs)):
            loss = self.train_one_epoch()
            val_loss, accuracy = self.validate()
            print(f'epoch = {epoch:3.0f}, val_loss = {val_loss:.3f}, accuracy = {accuracy:.2f}')

In [62]:
model = LinearMNL(4,3)
learn = Learner(dls, model, loss_func)

In [63]:
learn.fit(20)

epoch =   0, val_loss = 1.581, accuracy = 0.90
epoch =   1, val_loss = 1.494, accuracy = 0.93
epoch =   2, val_loss = 1.431, accuracy = 0.93
epoch =   3, val_loss = 1.377, accuracy = 0.93
epoch =   4, val_loss = 1.331, accuracy = 0.93
epoch =   5, val_loss = 1.291, accuracy = 0.93
epoch =   6, val_loss = 1.255, accuracy = 0.93
epoch =   7, val_loss = 1.224, accuracy = 0.93
epoch =   8, val_loss = 1.196, accuracy = 0.93
epoch =   9, val_loss = 1.171, accuracy = 0.93
epoch =  10, val_loss = 1.148, accuracy = 0.97
epoch =  11, val_loss = 1.127, accuracy = 0.97
epoch =  12, val_loss = 1.108, accuracy = 1.00
epoch =  13, val_loss = 1.090, accuracy = 1.00
epoch =  14, val_loss = 1.074, accuracy = 1.00
epoch =  15, val_loss = 1.058, accuracy = 1.00
epoch =  16, val_loss = 1.044, accuracy = 1.00
epoch =  17, val_loss = 1.031, accuracy = 1.00
epoch =  18, val_loss = 1.018, accuracy = 1.00
epoch =  19, val_loss = 1.006, accuracy = 1.00


## Export

In [64]:
# hide
from nbdev.export import *
notebook2script()

Converted 00_core.ipynb.
Converted index.ipynb.
