In [1]:
import numpy as np
import torch
import torch.utils.data as tud
import torch.tensor as T
from torch.autograd import Variable as V
import pickle
import gzip
import torch.optim
import torch.nn as nn
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

## Importing and normalizing the MNIST dataset which will be used for benchmarking

In [2]:
# NOTE(review): URL is not referenced by any cell visible in this notebook;
# presumably it records where the archive was originally obtained -- confirm.
URL = "http://deeplearning.net/data/mnist/"
# File name of the gzip-compressed MNIST pickle, appended to the local data path below.
FILENAME = "mnist.pkl.gz"

def load_mnist(filename):
    """Unpickle and return the object stored in a gzip-compressed pickle file.

    Args:
        filename: path to the ``.pkl.gz`` file.

    Returns:
        Whatever object the pickle contains (decoded with ``latin-1`` so that
        pickles written by Python 2 load under Python 3).

    Note:
        ``pickle`` can execute arbitrary code while loading; only call this on
        files from a trusted source.
    """
    # The context manager closes the gzip handle even if unpickling fails;
    # previously the handle was left open until garbage collection.
    with gzip.open(filename, 'rb') as archive:
        return pickle.load(archive, encoding='latin-1')
# Directory holding the archive, relative to the notebook's working directory.
path = 'data/mnist/'

# The pickle unpacks into three (inputs, labels) pairs: train, validation, test.
df = load_mnist(path + FILENAME)
train_split, valid_split, test_split = df
x, y = train_split
x_valid, y_valid = valid_split
x_test, y_test = test_split

# Standardize using statistics of the training inputs only, and reuse those
# same statistics for the validation inputs.
mean, std = x.mean(), x.std()
x = (x - mean) / std
x_valid = (x_valid - mean) / std
# NOTE(review): x_test is left un-normalized here; apply (x_test - mean) / std
# before feeding it to a model trained on the normalized data.

## Defining the logistic regression network architecture using the torch `nn` module

In [3]:
def get_weights(*dims):
    """Create a trainable parameter of shape ``dims``.

    Values are drawn from a standard normal distribution and divided by the
    first dimension, so larger leading dimensions start with smaller values.

    Args:
        *dims: sizes of each axis; at least one is required.

    Returns:
        An ``nn.Parameter`` of shape ``dims``.
    """
    scaled_noise = torch.randn(dims) / dims[0]
    return nn.Parameter(scaled_noise)
def softmax(x):
    """Softmax along dimension 1 (the class axis of a ``(batch, classes)`` tensor).

    Args:
        x: tensor with at least two dimensions; dimension 1 is normalized.

    Returns:
        Tensor of the same shape as ``x`` whose entries along dimension 1 are
        non-negative and sum to 1.
    """
    # Softmax is invariant to adding a constant per row, so subtracting the
    # row maximum changes nothing mathematically but keeps exp() from
    # overflowing to inf (which previously produced inf/inf = NaN).
    row_max = x.max(dim=1, keepdim=True)[0].detach()
    exps = torch.exp(x - row_max)  # computed once and reused for the denominator
    return exps / exps.sum(dim=1, keepdim=True)

class LogReg_torch(nn.Module):
    """Multinomial logistic regression: one linear layer followed by log-softmax.

    Args:
        dims: number of input features per sample (after flattening).
        output: number of classes.
    """

    def __init__(self,dims,output):
        super().__init__()
        self.dims, self.output = dims, output
        self.l1_w = get_weights(dims, output)  # Layer 1 weights, shape (dims, output)
        self.l1_b = get_weights(output)         # Layer 1 bias, shape (output,)

    def forward(self, x):
        """Return log-probabilities of shape ``(batch, output)``.

        Args:
            x: input batch; every axis after the first is flattened into the
                feature axis.
        """
        x = x.view(x.size(0), -1)          # flatten everything but the batch axis
        x = (x @ self.l1_w) + self.l1_b    # Linear layer
        # log_softmax computes log(softmax(x)) in one numerically stable step;
        # taking exp() and log() separately overflows to NaN for large logits
        # and underflows to -inf for well-separated ones.
        return torch.log_softmax(x, dim=1)

## Defining a logistic regression class (data loader, `fit` and `predict`) with an API similar to scikit-learn's (also supports GPU)

In [260]:
class LogisticRegresssion_T():
    """Multinomial logistic regression trained with Adam, with a scikit-learn-like API.

    Args:
        epochs: number of full passes over the training data.
        batch_size: mini-batch size, or ``'default'`` to derive it from the
            training-set size on every ``fit`` call.
        lr: Adam learning rate.
        weight_decay: Adam weight decay (L2 penalty).
        gpu: ``1`` to run on ``"cuda"``; any other value runs on ``"cpu"``.
    """

    # The automatic batch size is chosen so one epoch has roughly this many batches.
    _DEFAULT_BATCHES_PER_EPOCH = 210

    def __init__(self,epochs = 3,batch_size='default', lr = 3e-3, weight_decay = 0, gpu = 0):
        self.epochs,self.batch_size = epochs, batch_size
        # Remembered separately because fit() overwrites self.batch_size with
        # the resolved integer; without this a refit would reuse a stale size.
        self._auto_batch_size = batch_size == 'default'
        self.device = "cuda" if gpu ==1  else "cpu"
        self.lr = lr
        self.weight_decay = weight_decay

    def _resolve_batch_size(self, n_samples):
        """Return the batch size to use for a dataset of ``n_samples`` rows."""
        if self._auto_batch_size:
            # Clamp to 1: fewer samples than batches would otherwise give a
            # batch size of 0, which DataLoader rejects.
            return max(1, n_samples // self._DEFAULT_BATCHES_PER_EPOCH)
        return self.batch_size

    def dataloader_gpu(self,x,y):
        """Wrap ``x`` and ``y`` in a DataLoader whose tensors live on ``self.device``.

        Inputs are cast to float32 and labels to int64, the dtypes required by
        the float32 model weights and by ``nn.NLLLoss`` respectively.
        """
        x_t = torch.as_tensor(x, dtype=torch.float32)
        y_t = torch.as_tensor(y, dtype=torch.long)
        batch_size = self.batch_size
        if isinstance(batch_size, str):
            # Called before fit() resolved the 'default' placeholder.
            batch_size = self._resolve_batch_size(x_t.shape[0])
        dataset = tud.TensorDataset(x_t.to(self.device), y_t.to(self.device))
        return tud.DataLoader(dataset, batch_size=batch_size)

    def fit(self,x,y):
        """Train a fresh model on ``x`` (samples first) and integer labels ``y``.

        The number of classes is the number of distinct values in ``y``.
        After fitting, ``self.batch_size`` holds the batch size actually used
        and ``self.net`` the trained network.

        Returns:
            ``self``, so calls can be chained as in scikit-learn.
        """
        self.batch_size = self._resolve_batch_size(x.shape[0])
        # The network flattens each sample, so count all non-batch elements.
        n_features = int(np.prod(x.shape[1:]))
        n_classes = len(np.unique(np.asarray(y)))
        trn_dl = self.dataloader_gpu(x, y)
        loss = nn.NLLLoss()
        self.net = LogReg_torch(n_features, n_classes).to(self.device)
        optimizer = torch.optim.Adam(self.net.parameters(), lr=self.lr, weight_decay=self.weight_decay)
        for epoch in range(self.epochs):
            # Batches are already on self.device (see dataloader_gpu).
            for xt, yt in trn_dl:
                batch_loss = loss(self.net(xt), yt)
                optimizer.zero_grad()
                batch_loss.backward()
                optimizer.step()
        return self

    def predict(self,x):
        """Return the predicted class index for each row of ``x`` as a NumPy array."""
        inputs = torch.as_tensor(x, dtype=torch.float32).to(self.device)
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            log_probs = self.net(inputs)
        return log_probs.detach().cpu().numpy().argmax(1)

## Comparing Performance vs. SKLearn Logistic Regression

In [261]:
# Torch implementation with its default settings (3 epochs, CPU).
TorchLog_cpu = LogisticRegresssion_T()
# scikit-learn baseline using the L-BFGS solver.
SKLog_cpu = LogisticRegression(
    multi_class="auto",
    solver="lbfgs",
)

In [262]:
# %time measures only the fit call on this line; the cell's displayed value is
# the accuracy of the Torch model on the validation split.
%time TorchLog_cpu.fit(x,y)
accuracy_score(y_valid,TorchLog_cpu.predict(x_valid))

Wall time: 4.46 s


0.9268

In [263]:
# %time measures only the fit call on this line; the cell's displayed value is
# the accuracy of the scikit-learn model on the validation split.
%time SKLog_cpu.fit(x,y)
accuracy_score(y_valid,SKLog_cpu.predict(x_valid))



Wall time: 10.3 s


0.9266

## Results

 **The Torch version of logistic regression achieved essentially the same validation accuracy as sklearn's `LogisticRegression` (92.68% vs. 92.66%) while training about 2.3× faster (4.46 s vs. 10.3 s wall time). I tried setting `n_jobs` in the sklearn version, but it somehow gave worse results. The Torch version also provides the option to use a GPU, which is expected to yield much better results on bigger datasets.**