# Load [a9a](https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html#a9a) data

In [1]:
import numpy as np
from sklearn.datasets import load_svmlight_files

x_train, y_train, x_test, y_test = load_svmlight_files(['a9a', 'a9a.t'])
y_train = np.where(y_train > 0, y_train, y_train + 1)
y_test = np.where(y_test > 0, y_test, y_test + 1)

In [2]:
import torch
import torch.autograd
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F
import torch.multiprocessing as mp

In [3]:
def to_torch_sparse_tensor(M):
    M = M.tocoo().astype(np.float32)
    i = torch.from_numpy(np.vstack((M.row, M.col))).long()
    v = torch.from_numpy(M.data)
    shape = torch.Size(M.shape)
    return torch.sparse.FloatTensor(i, v, shape)

# Define Logistic Regression Model

In [4]:
model = torch.nn.Sequential()
model.add_module('linear', torch.nn.Linear(x_train.shape[1], 1))
model.add_module('sigmoid', torch.nn.Sigmoid())
optimizer = optim.Adagrad(model.parameters())

In [5]:
from sklearn.utils import shuffle

batch_size = 100
train_size = x_train.shape[0]

def train(model):
    x, y = shuffle(x_train, y_train, random_state=1)

    def get_batch():
        i = 0
        for i in range(0, train_size // batch_size):
            yield Variable(to_torch_sparse_tensor(x[i:i+batch_size]).to_dense()), Variable(torch.from_numpy(y[i:i+batch_size]).float())

    for vx, vy in get_batch():
        optimizer.zero_grad()
        fx = model.forward(vx)
        output = F.binary_cross_entropy(fx, vy)
        output.backward()
        optimizer.step()

    print(output.data[0])

In [6]:
for _ in range(0, 100):
    train(model)

  "Please ensure they have the same size.".format(target.size(), input.size()))


0.29141736030578613
0.2592703104019165
0.24327896535396576
0.23324090242385864
0.22619690001010895
0.2209147810935974
0.2167748659849167
0.21342603862285614
0.21065229177474976
0.20831218361854553
0.20630861818790436
0.20457252860069275
0.2030528485774994
0.20171146094799042
0.2005186825990677
0.1994512975215912
0.19849087297916412
0.19762231409549713
0.19683349132537842
0.19611425697803497
0.19545617699623108
0.19485199451446533
0.19429579377174377
0.19378234446048737
0.19330722093582153
0.19286659359931946
0.19245702028274536
0.1920756697654724
0.1917198896408081
0.19138753414154053
0.19107647240161896
0.19078484177589417
0.19051134586334229
0.19025419652462006
0.1900123804807663
0.18978460133075714
0.18956995010375977
0.18936733901500702
0.18917599320411682
0.18899506330490112
0.1888239085674286
0.18866193294525146
0.18850842118263245
0.18836286664009094
0.1882248818874359
0.18809381127357483
0.18796932697296143
0.18785108625888824
0.18773871660232544
0.18763186037540436
0.187530174

# Calculate AUC

In [7]:
from sklearn.metrics import roc_auc_score
roc_auc_score(y_test, model(Variable(to_torch_sparse_tensor(x_test).to_dense())).data.numpy())

0.86449031584102132