In [1]:
import scipy.io as scio
import numpy as np

import os
import time
import math
import torch
import torch.nn.functional as F
 
# Location of the MATLAB data file, relative to the notebook's working directory.
dataFile = './data1.mat'
# Load every variable stored in the .mat file into `data`.
data = scio.loadmat(dataFile)

In [2]:
# Display the variable names available in the loaded .mat file.
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'TrainingX', 'TrainingY', 'TestX', 'TestY'])

In [3]:
# Pull the training/test features and labels out of the loaded .mat contents.
x_train, y_train, x_test, y_test = data["TrainingX"], data["TrainingY"], data["TestX"], data["TestY"]
# NOTE(review): the disabled line below would re-wrap each array with np.array;
# presumably redundant -- confirm before deleting it.
# x_train, y_train, x_test, y_test = np.array(x_train), np.array(y_train), np.array(x_test), np.array(y_test)

In [4]:
def kernel(X, Y, sigma_2=None):
    """Gaussian (RBF) kernel matrix between the rows of X and the rows of Y.

    Computes ``K[i, j] = exp(-||X[i] - Y[j]||^2 / (2 * sigma_2))``.

    Args:
        X: array-like of shape (N1, d).
        Y: array-like of shape (N2, d).
        sigma_2: squared bandwidth. When omitted it is set to the mean squared
            distance over all (X[i], Y[j]) pairs. Pass the value used for the
            training kernel to evaluate other points with the same bandwidth.

    Returns:
        numpy.ndarray of shape (N1, N2), dtype float64.

    Raises:
        ValueError: if the bandwidth is not strictly positive (for example
            when every pairwise distance is zero).
    """
    # Work in float64 so integer-typed inputs cannot overflow when squared.
    X = np.asarray(X, dtype=np.float64)
    Y = np.asarray(Y, dtype=np.float64)

    x_sq = np.sum(X ** 2, axis=-1)
    y_sq = np.sum(Y ** 2, axis=-1)
    sq_dist = x_sq[:, None] + y_sq[None, :] - 2.0 * np.dot(X, Y.T)
    # The expansion above can go slightly negative through round-off.
    np.maximum(sq_dist, 0.0, out=sq_dist)

    if sigma_2 is None:
        # Mean over all N1 * N2 entries; dividing by N2 ** 2 is only the mean
        # when the matrix happens to be square.
        sigma_2 = np.mean(sq_dist)
    if not sigma_2 > 0:
        raise ValueError("kernel bandwidth sigma_2 must be positive, got {}".format(sigma_2))

    return np.exp(-sq_dist / (2.0 * sigma_2))

In [5]:
# Kernel features: training points against themselves, test points against
# the training points (so both matrices have one column per training sample).
kx_train, kx_test = kernel(x_train,x_train), kernel(x_test, x_train)

dtype = torch.float32
# put tensor on cpu(or you can try GPU)
device = torch.device("cpu")

# Features and labels share `dtype` and `device`, so changing `device` above
# moves everything together instead of leaving the labels behind on the CPU.
KX_train = torch.tensor(kx_train, dtype=dtype, device=device)
Y_train = torch.tensor(y_train, dtype=dtype, device=device)

KX_test = torch.tensor(kx_test, dtype=dtype, device=device)
Y_test = torch.tensor(y_test, dtype=dtype, device=device)

In [6]:
def test_loss_accu(KX_test, Y_test, w, lambda_param):
    Z = torch.mm(KX_test, w)
    p = 1.0/(1.0+torch.exp(-Z))
    pred = (p>0.5)*2-1
    correct = pred.eq(Y_test.view_as(pred)).sum().item()
    accu = correct/len(Y_test)

    A = 1.0/(1.0+torch.exp(-Z*Y_test))
    l2_regularization = torch.sum(w ** 2)
    loss = -torch.mean(torch.log(A.view(-1)))+lambda_param*l2_regularization
    return loss, accu

#### Q1: GD

In [7]:
def GD(KX_train, Y_train, KX_test, Y_test, n_iterations, learning_rate, lambda_param, epsilon):
    """Train kernel logistic regression with full-batch gradient descent.

    Minimises ``mean(-logsigmoid(y * (K w))) + lambda_param * sum(w ** 2)``.
    A progress line is appended to
    ``./results/GD_<learning_rate>_<lambda_param>.txt`` on iteration 1 and on
    every 25th iteration; any file left by a previous run is removed first.

    Args:
        KX_train: (n_train, N) training kernel features.
        Y_train: training labels, one per row of KX_train (compared against
            predictions coded as -1 / +1).
        KX_test: (n_test, N) test kernel features.
        Y_test: test labels.
        n_iterations: maximum number of gradient steps.
        learning_rate: step size.
        lambda_param: multiplier of the squared-L2 penalty.
        epsilon: training stops once the gradient norm falls below this value.

    Returns:
        None; results are written to the log file.
    """
    # One weight per kernel column, created next to the data it multiplies.
    N = KX_train.shape[1]
    w = torch.zeros((N, 1), dtype=KX_train.dtype, device=KX_train.device, requires_grad=True)
    torch.nn.init.kaiming_uniform_(w, a=math.sqrt(1000))

    filename = './results/GD_{}_{}.txt'.format(learning_rate, lambda_param)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    try:
        os.remove(filename)
        print('Removed previous results!')
    except FileNotFoundError:
        # Nothing to clean up on a first run; other OS errors should surface.
        pass

    t0 = time.time()
    t_test = 0  # seconds spent on test evaluation, excluded from reported time
    for i in range(1, n_iterations + 1):
        Z = torch.mm(KX_train, w)
        with torch.no_grad():
            pred = (torch.sigmoid(Z) > 0.5) * 2 - 1
            correct = pred.eq(Y_train.view_as(pred)).sum().item()
            accu = correct / len(Y_train)

        # logsigmoid keeps the loss finite where exp(-margin) would overflow.
        l2_regularization = torch.sum(w ** 2)
        loss = -torch.mean(F.logsigmoid(Z * Y_train.view_as(Z))) + lambda_param * l2_regularization

        if w.grad is not None:
            w.grad.zero_()
        loss.backward()

        with torch.no_grad():
            w -= learning_rate * w.grad

        if i == 1 or i % 25 == 0:
            t_test_0 = time.time()
            test_loss, test_accu = test_loss_accu(KX_test, Y_test, w, lambda_param)
            t_test += time.time() - t_test_0

            result = "iteration {}: Time:{:.2f}, TrainLoss: {:.4f}, TrainAccu: {:.4f}, TestLoss: {:.4f}, TestAccu: {:.4f}".format(i, time.time()-t_test-t0, loss.item(), accu, test_loss, test_accu)
            with open(filename, 'a') as fp:
                fp.write(result + '\n')

        if w.grad.norm() < epsilon:
            break
            

In [8]:
# Settings for the full-batch gradient-descent experiment.
# n_iterations: upper bound on the number of GD steps.
n_iterations = 10000 
# One GD run (and one results file) per learning rate.
learning_rates = [0.005]
# Multiplier of the squared-L2 penalty on w.
lambda_param = 1e-4
# GD stops once the gradient norm drops below this value.
epsilon = 1e-5

for learning_rate in learning_rates:
    GD(KX_train, Y_train, KX_test, Y_test, n_iterations, learning_rate, lambda_param, epsilon)

Removed previous results!


KeyboardInterrupt: 

#### Q2: SGD

In [None]:
from torch.utils.data import TensorDataset
import random

In [None]:
def SGD(KX_train, Y_train, KX_test, Y_test, n_epoches,batch_size, learning_rate, lambda_param, epsilon):
    """Train kernel logistic regression with mini-batch stochastic gradient descent.

    Each step minimises ``mean(-logsigmoid(y * (K w))) + lambda_param * sum(w ** 2)``
    on one shuffled mini-batch. Progress lines are appended to
    ``./results/SGD_<learning_rate>_<lambda_param>_<batch_size>.txt`` whenever
    ``batch_idx * batch_size`` is a multiple of 500; any file left by a
    previous run is removed first.

    Args:
        KX_train: (n_train, N) training kernel features.
        Y_train: training labels, one per row of KX_train (compared against
            predictions coded as -1 / +1).
        KX_test: (n_test, N) test kernel features.
        Y_test: test labels.
        n_epoches: maximum number of passes over the training set.
        batch_size: number of samples per mini-batch.
        learning_rate: step size.
        lambda_param: multiplier of the squared-L2 penalty.
        epsilon: training stops once a mini-batch gradient norm falls below
            this value.

    Returns:
        None; results are written to the log file.
    """
    # One weight per kernel column, created next to the data it multiplies.
    N = KX_train.shape[1]
    w = torch.zeros((N, 1), dtype=KX_train.dtype, device=KX_train.device, requires_grad=True)
    torch.nn.init.kaiming_uniform_(w, a=math.sqrt(1000))

    filename = './results/SGD_{}_{}_{}.txt'.format(learning_rate, lambda_param,batch_size)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    try:
        os.remove(filename)
        print('Removed previous results!')
    except FileNotFoundError:
        # Nothing to clean up on a first run; other OS errors should surface.
        pass

    # The tensors are already in memory, so worker processes and pinned
    # memory would only add overhead; the loader runs in the main process.
    train_dataset = torch.utils.data.TensorDataset(KX_train, Y_train)
    train_loader = torch.utils.data.DataLoader(train_dataset,
        batch_size=batch_size, shuffle=True)

    t0 = time.time()
    t_test = 0  # seconds spent on test evaluation, excluded from reported time
    for epoch in range(1, n_epoches + 1):
        for batch_idx, (kx_batch, y_batch) in enumerate(train_loader):
            Z = torch.mm(kx_batch, w)
            with torch.no_grad():
                pred = (torch.sigmoid(Z) > 0.5) * 2 - 1
                correct = pred.eq(y_batch.view_as(pred)).sum().item()
                accu = correct / len(y_batch)

            # logsigmoid keeps the loss finite where exp(-margin) would overflow.
            l2_regularization = torch.sum(w ** 2)
            loss = -torch.mean(F.logsigmoid(Z * y_batch.view_as(Z))) + lambda_param * l2_regularization

            if w.grad is not None:
                w.grad.zero_()
            loss.backward()

            with torch.no_grad():
                w -= learning_rate * w.grad

            if (batch_idx*batch_size) % 500 == 0:
                t_test_0 = time.time()
                test_loss, test_accu = test_loss_accu(KX_test, Y_test, w, lambda_param)
                t_test += time.time() - t_test_0

                # Reported time excludes test evaluation, as in GD.
                result = "epoch {}[{}/{} ({:.0f}%)]: Time:{:.2f}, TrainLoss: {:.4f}, TrainAccu: {:.4f}, TestLoss: {:.4f}, TestAccu: {:.4f}".format(epoch, batch_idx*batch_size, len(train_loader.dataset),
                        100. * batch_idx / len(train_loader), time.time()-t_test-t0, loss.item(), accu, test_loss, test_accu)
                with open(filename, 'a') as fp:
                    fp.write(result + '\n')

            if w.grad.norm() < epsilon:
                # Return instead of break so the remaining epochs stop too.
                return

In [None]:
# Settings for the mini-batch SGD experiment.
# n_epoches: upper bound on the number of passes over the training set.
n_epoches = 100
# Every (batch_size, learning_rate) pair gets its own run and results file.
batch_sizes = [1, 10, 100]
learning_rates =[0.001, 0.005]
# Multiplier of the squared-L2 penalty on w.
lambda_param =1e-4
# Threshold on the mini-batch gradient norm used by SGD's stopping test.
epsilon = 1e-5

for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        SGD(KX_train, Y_train, KX_test, Y_test, n_epoches, batch_size, learning_rate, lambda_param, epsilon)

#### BFGS

In [None]:
# Reduced training set: the first 2000 rows followed by the last 2000 rows
# of the full training arrays, for features and labels alike.
x_train2 = np.concatenate([x_train[:2000], x_train[-2000:]], axis=0)
y_train2 = np.concatenate([y_train[:2000], y_train[-2000:]], axis=0)

In [None]:
# Kernel features for the reduced training set: subset against itself, and
# test points against the subset.
kx_train2, kx_test2 = kernel(x_train2,x_train2), kernel(x_test, x_train2)

dtype = torch.float32
# put tensor on cpu(or you can try GPU)
device = torch.device("cpu")

# Features and labels share `dtype` and `device`, so changing `device` above
# moves everything together instead of leaving the labels behind on the CPU.
KX_train2 = torch.tensor(kx_train2, dtype=dtype, device=device)
Y_train2 = torch.tensor(y_train2, dtype=dtype, device=device)

KX_test2 = torch.tensor(kx_test2, dtype=dtype, device=device)
Y_test2 = torch.tensor(y_test, dtype=dtype, device=device)

In [None]:
def BFGS(KX_train, Y_train, KX_test, Y_test, n_iterations, learning_rate, lambda_param, epsilon):
    """Train kernel logistic regression with BFGS using a fixed step size.

    Minimises ``mean(-logsigmoid(y * (K w))) + lambda_param * sum(w ** 2)``.
    A dense (N, N) approximation ``H`` of the inverse Hessian is kept and
    each step moves ``w`` by ``-learning_rate * H @ grad``. One progress line
    per iteration is appended to
    ``./results/BFGS_<learning_rate>_<lambda_param>.txt``; the first line is
    also printed. Any file left by a previous run is removed first.

    Args:
        KX_train: (n_train, N) training kernel features.
        Y_train: training labels, one per row of KX_train (compared against
            predictions coded as -1 / +1).
        KX_test: (n_test, N) test kernel features.
        Y_test: test labels.
        n_iterations: maximum number of iterations (the initial evaluation
            counts as iteration 1).
        learning_rate: fixed step size applied to the search direction.
        lambda_param: multiplier of the squared-L2 penalty.
        epsilon: training stops once the gradient norm falls below this value.

    Returns:
        None; results are written to the log file.
    """
    # One weight per kernel column, created next to the data it multiplies.
    N = KX_train.shape[1]
    w = torch.zeros((N, 1), dtype=KX_train.dtype, device=KX_train.device, requires_grad=True)
    torch.nn.init.kaiming_uniform_(w, a=math.sqrt(1000))

    filename = './results/BFGS_{}_{}.txt'.format(learning_rate, lambda_param)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    try:
        os.remove(filename)
        print('Removed previous results!')
    except FileNotFoundError:
        # Nothing to clean up on a first run; other OS errors should surface.
        pass

    def loss_and_grad():
        """Return (loss, accuracy) on the training set and refresh w.grad."""
        Z = torch.mm(KX_train, w)
        with torch.no_grad():
            pred = (torch.sigmoid(Z) > 0.5) * 2 - 1
            accu = pred.eq(Y_train.view_as(pred)).sum().item() / len(Y_train)
        # logsigmoid keeps the loss finite where exp(-margin) would overflow.
        loss = -torch.mean(F.logsigmoid(Z * Y_train.view_as(Z))) + lambda_param * torch.sum(w ** 2)
        if w.grad is not None:
            w.grad.zero_()
        loss.backward()
        return loss.item(), accu

    t0 = time.time()
    t_test = 0.0  # seconds spent on test evaluation, excluded from reported time

    def log_progress(iteration, train_loss, train_accu):
        """Evaluate on the test set, append one line to the log, return it."""
        nonlocal t_test
        t_test_0 = time.time()
        test_loss, test_accu = test_loss_accu(KX_test, Y_test, w, lambda_param)
        t_test += time.time() - t_test_0
        result = "iteration {}: Time:{:.2f}, TrainLoss: {:.4f}, TrainAccu: {:.4f}, TestLoss: {:.4f}, TestAccu: {:.4f}".format(iteration, time.time()-t_test-t0, train_loss, train_accu, test_loss, test_accu)
        with open(filename, 'a') as fp:
            fp.write(result + '\n')
        return result

    loss, accu = loss_and_grad()
    print(log_progress(1, loss, accu))

    H = torch.eye(N, dtype=w.dtype, device=w.device)
    # Clones: w.grad is zeroed in place on every later evaluation.
    grad_past = w.grad.detach().clone()
    r = grad_past.clone()  # first search direction is the plain gradient
    curvature_eps = 1e-10

    for i in range(2, n_iterations + 1):
        with torch.no_grad():
            w -= learning_rate * r

        loss, accu = loss_and_grad()
        grad = w.grad.detach()

        s = -learning_rate * r      # step just taken in w
        y = grad - grad_past        # resulting change in the gradient
        grad_past = grad.clone()

        sy = torch.dot(s.view(-1), y.view(-1))
        # Update H only when the curvature condition s.y > 0 holds; otherwise
        # 1 / (s.y) blows up and H stops being positive definite.
        if sy > curvature_eps:
            rho = 1.0 / sy
            # Expanded form of (I - rho s y^T) H (I - rho y s^T) + rho s s^T
            # for symmetric H: O(N^2) work instead of dense N x N products.
            Hy = torch.mm(H, y)
            yHy = torch.dot(y.view(-1), Hy.view(-1))
            H = H - rho * (torch.mm(s, Hy.t()) + torch.mm(Hy, s.t())) \
                + (rho * rho * yHy + rho) * torch.mm(s, s.t())

        r = torch.mm(H, grad)

        log_progress(i, loss, accu)

        if grad.norm() < epsilon:
            break

In [None]:
# Settings for the BFGS experiment on the reduced training set.
# n_iterations: upper bound on the number of BFGS iterations.
n_iterations = 1000
# One BFGS run (and one results file) per learning rate.
learning_rates = [0.005]
# Multiplier of the squared-L2 penalty on w.
lambda_param = 1e-4
# BFGS stops once the gradient norm drops below this value.
epsilon = 1e-5

for learning_rate in learning_rates:
    BFGS(KX_train2, Y_train2, KX_test2, Y_test2, n_iterations, learning_rate, lambda_param, epsilon)

#### LBFGS

In [None]:
def LBFGS(KX_train, Y_train, KX_test, Y_test, n_iterations, history_size, learning_rate, lambda_param, epsilon):
    """Train kernel logistic regression with limited-memory BFGS and a fixed step.

    Minimises ``mean(-logsigmoid(y * (K w))) + lambda_param * sum(w ** 2)``.
    The search direction comes from the two-loop recursion over the most
    recent ``history_size`` (s, y) pairs. One progress line per iteration is
    appended to ``./results/LBFGS_<learning_rate>_<lambda_param>.txt``; the
    first line is also printed. Any file left by a previous run is removed
    first.

    Args:
        KX_train: (n_train, N) training kernel features.
        Y_train: training labels, one per row of KX_train (compared against
            predictions coded as -1 / +1).
        KX_test: (n_test, N) test kernel features.
        Y_test: test labels.
        n_iterations: maximum number of iterations (the initial evaluation
            counts as iteration 1).
        history_size: number of (s, y) pairs kept; must be at least 1.
        learning_rate: fixed step size applied to the search direction.
        lambda_param: multiplier of the squared-L2 penalty.
        epsilon: training stops once the gradient norm falls below this value.

    Returns:
        None; results are written to the log file.

    Raises:
        ValueError: if ``history_size`` is smaller than 1.
    """
    if history_size < 1:
        raise ValueError("history_size must be at least 1, got {}".format(history_size))

    # One weight per kernel column, created next to the data it multiplies.
    N = KX_train.shape[1]
    w = torch.zeros((N, 1), dtype=KX_train.dtype, device=KX_train.device, requires_grad=True)
    torch.nn.init.kaiming_uniform_(w, a=math.sqrt(1000))

    filename = './results/LBFGS_{}_{}.txt'.format(learning_rate, lambda_param)
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    try:
        os.remove(filename)
        print('Removed previous results!')
    except FileNotFoundError:
        # Nothing to clean up on a first run; other OS errors should surface.
        pass

    def loss_and_grad():
        """Return (loss, accuracy) on the training set and refresh w.grad."""
        Z = torch.mm(KX_train, w)
        with torch.no_grad():
            pred = (torch.sigmoid(Z) > 0.5) * 2 - 1
            accu = pred.eq(Y_train.view_as(pred)).sum().item() / len(Y_train)
        # logsigmoid keeps the loss finite where exp(-margin) would overflow.
        loss = -torch.mean(F.logsigmoid(Z * Y_train.view_as(Z))) + lambda_param * torch.sum(w ** 2)
        if w.grad is not None:
            w.grad.zero_()
        loss.backward()
        return loss.item(), accu

    t0 = time.time()
    t_test = 0.0  # seconds spent on test evaluation, excluded from reported time

    def log_progress(iteration, train_loss, train_accu):
        """Evaluate on the test set, append one line to the log, return it."""
        nonlocal t_test
        t_test_0 = time.time()
        test_loss, test_accu = test_loss_accu(KX_test, Y_test, w, lambda_param)
        t_test += time.time() - t_test_0
        result = "iteration {}: Time:{:.2f}, TrainLoss: {:.4f}, TrainAccu: {:.4f}, TestLoss: {:.4f}, TestAccu: {:.4f}".format(iteration, time.time()-t_test-t0, train_loss, train_accu, test_loss, test_accu)
        with open(filename, 'a') as fp:
            fp.write(result + '\n')
        return result

    loss, accu = loss_and_grad()
    print(log_progress(1, loss, accu))

    # Clones: w.grad is zeroed in place on every later evaluation.
    grad_past = w.grad.detach().view(-1).clone()
    r = grad_past.clone().view(N, 1)  # first search direction is the plain gradient

    rhos, ss, ys = [], [], []
    H_diag = 1.0  # scaling of the initial inverse Hessian until a pair is stored
    curvature_eps = 1e-10

    for idx in range(2, n_iterations + 1):
        with torch.no_grad():
            w -= learning_rate * r

        loss, accu = loss_and_grad()
        grad = w.grad.detach().view(-1)

        s = (-learning_rate * r).view(-1)   # step just taken in w
        y = grad - grad_past                # resulting change in the gradient
        grad_past = grad.clone()

        y_s = s.dot(y)
        # Store the pair only when the curvature condition s.y > 0 holds;
        # otherwise 1 / (s.y) blows up and corrupts the direction.
        if y_s > curvature_eps:
            if len(ss) >= history_size:
                ss.pop(0)
                ys.pop(0)
                rhos.pop(0)
            ss.append(s)
            ys.append(y)
            rhos.append(1.0 / y_s)
            H_diag = y_s / y.dot(y)

        # Two-loop recursion: newest pair first, then oldest pair first.
        num_old = len(ss)
        q = grad.clone()
        al = [None] * num_old
        for k in range(num_old - 1, -1, -1):
            al[k] = ss[k].dot(q) * rhos[k]
            q.sub_(al[k] * ys[k])

        direction = q * H_diag
        for k in range(num_old):
            be_k = ys[k].dot(direction) * rhos[k]
            direction.add_((al[k] - be_k) * ss[k])

        r = direction.view(N, 1)

        log_progress(idx, loss, accu)

        if grad.norm() < epsilon:
            break

In [None]:
# Settings for the L-BFGS experiment on the reduced training set.
# n_iterations: upper bound on the number of L-BFGS iterations.
n_iterations = 1000
# Number of most recent (s, y) pairs L-BFGS keeps.
history_size = 10
# One L-BFGS run (and one results file) per learning rate.
learning_rates = [0.01]
# Multiplier of the squared-L2 penalty on w.
lambda_param = 1e-4
# L-BFGS stops once the gradient norm drops below this value.
epsilon = 1e-5

for learning_rate in learning_rates:
    LBFGS(KX_train2, Y_train2, KX_test2, Y_test2, n_iterations,history_size,  learning_rate, lambda_param, epsilon)