In [1]:
require 'nn'
require 'nngraph'
require 'torch'
require 'image'

-- Create a table of options
opt = {
    data_dir = './',

    -- model parameters

    ninput = 310,
    nhidden = 100,
    noutput = 3,
    
    -- optimization parameters

    -- learning rate
    learning_rate = 1e-3,
    -- learning rate decay
    learning_rate_decay = 0.97,
    -- in number of epochs, when to start decaying the learning rate
    learning_rate_decay_after = 10,
    -- decay rate for rmsprop
    decay_rate = 0.95,
       
    -- batch size
    batch_size = 32,
    -- number of full passes through the training data
    max_epochs = 20,
    -- clip gradients at
    grad_clip = 5,
   
    -- bookkeeping

    -- torch manual random number generator seed
    seed = 123,
    -- how many steps/minibatches between printing out the loss
    print_every = 50,
    -- every how many iterations should we evaluate on validation data?
    eval_val_every = 50,
    -- output directory where checkpoints get written
    checkpoint_dir = 'cv',
    -- filename to autosave the checkpont to. Will be inside checkpoint_dir/
    savefile = 'mlqp',

}

torch.setnumthreads(2)
torch.manualSeed(opt.seed)

In [2]:
matio = require 'matio'
require 'math'
require 'torch'
require 'math'

MinibatchLoader = {}
MinibatchLoader.__index = MinibatchLoader

function MinibatchLoader.create(batch_size)
    local self = {}
    setmetatable(self, MinibatchLoader)

    -- construct a tensor with all the data
    print('loading data files...')
    local train_input = matio.load('hw1/train_test/train_data.mat')
    local train_label = matio.load('hw1/train_test/train_label.mat')
    
    train_label.train_label = train_label.train_label + 2

    self.x_batches = torch.Tensor(train_input.train_data:size())
    self.y_batches = torch.Tensor(train_label.train_label:size())
    --     shuffle the data
    shuffle = torch.randperm(train_input.train_data:size(1))
    for i = 1,train_input.train_data:size(1) do
        self.x_batches[i] = train_input.train_data[shuffle[i]]
        self.y_batches[i] = train_label.train_label[shuffle[i]]
    end
    
    self.nbatches = math.floor(self.x_batches:size(1)/batch_size)
    assert(self.x_batches:size(1) == self.y_batches:size(1))

    self.current_batch = 0
    self.batch_size = batch_size

    
    --  feature normalization
    local mean = {}
    local stdv = {}
    for i = 1,self.x_batches:size(2) do
        mean[i] = self.x_batches[{{},{i}}]:mean()
        self.x_batches[{{},{i}}]:add(-mean[i])
        stdv[i] = self.x_batches[{{},{i}}]:std()
        self.x_batches[{{},{i}}]:div(stdv[i])
    end
    
    print('data load done.')
    collectgarbage()
    return self
end

function MinibatchLoader:next_batch()
    self.current_batch = (self.current_batch % self.nbatches) + 1
    return self.x_batches[{{(self.current_batch-1)*self.batch_size+1,self.current_batch*self.batch_size},{}}], self.y_batches[{{(self.current_batch-1)*self.batch_size+1,self.current_batch*self.batch_size},{}}]
end

In [3]:
function get_network(nhidden, ninput, noutput)
    local input = nn.Identity()()
    local square_input = nn.Square()(input)
    
    local hh1 = nn.Linear(ninput, nhidden)(square_input)
    local hh2 = nn.Linear(ninput, nhidden)(input)
    
    local hh = nn.CAddTable()({hh1, hh2})
    local h = nn.Sigmoid()(hh)
    
    local square_h = nn.Square()(h)
    
    local output1 = nn.Linear(nhidden, noutput)(square_h)
    local output2 = nn.Linear(nhidden, noutput)(h)
    
    local output = nn.CAddTable()({output1, output2})
    
    nngraph.annotateNodes()
    return nn.gModule({input}, {output}) 
end

In [4]:
model = {}
model.mlqp = get_network(opt.nhidden, opt.ninput, opt.noutput)
model.criterion = nn.CrossEntropyCriterion()

params, grad_params = model.mlqp:getParameters()
graph.dot(model.mlqp.fg, 'mlqp', 'mlqp')

In [5]:
-- initialization
params:uniform(-0.08, 0.08) -- small numbers uniform
print('number of parameters in the model: ' .. params:nElement())

number of parameters in the model: 62806	


In [6]:
loader = MinibatchLoader.create(opt.batch_size)
function feval(x)
    if x ~= params then
        params:copy(x)
    end
    grad_params:zero()

    ------------------- forward pass -------------------
    local data, label = loader:next_batch()
    local output = model.mlqp:forward(data)
    local loss = model.criterion:forward(output,label)

    ------------------ backward pass -------------------
    local doutput = model.criterion:backward(output, label)
    model.mlqp:backward(data,doutput)
    -- clip gradient element-wise
    grad_params:clamp(-opt.grad_clip, opt.grad_clip)
    
--     grad_params:div(#data)
--     loss = loss/#data
    return loss, grad_params
end

loading data files...	


data load done.	


In [7]:
-- start optimization here
require 'optim'
function eval()
-- read data
    local test_input = matio.load('hw1/train_test/test_data.mat')
    local test_label = matio.load('hw1/train_test/test_label.mat')
    test_label.test_label = test_label.test_label + 2
    
        --  feature normalization
    local mean = {}
    local stdv = {}
    for i = 1,test_input.test_data:size(2) do
        mean[i] = test_input.test_data[{{},{i}}]:mean()
        test_input.test_data[{{},{i}}]:add(-mean[i])
        stdv[i] = test_input.test_data[{{},{i}}]:std()
        test_input.test_data[{{},{i}}]:div(stdv[i])
    end
    
    correct = 0
    for i=1,test_label.test_label:size(1) do
        local groundtruth = test_label.test_label[i]
        local prediction = model.mlqp:forward(test_input.test_data[i])
        local confidences, indices = torch.sort(prediction, true)
        if groundtruth[1] == indices[1] then
            correct = correct + 1
        end
    end

    print('Precision in Test Set '.. 100*correct/test_label.test_label:size(1) .. ' % ')
end


train_losses = {}
val_losses = {}
local optim_state = {learningRate = opt.learning_rate, alpha = opt.decay_rate}
local iterations = opt.max_epochs*loader.nbatches
local loss0 = nil
for i = 1, iterations do
    local timer = torch.Timer()
    local _, loss = optim.rmsprop(feval, params, optim_state)
    local time = timer:time().real
    -- exponential learning rate decay
    if i % loader.nbatches == 0 and opt.learning_rate_decay < 1 then
        if i >= opt.learning_rate_decay_after*loader.nbatches then
            local decay_factor = opt.learning_rate_decay
            optim_state.learningRate = optim_state.learningRate * decay_factor -- decay it
--             print('decayed learning rate by a factor ' .. decay_factor .. ' to ' .. optim_state.learningRate)
        end
    end
    -- every now and then or on last iteration
    if i % opt.eval_val_every == 0 or i == iterations then
        -- evaluate loss on validation data
        local val_loss = eval()
        val_losses[i] = val_loss
    end

    if i % opt.print_every == 0 then
        print(string.format("%d/%d (epoch %.3f), train_loss = %6.8f, grad/param norm = %6.4e, time/batch = %.2fs", i, iterations, i/iterations, loss[1], grad_params:norm() / params:norm(), time))
    end
   
    if i % 10 == 0 then collectgarbage() end

    -- handle early stopping if things are going really bad
    if loss0 == nil then loss0 = loss[1] end
    if loss[1] > loss0 * 3 then
        print('loss is exploding, aborting.')
        break -- halt
    end
end

Precision in Test Set 83.090379008746 % 	
50/300 (epoch 0.167), train_loss = 0.00962227, grad/param norm = 3.9188e-03, time/batch = 0.00s	


Precision in Test Set 83.090379008746 % 	
100/300 (epoch 0.333), train_loss = 0.00223159, grad/param norm = 9.7231e-04, time/batch = 0.00s	


Precision in Test Set 83.090379008746 % 	
150/300 (epoch 0.500), train_loss = 0.00068660, grad/param norm = 3.4923e-04, time/batch = 0.00s	


Precision in Test Set 83.090379008746 % 	


200/300 (epoch 0.667), train_loss = 0.00018212, grad/param norm = 7.9455e-05, time/batch = 0.00s	


Precision in Test Set 83.090379008746 % 	
250/300 (epoch 0.833), train_loss = 0.00005818, grad/param norm = 2.5810e-05, time/batch = 0.00s	


Precision in Test Set 83.090379008746 % 	


300/300 (epoch 1.000), train_loss = 0.00002039, grad/param norm = 1.0368e-05, time/batch = 0.00s	
