In [1]:
import torch

### Simulated Annealing Test

### Baseline Model 

In [22]:
import torch
import torch.nn as nn

###############
# ELM
###############
class ELM():
    def __init__(self, input_size, h_size, num_classes, device=None):
        self._input_size = input_size
        self._h_size = h_size
        self._output_size = num_classes
        self._device = device

        self._alpha = nn.init.uniform_(torch.empty(self._input_size, self._h_size, device=self._device), a=-1., b=1.)
        self._beta = nn.init.uniform_(torch.empty(self._h_size, self._output_size, device=self._device), a=-1., b=1.)
        
        self._alphaA = nn.init.ones_(torch.empty(self._input_size, self._h_size, device=self._device))
        self._betaA = nn.init.ones_(torch.empty(self._h_size, self._output_size, device=self._device))

#         self._bias = torch.zeros(self._h_size, device=self._device)
#         self._biasA = torch.ones_(self._h_size, device=self._device)

        self._activation = torch.relu

    def predict(self, x):
        h = self._activation(x.mm(self._alpha))
        out = h.mm(self._beta)

        return out

    def fit(self, x, t):
        temp = x.mm(self._alpha)
        H = self._activation(temp)

        H_pinv = torch.pinverse(H)
        self._beta = H_pinv.mm(t)


    def evaluate(self, x, t):
        y_pred = self.predict(x)
        acc = torch.sum(torch.argmax(y_pred, dim=1) == torch.argmax(t, dim=1)).item() / len(t)
        return acc

#####################
# Helper Functions
#####################
def to_onehot(batch_size, num_classes, y, device):
    # One hot encoding buffer that you create out of the loop and just keep reusing
    y_onehot = torch.FloatTensor(batch_size, num_classes).to(device)
    #y = y.type(dtype=torch.long)
    y = torch.unsqueeze(y, dim=1)
    # In your for loop
    y_onehot.zero_()
    y_onehot.scatter_(1, y, 1)

    return y_onehot

In [15]:
import argparse
import torch
import torchvision.datasets
from torch.utils.data import DataLoader
import torchvision.transforms as transforms


#################
# Parameters
#################
device = 'cpu'
image_size = 28*28
hidden_size = 200
num_classes = 10

##################
# Datasets
##################
transform = transforms.Compose([
    transforms.ToTensor(),
])
dataset = torchvision.datasets.MNIST(root='~/AI/Datasets/mnist/data', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.MNIST(root='~/AI/Datasets/mnist/data', train=False, transform=transform, download=True)

def get_all_data(dataset, num_workers=30, shuffle=False):
    dataset_size = len(dataset)
    data_loader = DataLoader(dataset, batch_size=dataset_size,
                             num_workers=num_workers, shuffle=shuffle)

    for i_batch, sample_batched in enumerate(data_loader):
        images, labels = sample_batched[0].view(len(dataset), -1).to(device), sample_batched[1].to(device)
    return images, labels

train_images , train_labels = get_all_data(dataset, shuffle=True)
train_labels = to_onehot(batch_size=len(dataset), num_classes=num_classes, y=train_labels, device=device)

test_images , test_labels = get_all_data(dataset, shuffle=False)
test_labels = to_onehot(batch_size=len(dataset), num_classes=num_classes, y=test_labels, device=device)



In [128]:
#################
# Model
#################
elm = ELM(input_size=image_size, h_size=5000, num_classes=num_classes, device=device)
elm.fit(train_images, train_labels)
accuracy = elm.evaluate(test_images, test_labels)

print('Accuracy: {}'.format(accuracy))

Accuracy: 0.9832833333333333


### Annealing Model

In [146]:
class Annealer():
    def __init__(self, input_size, h_size, num_classes, lr=1, device=None):
        self._input_size = input_size
        self._h_size = h_size
        self._output_size = num_classes
        self._device = device
        self._lr = lr

        self._alpha = nn.init.uniform_(torch.empty(self._input_size, self._h_size, device=self._device), a=-1., b=1.)
        self._beta = nn.init.uniform_(torch.empty(self._h_size, self._output_size, device=self._device), a=-1., b=1.)
        
        self._alphaA = nn.init.ones_(torch.empty(self._input_size, self._h_size, device=self._device))
        self._betaA = nn.init.ones_(torch.empty(self._h_size, self._output_size, device=self._device))

        self._activation = torch.relu
        self._loss = nn.L1Loss()

    def predict(self, x):
        x = torch.unsqueeze(x,0)
        h = self._activation(x.mm(self._alpha))
        out = h.mm(self._beta)

        return out
    
    # One sample
    def step(self, X, y):
        # get loss for sample prediction
        pred = self.predict(X)
#         print(pred)
        loss = self._loss(pred, y)
#         print(loss)
        
        # create boolean matrix for all active weights
        feature_map = self._activation(torch.unsqueeze(X,0).mm(self._alpha))
        _alphaA = self._alphaA*feature_map
        _alphaA[_alphaA!=0] = 1
        
        feature_map = feature_map.mm(self._beta)
        _betaA = self._betaA*feature_map
        _betaA[_betaA!=0] = 1
        
        # increase all weights in path and decrease all weights outside of path by ratio of loss
        self._alpha = self._alpha + _alphaA * loss * self._lr - torch.mean(self._alpha)
        self._beta = self._beta + _betaA * loss * self._lr - torch.mean(self._beta)
        
        _alphaA[_alphaA==0] = 2
        _betaA[_betaA==0] = 2
        _alphaA[_alphaA==1] = 0
        _betaA[_betaA==1] = 0
        _alphaA = _alphaA/2
        _betaA = _betaA/2
        
        self._alpha = self._alpha - _alphaA * loss * self._lr - torch.mean(self._alpha)
        self._beta = self._beta - _betaA * loss * self._lr - torch.mean(self._beta)
        

    def fit(self, x, t):
        for i, sample in enumerate(x):
            if i==100:
                return
            self.step(sample, t[i])
            
            
    def predict2(self, x):
#         x = torch.unsqueeze(x,0)
        h = self._activation(x.mm(self._alpha))
        out = h.mm(self._beta)

        return out


    def evaluate(self, x, t):
        y_pred = self.predict2(x)
        acc = torch.sum(torch.argmax(y_pred, dim=1) == torch.argmax(t, dim=1)).item() / len(t)
        return acc

In [147]:
#################
# Model
#################
a = Annealer(input_size=image_size, h_size=hidden_size, num_classes=num_classes, device=device)
a.fit(train_images, train_labels)
accuracy = a.evaluate(test_images, test_labels)

print('Accuracy: {}'.format(accuracy))

Accuracy: 0.18281666666666666
