In [1]:
from sklearn import datasets
from keras.utils import to_categorical
import torch
import numpy as np
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.optimizer import Optimizer
from torch.utils import data

# Load the Iris data set; class labels are one-hot encoded with
# keras' to_categorical.
iris = datasets.load_iris()
iris_x = iris.data
iris_y = to_categorical(iris.target)

print(len(iris_x))

# Convert features and one-hot labels to torch tensors.
tensor_x, tensor_y = (torch.Tensor(np.array(arr)) for arr in (iris_x, iris_y))

# Pair every feature row with its label row so a DataLoader can batch them.
iris_dataset = data.TensorDataset(tensor_x, tensor_y)

Using TensorFlow backend.


150


In [0]:
class SimulatedAnnealing(Optimizer):
    """Gradient-free optimizer that perturbs parameters and keeps or reverts them.

    On every ``step`` each parameter group is perturbed with noise drawn from
    ``sampler``; the move is kept when it lowers the loss, or otherwise with
    probability ``exp((old_loss - new_loss) / t)`` (Metropolis criterion).

    Args:
        params: iterable of parameters or parameter-group dicts.
        sampler: object exposing ``sample(size)`` that returns a tensor of
            the requested size, added to each parameter as the perturbation.
        t0: initial temperature.
        anneal_rate: decay rate; the temperature is recomputed as
            ``max(t0 * exp(-anneal_rate * iteration), min_temp)``.
        min_temp: lower bound of the temperature.
        anneal_every: the temperature is recomputed whenever the group's
            iteration counter is a positive multiple of this value.

    Note:
        The per-group ``iteration`` counter advances once per perturbed
        parameter *tensor*, not once per ``step`` call.
    """

    def __init__(self, params, sampler, t0=1, anneal_rate=0.001,
                 min_temp=1e-5, anneal_every=100):
        defaults = dict(sampler=sampler, t0=t0, t=t0, anneal_rate=anneal_rate,
                        min_temp=min_temp, anneal_every=anneal_every, iteration=0, toprint=True)
        super(SimulatedAnnealing, self).__init__(params, defaults)

    def step(self, closure=None):
        """Perturb every parameter group once and accept or revert each move.

        Args:
            closure: callable that re-evaluates the model and returns the
                loss tensor. It is called once for the baseline and once
                per parameter group.

        Returns:
            The loss tensor corresponding to the parameters left in place
            after all groups have been processed.

        Raises:
            ValueError: if ``closure`` is None.
        """
        if closure is None:
            raise ValueError("loss closure is required")

        # Baseline loss; replaced by the perturbed loss each time a move is
        # accepted so later groups are compared against the current state.
        current_loss = closure()

        for group in self.param_groups:
            sampler = group['sampler']

            # Snapshot raw values (no autograd graph) so a rejected move can
            # be undone.
            backups = [p.data.clone() for p in group['params']]

            for p in group['params']:
                if group['iteration'] > 0 \
                   and group['iteration'] % group['anneal_every'] == 0:
                    rate = -group['anneal_rate'] * group['iteration']
                    group['t'] = np.maximum(group['t0'] * np.exp(rate), group['min_temp'])

                random_perturbation = sampler.sample(p.data.size())
                norm = torch.norm(p.data)
                # An all-zero tensor has norm 0; dividing would fill it with
                # NaN and the whole group's move would always be rejected.
                if norm.item() > 0:
                    p.data = p.data / norm
                p.data.add_(random_perturbation)
                group['iteration'] += 1

            new_loss = closure()
            current_loss, is_accept, topr = self.anneal(
                current_loss, new_loss, group['t'], group['toprint'], group['min_temp'])
            group['toprint'] = topr
            if not is_accept:
                for p, backup in zip(group['params'], backups):
                    p.data = backup

        # Returned only after *all* groups were processed (previously the
        # method returned from inside the loop after the first group).
        return current_loss

    def anneal(self, loss, new_loss, t, toprint, min_t):
        """Decide whether the perturbed parameters are kept.

        Args:
            loss: loss tensor before the perturbation.
            new_loss: loss tensor after the perturbation.
            t: current temperature.
            toprint: whether the one-off diagnostic line may still be printed.
            min_t: minimum temperature; the diagnostic is printed the first
                time a non-improving move is evaluated at this temperature.

        Returns:
            Tuple ``(loss_to_report, accepted, toprint)``.
        """
        def acceptance_prob(old, new, temp):
            return torch.exp((old - new)/temp)

        topr = toprint

        loss_v = loss.item()
        new_loss_v = new_loss.item()

        # Strict improvement is always accepted.
        if new_loss_v < loss_v:
            return new_loss, True, topr

        # Metropolis criterion for a non-improving move.
        ap_v = acceptance_prob(loss, new_loss, t).item()
        if t == min_t and topr:
            print("old = ", loss_v, "| pert = ", new_loss_v, " | ap = ", ap_v, " | t = ", t)
            topr = False
        if ap_v > np.random.rand():
            return new_loss, True, topr

        # Move rejected: report the original loss.
        return loss, False, topr

In [0]:
class GaussianSampler(object):
    """Draws tensors of normally distributed noise.

    Args:
        mu: mean of the normal distribution.
        sigma: standard deviation of the normal distribution.
        dtype: one of ``'float'``, ``'int'`` or ``'long'``; integer kinds are
            produced by casting the float sample.
        cuda: when True, samples are created on the CUDA device.

    Raises:
        KeyError: if ``dtype`` is not one of the supported names.
    """

    def __init__(self, mu, sigma, dtype='float', cuda=False):
        self.sigma = sigma
        self.mu = mu
        self.cuda = cuda
        self.dtype_str = dtype
        # Legacy tensor types, kept as the public ``dtype`` attribute and
        # used for the integer cast in ``sample``.
        dtypes = {
            'float': torch.cuda.FloatTensor if cuda else torch.FloatTensor,
            'int': torch.cuda.IntTensor if cuda else torch.IntTensor,
            'long': torch.cuda.LongTensor if cuda else torch.LongTensor
        }
        self.dtype = dtypes[dtype]

    def sample(self, size):
        """Return a tensor of shape ``size`` filled with N(mu, sigma) samples.

        Args:
            size: iterable of ints (e.g. ``torch.Size``). An empty size
                yields a 0-dim tensor.

        Returns:
            A float32 tensor, or an int/long tensor when the sampler was
            built with ``dtype='int'`` / ``dtype='long'``.
        """
        device = 'cuda' if self.cuda else 'cpu'
        # torch.empty honours an empty shape (0-dim tensor); the legacy
        # FloatTensor(*size) constructor built a shape-(0,) tensor instead.
        rand_block = torch.empty(tuple(size), dtype=torch.float32, device=device)
        rand_block.normal_(self.mu, self.sigma)

        if self.dtype_str in ('int', 'long'):
            rand_block = rand_block.type(self.dtype)

        return rand_block

In [0]:
class Net(torch.nn.Module):
    """Fully connected network mapping 4 input features to 3 sigmoid outputs.

    Layer widths are 4 -> 18 -> 10 -> 3 with ReLU after the first two
    layers. A ``Dropout(p=0.5)`` module is registered as ``dropout`` but is
    not applied in ``forward``.
    """

    def __init__(self):
        super().__init__()
        linear = torch.nn.Linear
        # Modules are created in the same order as before so the random
        # initialisation stream is consumed identically.
        self.lin1 = linear(4, 18)
        self.lin2 = linear(18, 10)
        self.dropout = torch.nn.Dropout(p=0.5, inplace=False)
        self.lin3 = linear(10, 3)

    def forward(self, x):
        """Return per-class sigmoid activations for the input batch ``x``."""
        hidden = x
        for layer in (self.lin1, self.lin2):
            hidden = F.relu(layer(hidden))
        return F.sigmoid(self.lin3(hidden))

In [5]:
# Training configuration.
epochs = 1000        # passes over the data set per annealing rate
log_interval = 500   # a progress line is printed when batch_idx % log_interval == 0
batch_size = 30

# Noise source for the optimizer; created on the GPU when one is available.
sampler = GaussianSampler(mu=0, sigma=1, cuda=torch.cuda.is_available())

iris_dataloader = data.DataLoader(iris_dataset, batch_size=batch_size, shuffle=True)

# Binary cross-entropy averaged over all elements. Averaging is the default
# reduction, so the deprecated ``size_average=True`` argument is not needed.
loss_f = torch.nn.BCELoss()

def train(model, optimizer, epoch):
    """Run one pass over ``iris_dataloader``, one optimizer step per batch.

    Args:
        model: module to train; it is switched to training mode.
        optimizer: optimizer whose ``step`` accepts a loss closure and
            returns a loss tensor.
        epoch: epoch number, used only in the progress message.

    Reads the module-level ``iris_dataloader``, ``loss_f`` and
    ``log_interval``.
    """
    model.train()
    use_cuda = torch.cuda.is_available()

    # ``inputs``/``targets`` (rather than ``data``) keep the batch tensors from
    # shadowing the imported ``torch.utils.data`` alias. The deprecated
    # ``torch.autograd.Variable`` wrappers were no-ops and have been removed.
    for batch_idx, (inputs, targets) in enumerate(iris_dataloader):
        if use_cuda:
            inputs, targets = inputs.cuda(), targets.cuda()

        def closure():
            # Re-evaluates the loss on the current batch. The backward pass
            # is kept so gradient-based optimizers can use this loop too.
            optimizer.zero_grad()
            output = model(inputs)
            loss = loss_f(output, targets)
            loss.backward()
            return loss

        loss = optimizer.step(closure)
        loss_v = loss.item()
        if batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(inputs), len(iris_dataloader.dataset),
                100. * batch_idx / len(iris_dataloader), loss_v))

def run(model, optimizer):
    """Train ``model`` for ``epochs`` epochs, numbering them from 1."""
    for completed in range(epochs):
        train(model, optimizer, completed + 1)


import time

# Annealing rates to compare; one freshly initialised model is trained per rate.
ar = [0.9, 0.01, 0.003, 0.0001, 0.000001]
results = []  # one [training seconds, loss on the full data set] entry per rate
for _ar in ar:
    model = Net()

    if torch.cuda.is_available():
        model.cuda()

    optimizer = SimulatedAnnealing(model.parameters(), sampler=sampler, anneal_rate=_ar)

    start_time = time.time()
    run(model, optimizer)
    time_to = time.time() - start_time

    # Always evaluate on the full data set. Previously the tensors were only
    # bound inside the CUDA branch, so a CPU run failed, and the assignment
    # overwrote the imported ``data`` module alias.
    eval_x, eval_y = tensor_x, tensor_y
    if torch.cuda.is_available():
        eval_x, eval_y = eval_x.cuda(), eval_y.cuda()

    model.eval()
    with torch.no_grad():  # evaluation only; no autograd graph needed
        pred = model(eval_x)
        loss = loss_f(pred, eval_y)
    loss_v = loss.item()
    results.append([time_to, loss_v])
  



old =  0.7352328896522522 | pert =  9.392358779907227  | ap =  0.0  | t =  1e-05
old =  0.8151333332061768 | pert =  4.496001720428467  | ap =  0.0  | t =  1e-05
old =  0.43153101205825806 | pert =  11.077691078186035  | ap =  0.0  | t =  1e-05


In [7]:
# One line per annealing rate: "<rate>  -  [<training seconds>, <final loss>]".
for pair in zip(ar, results):
    rate, result = pair
    print('{}  -  {}'.format(rate, result))

0.9  -  [13.578881740570068, 0.4852268695831299]
0.01  -  [13.542827606201172, 0.507444441318512]
0.003  -  [13.67728066444397, 0.4145023226737976]
0.0001  -  [13.689284324645996, 0.6666187047958374]
1e-06  -  [13.55225920677185, 2.4264583587646484]
