In [1]:
import torch
import torchsso

import warnings
warnings.filterwarnings("ignore")

vecLib, which is a part of Accelerate, is known not to work correctly with Chainer.
We recommend using other BLAS libraries such as OpenBLAS.
For details of the issue, please see
https://docs.chainer.org/en/stable/tips.html#mnist-example-does-not-converge-in-cpu-mode-on-mac-os-x.

Please be aware that Mac OS X is not an officially supported OS.

  ''')  # NOQA


In [2]:
import os
import pandas as pd
import numpy as np
import json, re
from tqdm import tqdm_notebook

# Torch, Sklearn imports
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import TensorDataset, Dataset, DataLoader, RandomSampler

# Sklearn and Matplotlib
from sklearn.datasets import make_blobs
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['font.size'] = 18
print(torch.__version__)

1.3.1


In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device('cuda') if use_cuda else torch.device('cpu')
(use_cuda, device)

In [None]:
 # Generate a dataset - Args
# Settings shared by the data-generation, training and plotting cells below.
n_samples = 500  # number of points requested from make_blobs
centers = 5  # number of blob centers; int(centers) / 2 is the label-binarization threshold
random_state = 5  # seed passed to make_blobs
batch_size = 5  # mini-batch size of the training DataLoader
epochs = 50  # number of passes over the training loader
plot_interval = 50  # a figure is saved every `plot_interval` training iterations
fig_dir = 'tmp'  # directory the per-iteration figures are written to
n_samples_for_mcplot = 20  # upper bound on the VOGN probability samples drawn as contours

In [None]:
# Sample 2-D blobs, then collapse the cluster ids into two classes:
# ids below int(centers) / 2 become 0, all remaining ids become 1.
X, y = make_blobs(
    n_samples=n_samples,
    n_features=2,
    centers=centers,
    random_state=random_state,
)
y = (y >= int(centers) / 2).astype(y.dtype)

In [None]:
# Display the shapes of the generated features and labels.
X.shape, y.shape

In [None]:
# Evaluation grid: the bounding box of the data padded by 1 on each side,
# sampled with step `h` along both input dimensions.
h = 0.05
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, h),
    np.arange(y_min, y_max, h),
)
# One row per grid point, as float32 on the compute device.
data_meshgrid = torch.from_numpy(
    np.column_stack((xx.ravel(), yy.ravel()))
).float().to(device)

# Training data as float tensors, served in fixed-size mini-batches.
X_tensor = torch.from_numpy(X).float()
y_tensor = torch.from_numpy(y).float()
train_dataset = TensorDataset(X_tensor, y_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size)

In [None]:
# Peek at the first mini-batch yielded by the training loader.
next(iter(train_loader))

In [None]:
def get_entropy(prob: torch.Tensor):
    """Return the element-wise binary entropy of ``prob`` as a NumPy array.

    Computes ``-p * log(p) - (1 - p) * log(1 - p)`` for every element.
    Entries that evaluate to NaN (e.g. ``p`` equal to 0 or 1, where
    ``0 * log(0)`` appears) are replaced by 0.

    Args:
        prob: Tensor of probabilities.

    Returns:
        ``numpy.ndarray`` with the same shape as ``prob``, detached from the
        autograd graph and moved to the CPU.
    """
    positive_term = prob * torch.log(prob)
    negative_term = (1 - prob) * torch.log(1 - prob)
    raw = -positive_term - negative_term
    # NaN -> 0
    cleaned = torch.where(torch.isnan(raw), torch.zeros_like(raw), raw)
    return cleaned.detach().cpu().numpy()

## Model Definition

In [None]:
class MLP(nn.Module):
    """Fully connected network with ReLU hidden layers (or a plain linear model).

    Args:
        input_size: Number of input features; ``forward`` reshapes its input
            to ``(-1, input_size)``.
        output_size: Width of the output layer. If ``None``, the output layer
            has a single unit and ``forward`` returns a flat 1-D tensor.
        hidden_sizes: Widths of the hidden layers, as any sequence of ints
            (list, tuple, ...). ``None`` or an empty sequence builds a linear
            model without hidden layers.
    """

    def __init__(self, input_size, output_size, hidden_sizes=None):
        super(MLP, self).__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        if output_size is not None:
            self.output_size = output_size
            self.squeeze_output = False
        else:
            self.output_size = 1
            self.squeeze_output = True
        self.act = F.relu

        # Normalize to a list so tuples (or other sequences) are accepted;
        # the original list concatenation below only worked for lists.
        widths = list(hidden_sizes) if hidden_sizes is not None else []

        # Define layers
        if not widths:
            # Linear model (also covers an empty hidden_sizes sequence, which
            # previously failed on hidden_sizes[-1]).
            self.hidden_layers = []
            self.output_layer = nn.Linear(self.input_size, self.output_size)
        else:
            # Neural network: layer k maps widths[k-1] -> widths[k], with the
            # first layer fed by the input features.
            fan_ins = [self.input_size] + widths[:-1]
            self.hidden_layers = nn.ModuleList(
                [nn.Linear(n_in, n_out) for n_in, n_out in zip(fan_ins, widths)]
            )
            self.output_layer = nn.Linear(widths[-1], self.output_size)

    def forward(self, x):
        """Apply the network to ``x`` and return the output-layer activations.

        Returns a tensor of shape ``(batch, output_size)``, or a flat
        ``(batch,)`` tensor when the model was built with ``output_size=None``.
        """
        h = x.view(-1, self.input_size)
        for layer in self.hidden_layers:
            h = self.act(layer(h))
        out = self.output_layer(h)
        if self.squeeze_output:
            # Single output unit: flatten (batch, 1) to (batch,).
            out = out.reshape(-1)
        return out

In [None]:
# Constructor arguments shared by both networks: two input features, a single
# flattened output (output_size=None) and one hidden layer of 128 units.
model_kwargs = {
    'input_size': 2,
    'output_size': None,
    'hidden_sizes': [128],
}

In [None]:
# Baseline network, optimized with Adam (default settings).
model1 = MLP(**model_kwargs).to(device)
optimizer1 = torch.optim.Adam(model1.parameters())

# Same architecture, optimized with torchsso's VOGN, which is told the
# number of training examples.
model2 = MLP(**model_kwargs).to(device)
optimizer2 = torchsso.optim.VOGN(
    model2,
    dataset_size=len(train_loader.dataset),
)

## Train Loop

In [None]:
figpaths = []  # paths of the figures saved by the training loop, in creation order
i = 0  # iteration counter, incremented once per mini-batch

In [None]:
# Run training.
# Both models see the same mini-batches. Every `plot_interval` iterations a
# three-panel figure is saved to `fig_dir`: decision boundaries of both models,
# predictive entropy of the Adam model, and predictive entropy of the VOGN model.
for epoch in range(epochs):

    model1.train()
    model2.train()

    for data, target in train_loader:

        data, target = data.to(device), target.to(device)

        def closure1():
            """Forward/backward pass for the Adam model; returns the loss."""
            optimizer1.zero_grad()
            output = model1(data)
            loss = F.binary_cross_entropy_with_logits(output, target)
            loss.backward()
            return loss

        def closure2():
            """Forward/backward pass for the VOGN model; returns (loss, output)."""
            optimizer2.zero_grad()
            output = model2(data)
            loss = F.binary_cross_entropy_with_logits(output, target)
            loss.backward()
            return loss, output

        loss1 = optimizer1.step(closure1)
        loss2, _ = optimizer2.step(closure2)

        if (i + 1) % plot_interval == 0:
            # Setup figures
            fig = plt.figure(figsize=(21, 6))
            gs = fig.add_gridspec(1, 3)

            # Decision boundary
            ax1 = fig.add_subplot(gs[0, 0])
            ax1.set_xlabel('Input 1')
            ax1.set_ylabel('Input 2')
            ax1.set_title(f'Iteration {i+1}')

            # Entropy (Adam)
            ax2 = fig.add_subplot(gs[0, 1])
            ax2.set_xlabel('Input 1')
            ax2.set_ylabel('Input 2')
            ax2.set_title('Entropy (Adam)')

            # Entropy (VOGN)
            ax3 = fig.add_subplot(gs[0, 2])
            ax3.set_xlabel('Input 1')
            ax3.set_ylabel('Input 2')
            ax3.set_title('Entropy (VOGN)')

            model1.eval()
            model2.eval()

            # (Adam) point prediction on the grid; no gradients are needed
            # here, so skip building the autograd graph.
            with torch.no_grad():
                prob = torch.sigmoid(model1(data_meshgrid)).view(xx.shape)
            entropy = get_entropy(prob)
            pred = torch.round(prob).detach().cpu().numpy()

            plot = ax1.contour(xx, yy, pred, colors=['blue'], linewidths=[2])
            # Legend entry via an empty proxy line: ContourSet.collections is
            # deprecated/removed in newer Matplotlib releases.
            ax1.plot([], [], color='blue', linewidth=2, label='Adam')
            im = ax2.pcolormesh(xx, yy, entropy)
            fig.colorbar(im, ax=ax2)

            # (VOGN) get MC samples
            prob, probs = optimizer2.prediction(data_meshgrid, keep_probs=True)
            prob = prob.view(xx.shape)
            entropy = get_entropy(prob)

            # Draw at most n_samples_for_mcplot sampled boundaries, very faint,
            # so that their overlap visualizes the spread of the samples.
            probs = probs[:n_samples_for_mcplot]
            preds = [torch.round(p).detach().cpu().numpy().reshape(xx.shape) for p in probs]
            for pred in preds:
                ax1.contour(xx, yy, pred, colors=['red'], alpha=0.01)
            im = ax3.pcolormesh(xx, yy, entropy)
            fig.colorbar(im, ax=ax3)

            # (VOGN) get mean prediction
            prob = optimizer2.prediction(data_meshgrid, mc=0).view(xx.shape)
            pred = torch.round(prob).detach().cpu().numpy()

            plot = ax1.contour(xx, yy, pred, colors=['red'], linewidths=[2])
            ax1.plot([], [], color='red', linewidth=2, label='VOGN')

            # plot samples
            for label, marker, color in zip([0, 1], ['o', 's'], ['white', 'gray']):
                _X = X[y == label]
                for ax in (ax1, ax2, ax3):
                    ax.scatter(_X[:, 0], _X[:, 1], s=80, c=color, edgecolors='black', marker=marker)

            # save tmp figure
            for ax in (ax1, ax2, ax3):
                ax.grid(linestyle='--')
                ax.set_yticks([-5, 0, 5, 10])
            ax1.legend(loc='lower right')
            ax1.set_aspect(0.8)
            plt.tight_layout()
            figname = f'iteration{i+1}.png'
            figpath = os.path.join(fig_dir, figname)
            os.makedirs(fig_dir, exist_ok=True)
            fig.savefig(figpath)
            plt.close(fig)
            figpaths.append(figpath)

            # Back to training mode: without this, the eval() calls above
            # would stay in effect for the remaining batches of the epoch.
            model1.train()
            model2.train()

        i += 1

    print(f'Train Epoch: {epoch+1}\tLoss(Adam): {loss1:.6f} Loss(VOGN): {loss2:.6f}')
