In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

import pyro
import pyro.distributions as dist

import numpy as np
import os
import sys
sys.path.append('../')

import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
from pyro.nn import PyroModule, PyroParam, PyroSample

import collections, itertools

In [3]:
# Directory holding the preprocessed .npz files. The original EC2 location is
# kept as the default; set the NTA_DATA_DIR environment variable to point the
# notebook at a different location without editing this cell.
data_dir = os.environ.get("NTA_DATA_DIR", "/home/ec2-user/nta/data/")

In [6]:
# Build the training dataset from the preprocessed "gsc_train" files in
# data_dir. Qualifiers 0, 1, 2 map to gsc_train0.npz .. gsc_train2.npz; the
# constructor loads the first of them immediately.
# NOTE: PreprocessedDataset is defined in a later cell of this file, so that
# cell has to be executed before this one.
dataset = PreprocessedDataset(
    cachefilepath=data_dir,
    basename="gsc_train",
    qualifiers=range(3)
)

# Shuffled mini-batches of 32 samples drawn from the currently loaded file.
loader = DataLoader(dataset, batch_size=32, shuffle=True)

In [42]:
class BNN(PyroModule):
    """Small convolutional classifier: conv -> ReLU -> flatten -> linear -> softmax.

    All layers are plain ``torch.nn`` modules with hard-coded sizes: one input
    channel, 64 filters of size 5x5 (no padding, stride 1) and a linear head
    mapping 50176 flattened features to 11 outputs.

    NOTE(review): no ``pyro.sample`` site is declared in this class, so on its
    own it defines neither priors over the weights nor a likelihood for ``y``.
    """

    def __init__(self):
        super().__init__()

        self.cnn1 = nn.Conv2d(1, 64, kernel_size=5, padding=0, stride=1)
        self.flatten = nn.Flatten()
        # 50176 == 64 * 28 * 28; presumably 32x32 single-channel inputs after
        # the unpadded 5x5 convolution -- TODO confirm against the dataset.
        self.linear = nn.Linear(50176, 11)
        # Normalise across the class axis explicitly. Leaving ``dim`` unset
        # relies on PyTorch's deprecated implicit choice (and warns); for the
        # 2-D (batch, classes) tensor produced by ``linear`` that choice is
        # also axis 1, so the outputs are unchanged.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x, y=None):
        """Return the softmax outputs for a batch of inputs.

        :param x: Batch of inputs accepted by ``cnn1`` (one input channel).
        :param y: Unused. Kept (now optional) so existing ``net(x, y)`` calls
            keep working while ``net(x)`` becomes possible as well.
        :return: Tensor with one row of 11 softmax values per sample.
        """
        features = F.relu(self.cnn1(x))
        logits = self.linear(self.flatten(features))
        return self.softmax(logits)

In [43]:
# Pull a single (shuffled) mini-batch from the loader for interactive inspection.
# Assumes each dataset item unpacks into exactly two arrays -- TODO confirm.
x,y = next(iter(loader))

In [44]:
# Instantiate the network with freshly initialised layers.
net = BNN()

In [53]:
# Materialise the (name, parameter) pairs and display the first parameter's
# name. (A stray trailing token previously made this cell a SyntaxError.)
a = list(net.named_parameters())
a[0][0]

'cnn1.weight'

In [58]:
# Display the name of the first child module registered on the network.
# (A stray ".ipynb_checkpoints" suffix previously turned this into an
# attribute lookup on a str, which raises AttributeError.)
list(net.named_children())[0][0]

'cnn1'

In [25]:
from pyro.infer import SVI, Trace_ELBO, TraceEnum_ELBO, config_enumerate
from pyro.infer.autoguide import AutoDiagonalNormal
from pyro import poutine

@config_enumerate
def model(x,y):
    """Pyro model: build a BNN, convert it to a Pyro module and run it on (x, y).

    :param x: Input batch; the network is placed on the same device as ``x``.
    :param y: Targets, forwarded unchanged to the network.
    :return: The network's output for ``x``.

    NOTE(review): a new ``BNN`` is constructed on every call, i.e. once per
    ``svi.step``; consider building it once outside the model.
    """
    # Follow the batch's device instead of forcing CUDA, so the model also
    # works with train(..., use_cuda=False) and on CPU-only machines.
    net = BNN().to(x.device)
    # Converts the submodules in place so Pyro can manage their parameters.
    pyro.nn.module.to_pyro_module_(net)
    return net(x, y)

# Automatic diagonal-Normal guide over `model`, with poutine.block exposing
# only sites named "obs" to the guide.
# NOTE(review): the model/BNN shown in this file declare no site named "obs" --
# confirm this is the intended set of sites for the guide.
guide = AutoDiagonalNormal(poutine.block(model, expose=["obs"]))


# Enumeration-aware ELBO; max_plate_nesting=1 allows for one plate dimension.
elbo = TraceEnum_ELBO(max_plate_nesting=1)
# elbo.loss(net, config_enumerate(guide, "sequential"))

# Adam optimiser (learning rate 0.03) and the SVI object that ties together
# model, guide, optimiser and loss.
adam = pyro.optim.Adam({"lr": 0.03})
svi = SVI(model, guide, adam, elbo)


In [26]:
def train(svi, train_loader, use_cuda=False):
    """Run one training epoch and return the per-sample average loss.

    :param svi: Object exposing ``step(x, y)``, which performs one gradient
        step and returns that mini-batch's loss.
    :param train_loader: Iterable of ``(x, y)`` mini-batches that also exposes
        a ``dataset`` attribute supporting ``len()``.
    :param use_cuda: When true, each ``x`` and ``y`` is moved with ``.cuda()``
        before the step.
    :return: Sum of the per-batch losses divided by ``len(train_loader.dataset)``.
    """
    def _batches():
        # Yield the mini-batches, relocated to the GPU when requested.
        for inputs, targets in train_loader:
            if use_cuda:
                inputs, targets = inputs.cuda(), targets.cuda()
            yield inputs, targets

    # Accumulate the loss reported by every gradient step of the epoch.
    running_total = sum(
        (svi.step(inputs, targets) for inputs, targets in _batches()),
        0.,
    )

    # Normalise by the number of samples, not by the number of batches.
    return running_total / len(train_loader.dataset)

In [27]:
# Number of full passes over `loader`.
num_epochs = 2

# Per-epoch ELBO estimates (the negated average loss), in epoch order.
train_elbo = []

# Start from an empty Pyro parameter store before training.
pyro.clear_param_store()
for epoch in range(num_epochs):
    # use_cuda=True moves every mini-batch with .cuda(), so a GPU is required.
    total_epoch_loss_train = train(svi, loader, use_cuda=True)
    train_elbo.append(-total_epoch_loss_train)
    print("[epoch %03d]  average training loss: %.4f" % (epoch, total_epoch_loss_train))

AttributeError: 'BNN' object has no attribute 'dim'

In [None]:
from pyro.nn import PyroSample


class BayesianRegression(PyroModule):
    """Linear regression with Normal priors on weight and bias and Normal noise."""

    def __init__(self, in_features, out_features):
        """Create the linear layer and attach priors to its parameters.

        :param in_features: Number of input features of the linear layer.
        :param out_features: Number of outputs of the linear layer.
        """
        super().__init__()
        # Normal(0, 1) prior over the full weight matrix, Normal(0, 10) over the bias.
        weight_prior = dist.Normal(0., 1.).expand([out_features, in_features]).to_event(2)
        bias_prior = dist.Normal(0., 10.).expand([out_features]).to_event(1)

        self.linear = PyroModule[nn.Linear](in_features, out_features)
        self.linear.weight = PyroSample(weight_prior)
        self.linear.bias = PyroSample(bias_prior)

    def forward(self, x, y=None):
        """Sample the noise scale, compute the mean and score ``y`` under a Normal.

        :param x: Input batch; its first dimension is the size of the data plate.
        :param y: Optional observations for the "obs" site.
        :return: The linear layer's output with the last dimension squeezed.
        """
        # The noise scale is drawn before the linear layer is evaluated.
        sigma = pyro.sample("sigma", dist.Uniform(0., 10.))
        mean = self.linear(x).squeeze(-1)
        with pyro.plate("data", x.shape[0]):
            pyro.sample("obs", dist.Normal(mean, sigma), obs=y)
        return mean

In [5]:

class PreprocessedDataset(Dataset):
    """Dataset backed by pre-generated ``.npz`` files, one file held in memory at a time."""

    def __init__(self, cachefilepath, basename, qualifiers):
        """
        A Pytorch Dataset class representing a pre-generated processed dataset stored in
        an efficient compressed numpy format (.npz). The dataset is represented by
        several copies, where each copy is a different variation of the full dataset.
        For example, for training with data augmentation, each copy might have been
        generated with a different random seed.  This class is useful if the
        pre-processing time is a significant fraction of training time.

        The first copy is loaded immediately on construction.

        :param cachefilepath: String for the directory containing pre-processed data.

        :param basename: Base file name from which to construct actual file names.
        Actual file name will be "basename{}.npz".format(i) where i cycles through the
        list of qualifiers.

        :param qualifiers: List of qualifiers for each preprocessed file in this
        dataset.
        """
        self.path = cachefilepath
        self.basename = basename
        # Endless iterator over the qualifiers; load_next() advances it.
        self.num_cycle = itertools.cycle(qualifiers)
        self.tensors = []
        self.load_next()

    def __getitem__(self, index):
        """Return a tuple holding entry ``index`` of every loaded array."""
        return tuple(tensor[index] for tensor in self.tensors)

    def __len__(self):
        """Return the length of the first loaded array, or 0 if nothing is loaded."""
        # Guard the empty case instead of raising IndexError on tensors[0].
        if not self.tensors:
            return 0
        return len(self.tensors[0])

    def load_next(self):
        """
        Call this to load the next copy into memory, such as at the end of an epoch.

        :return: Name of the file that was actually loaded.
        """
        return self.load_qualifier(next(self.num_cycle))

    def load_qualifier(self, qualifier):
        """
        Call this to load a copy of the dataset with the specific qualifier into
        memory, replacing whatever was loaded before.

        :param qualifier: Value formatted into the file name after the base name.

        :return: Name of the file that was actually loaded.
        """
        file_name = os.path.join(self.path, self.basename + "{}.npz".format(qualifier))
        # Read every array eagerly, in archive order, then close the archive.
        # Without the context manager the underlying file handle stays open
        # after each load, i.e. one leaked handle per call.
        with np.load(file_name) as archive:
            self.tensors = [archive[key] for key in archive.files]
        return file_name

In [15]:
# IPython help lookup for PyroSample (interactive leftover; only works in IPython/Jupyter).
PyroSample?

In [17]:
# Rebind `net` to a newly constructed BNN, replacing the earlier instance.
net = BNN()

In [18]:
# Display the module's repr (the layer summary).
net

BNN(
  (cnn1): PyroConv2d(1, 64, kernel_size=(5, 5), stride=(1, 1))
  (flatten): Flatten()
  (linear): PyroLinear(in_features=50176, out_features=32, bias=True)
  (softmax): Softmax(dim=None)
)