In [4]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils as utils
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
import torchvision.datasets as dsets
from torchvision import models
from torchvision.utils import save_image
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/thesis02')


from tqdm import tqdm

from flows import PlanarFlow
from utils import Binarize
from codes import Linear_flipout, Flatten, count_parameters, EfficientNet


from torchmeta.datasets import Omniglot, CIFARFS
from torchmeta.transforms import Categorical, ClassSplitter, Rotation
from torchvision.transforms import Compose, Resize, ToTensor
from torchmeta.utils.data import BatchMetaDataLoader


#from __future__ import print_function
import argparse
import cv2
import matplotlib.pyplot as plt

import os
cur_dir = "C:/Users/KJH/OneDrive - skku.edu/KJH/Projects/2019winter_research"
#cur_dir = "C:/Users/KJH-Laptop/OneDrive - skku.edu/KJH/Projects/2019winter_research/"
os.chdir(cur_dir)
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import time
import copy
import random as rd

device = torch.device('cuda')

class bnn(nn.Module):
    def __init__(self, num_classes):
        super(bnn, self).__init__()
        self.num_classes = num_classes
        self.encoder = nn.Sequential(
            Flatten(),

            nn.Linear(784, 256, bias = False),
            nn.ELU()
        ) 
        self.l0 = nn.Sequential(
            nn.BatchNorm1d(256),
            nn.ELU(),
            nn.Linear(256, 256)
        )
        self.l1 = nn.Sequential(
            nn.BatchNorm1d(256),
            nn.ELU(),
            nn.Linear(256, 256)
        )
        self.l2 = nn.Sequential(
            nn.BatchNorm1d(256),
            nn.ELU(),
            nn.Linear(256, self.num_classes)
        )
        self.num_hiddens = [
            [256],
            [256],
            [256]
        ]

    def sample_noise(self, noise):
        return torch.sigmoid(noise[0] + noise[1] * torch.empty(noise[0].data.shape, device = device).normal_(0, 1))
            
    def forward(self, x, noise):            
        x = self.encoder(x)
        x = self.l0(x * self.sample_noise(noise[0]))
        x = self.l1(x * self.sample_noise(noise[1]))
        x = self.l2(x * self.sample_noise(noise[2]))
        return x
    
    def noise_grad(self):
        grads = []
        for layer in self.noise:
            grads.append([layer[0].grad, layer[1].grad])
        return torch.stack([torch.stack(x, dim = 0) for x in grads], dim = 0)
    
    
class sampler_net(nn.Module):
    def __init__(self, num_classes):
        super(sampler_net, self).__init__()
        self.input_dim = [1, 28, 28]
        self.num_classes = num_classes
        
        self.ctx = torch.hub.load('rwightman/gen-efficientnet-pytorch', 'efficientnet_b0', pretrained=True)
        self.ctx.conv_stem = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.ctx.classifier = nn.Identity()
        
        self.layer = nn.LSTM(input_size=1280, hidden_size=1280, num_layers=5, bias=True, batch_first = True).to(device)
        self.dec_mu = nn.ModuleList()
        self.dec_logvar = nn.ModuleList()
        
        self.sample_net = bnn(self.num_classes)
        self.num_params = self.sample_net.num_hiddens
        
        for layer_size in self.num_params:
            self.dec_mu.append(
                nn.Sequential(
                    nn.ELU(),
                    nn.Linear(1280, 256, bias = True),
                    nn.ELU(),
                    nn.Linear(256, 256, bias = True),
                ).to(device))
            
            self.dec_logvar.append(
                nn.Sequential(
                    nn.ELU(),
                    nn.Linear(1280, 256, bias = True),
                    nn.ELU(),
                    nn.Linear(256, 256, bias = True),
                ).to(device))
            
            
    def forward(self, x_train, label_train, x_test):
        x = x_train
        ctx = self.ctx(x).view(x.shape[0], self.num_classes, 256)
        h = torch.stack([ctx[torch.where(label_train == x)].mean(dim = 0) for x in range(self.num_classes)], dim = 1)
        h = h.view(self.num_classes, 1, -1)

        out = []
        kld = torch.tensor(0., device=device)
        c = torch.empty([5, 1, 1280], requires_grad = False, device = device).fill_(0)
        for dec_mu, dec_logvar in zip(self.dec_mu, self.dec_logvar):
            x = torch.empty([1, 1, 1280], requires_grad = False, device = device).normal_(0, 1)
            f, (h, c) = self.layer(x, (h, c))
            mu = dec_mu(f.view(1, -1)).squeeze()
            logvar = dec_logvar(f.view(1, -1)).squeeze()
            out.append([mu, logvar])
            kld += (mu.pow(2) - logvar + logvar.exp() - 1).mean()/2
        return self.sample_net(x_test, out), kld

In [None]:
batch_size = 16
meta_trainset = Omniglot('./data/',
                   # Number of ways
                   num_classes_per_task=5,
                   # Resize the images to 28x28 and converts them to PyTorch tensors (from Torchvision)
                   transform=Compose([Resize(28), ToTensor()]),
                   # Transform the labels to integers (e.g. ("Glagolitic/character01", "Sanskrit/character14", ...) to (0, 1, ...))
                   target_transform=Categorical(num_classes=5),
                   # Creates new virtual classes with rotated versions of the images (from Santoro et al., 2016)
                   class_augmentations=[Rotation([90, 180, 270])],
                   meta_train=True,
                   download=True)
meta_trainset = ClassSplitter(meta_trainset, shuffle=True, num_train_per_class=1, num_test_per_class=15)
meta_trainloader = BatchMetaDataLoader(meta_trainset, batch_size=batch_size, num_workers=0)

model = sampler_net(5).cuda()
sample_optimizer = optim.SGD(list(model.sample_net.parameters()), lr = 0.1)
optimizer = optim.Adam(list(model.sample_net.parameters()) +
                       list(model.ctx.parameters()) +
                       list(model.layer.parameters())
                      , lr=1e-10)

num_batches = 200
for batch_idx, meta_train_batch in zip(range(num_batches), meta_trainloader):
    start = time.time()

    train_inputs, train_targets = [x.to(device) for x in meta_train_batch["train"]]
    test_inputs, test_targets = [x.to(device) for x in meta_train_batch["test"]]
    
    cum_loss = torch.tensor(0., device=device)
    accuracy = torch.tensor(0., device=device)
    reg = torch.tensor(0., device=device)

    for task_idx, (train_input, train_target, test_input, test_target) in enumerate(
        zip(train_inputs, train_targets, test_inputs, test_targets)):
        for i in range(1):
            sample_optimizer.zero_grad()
            pred, kld = model(train_input, train_target, test_input)
            loss = F.cross_entropy(pred, test_target)
            loss.backward()
            sample_optimizer.step()
            for param_group in sample_optimizer.param_groups:
                param_group['lr'] *= 0.99
        optimizer.zero_grad()
        pred, kld = model(train_input, train_target, test_input)
        loss = F.cross_entropy(pred, test_target)
        (loss + kld).backward()
        optimizer.step()
        with torch.no_grad():
            cum_loss += loss
            accuracy += torch.sum(pred.argmax(1) == test_target.cuda())
            reg += kld
        
    cum_loss /= batch_size
    accuracy /= batch_size * 75
    reg /= batch_size

    writer.add_scalar('train/accuracy', accuracy, batch_idx)
    writer.add_scalar('train/reg', reg, batch_idx)
    #if batch_idx % 10 == 0:
    print("%3d) loss = %f, kld = %f, acc = %f, time = %.3f sec" %(batch_idx, cum_loss, reg, accuracy, time.time() - start))

Using cache found in C:\Users\KJH/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master


  0) loss = 1.609909, kld = 0.003760, acc = 0.200000, time = 7.805 sec
  1) loss = 1.609633, kld = 0.003787, acc = 0.200000, time = 7.566 sec
  2) loss = 1.609422, kld = 0.003775, acc = 0.201667, time = 7.586 sec
  3) loss = 1.609432, kld = 0.003788, acc = 0.198333, time = 7.674 sec
  4) loss = 1.609439, kld = 0.003747, acc = 0.195833, time = 7.703 sec


In [3]:
meta_testset  = Omniglot('./data/',
                   # Number of ways
                   num_classes_per_task=5,
                   # Resize the images to 28x28 and converts them to PyTorch tensors (from Torchvision)
                   transform=Compose([Resize(28), ToTensor()]),
                   # Transform the labels to integers (e.g. ("Glagolitic/character01", "Sanskrit/character14", ...) to (0, 1, ...))
                   target_transform=Categorical(num_classes=5),
                   # Creates new virtual classes with rotated versions of the images (from Santoro et al., 2016)
                   class_augmentations=[Rotation([90, 180, 270])],
                   meta_train=True,
                   download=True)
meta_testset = ClassSplitter(meta_testset, shuffle=True, num_train_per_class=5, num_test_per_class=15)
meta_testloader = BatchMetaDataLoader(meta_testset, batch_size=batch_size, num_workers=0)

tot_loss = torch.tensor(0., device=device)
tot_acc = torch.tensor(0., device=device)
tot_reg = torch.tensor(0., device=device)

for batch_idx, meta_test_batch in zip(range(num_batches), meta_testloader):
    start = time.time()
    train_inputs, train_targets = [x.to(device) for x in meta_test_batch["train"]]
    test_inputs, test_targets = [x.to(device) for x in meta_test_batch["test"]]
    
    cum_loss = torch.tensor(0., device=device)
    accuracy = torch.tensor(0., device=device)
    reg = torch.tensor(0., device=device)
    
    optimizer.zero_grad()
    for task_idx, (train_input, train_target, test_input, test_target) in enumerate(
        zip(train_inputs, train_targets, test_inputs, test_targets)):
        for i in range(1):
            sample_optimizer.zero_grad()
            pred, kld = model(train_input, train_target, test_input)
            loss = F.cross_entropy(pred, test_target)
            loss.backward()
            sample_optimizer.step()
        with torch.no_grad():
            pred, kld = model(train_input, train_target, test_input)
            loss = F.cross_entropy(pred, test_target)
            cum_loss += loss
            accuracy += torch.sum(pred.argmax(1) == test_target.cuda()) 
            reg += kld

    tot_loss += cum_loss / batch_size
    tot_acc += accuracy / (batch_size * 75)
    tot_reg += reg / batch_size
        
tot_loss /= num_batches
tot_acc /= num_batches
tot_reg /= num_batches
        
print("loss = %f, test_kld = %f, meta_test_acc = %f, time = %.3f sec" %(tot_loss, tot_reg, tot_acc, time.time() - start))

loss = 0.741117, test_kld = 0.003300, meta_test_acc = 0.876667, time = 4.163 sec
