In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils as utils
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
import torchvision.datasets as dsets
from torchvision import models
from torchvision.utils import save_image
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter('runs/thesis06_5w_1s')


from tqdm import tqdm

from flows import PlanarFlow
from utils import Binarize
from codes import Linear_flipout, Flatten, count_parameters, EfficientNet

from torchmeta.datasets import Omniglot, CIFARFS
from torchmeta.transforms import Categorical, ClassSplitter, Rotation
from torchvision.transforms import Compose, Resize, ToTensor
from torchmeta.utils.data import BatchMetaDataLoader


#from __future__ import print_function
import argparse
import cv2
import matplotlib.pyplot as plt

import os
cur_dir = "C:/Users/KJH/OneDrive - skku.edu/KJH/Projects/2019winter_research"
#cur_dir = "C:/Users/KJH-Laptop/OneDrive - skku.edu/KJH/Projects/2019winter_research/"
os.chdir(cur_dir)
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import time
import copy
import random as rd

device = torch.device('cuda')

class net(nn.Module):
    def __init__(self, num_classes):
        super(net, self).__init__()
        self.input_dim = [1, 28, 28]
        self.num_classes = num_classes
        
        self.ctx = torch.hub.load('rwightman/gen-efficientnet-pytorch', 'efficientnet_b0', pretrained=True)
        self.ctx.conv_stem = nn.Conv2d(1, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
        self.ctx.classifier = nn.Identity()
        
        self.layer = nn.LSTM(input_size=1280, hidden_size=1280, num_layers=5, bias=True, batch_first = True).to(device)
        self.dec_mu = nn.ModuleList()
        self.dec_logvar = nn.ModuleList()
        
        self.num_params = [[200 * 200, 200], [200 * 200, 200], [200 * self.num_classes, self.num_classes]]
        
        for layer_size in self.num_params:
            self.dec_mu.append(
                nn.Sequential(
                    nn.ELU(),
                    nn.Linear(1280, 1280, bias = True),
                    nn.ELU(),
                    nn.Linear(1280, layer_size[0] + layer_size[1], bias = True),
                ).to(device))
            
            self.dec_logvar.append(
                nn.Sequential(
                    nn.ELU(),
                    nn.Linear(1280, 1280, bias = True),
                    nn.ELU(),
                    nn.Linear(1280, layer_size[0] + layer_size[1], bias = True),
                ).to(device))
        
        self.encoder = nn.Sequential(
            nn.BatchNorm2d(1),
            Flatten(),            
            nn.Linear(784, 200, bias = False),
            nn.ELU()
        )

            
            
    def forward(self, input_train, label_train, input_test, label_test, adapt_lr, adapt_step = 1):
        ctx = self.ctx(input_train).view(input_train.shape[0], self.num_classes, 256)
        h = torch.stack([ctx[torch.where(label_train == x)].mean(dim = 0) for x in range(self.num_classes)], dim = 1)
        h = h.view(self.num_classes, 1, -1)
        
        params = nn.ParameterList()
        param_vals = []
        kld = torch.tensor(0., device=device)
        c = torch.empty([5, 1, 1280], requires_grad = False, device = device).fill_(0)
        x_test_init = self.encoder(input_test)
        x_test = x_test_init

        for ind, (dec_mu, dec_logvar) in enumerate(zip(self.dec_mu, self.dec_logvar)):
            x = torch.empty([1, 1, 1280], requires_grad=False, device = device).normal_(0, 1)
            f, (h, c) = self.layer(x, (h, c))
            
            mu = dec_mu(f.view(1, -1)).squeeze()
            logvar = dec_logvar(f.view(1, -1)).squeeze()
            params.append(nn.Parameter(torch.stack((mu, logvar), dim = 0), requires_grad = True))
        
        optimizer = optim.SGD(params, lr = adapt_lr)
        
        for step in range(adapt_step + 1):
            x_test = x_test_init
            kld = torch.tensor(0., device=device)
            for ind, param in enumerate(params):
                param.retain_grad()

                weight_mu = param[0, :self.num_params[ind][0]].view(-1, self.num_params[ind][1])
                bias_mu = param[0, self.num_params[ind][0]:]

                weight_logvar = param[1, :self.num_params[ind][0]].view(-1, self.num_params[ind][1])
                bias_logvar = param[1, self.num_params[ind][0]:]

                weight_noise = torch.empty(weight_mu.shape, requires_grad = False, device = device).normal_(0,1)
                bias_noise = torch.empty(bias_mu.shape, requires_grad = False, device = device).normal_(0,1)
                in_sign = torch.empty(x_test.shape, requires_grad = False, device = device).uniform_(-1,1).sign()
                out_sign = torch.empty([x_test.shape[0], self.num_params[ind][1]], requires_grad = False, device = device).uniform_(-1,1).sign()

                x_test = torch.mm(x_test, weight_mu) + torch.mm(in_sign * x_test, weight_noise * weight_mu * weight_logvar.div(2).exp()) * out_sign
                x_test += (1 + bias_noise * bias_logvar.div(2).exp()) * bias_mu
                x_test = F.elu(x_test)

                kld += (mu.pow(2) - logvar + logvar.exp() - 1).mean()/2
                
            if step < adapt_step:
                optimizer.zero_grad()
                loss = F.cross_entropy(x_test, label_test) + 1e-6 * kld
                loss.backward(retain_graph = True)
                optimizer.step()

        return x_test, kld

In [2]:
batch_size = 16
meta_trainset = Omniglot('./data/',
                   # Number of ways
                   num_classes_per_task=5,
                   # Resize the images to 28x28 and converts them to PyTorch tensors (from Torchvision)
                   transform=Compose([Resize(28), ToTensor()]),
                   # Transform the labels to integers (e.g. ("Glagolitic/character01", "Sanskrit/character14", ...) to (0, 1, ...))
                   target_transform=Categorical(num_classes=5),
                   # Creates new virtual classes with rotated versions of the images (from Santoro et al., 2016)
                   class_augmentations=[Rotation([90, 180, 270])],
                   meta_train=True,
                   download=True)
meta_trainset = ClassSplitter(meta_trainset, shuffle=True, num_train_per_class=1, num_test_per_class=15)
meta_trainloader = BatchMetaDataLoader(meta_trainset, batch_size=batch_size, num_workers=0)

model = net(5).cuda()
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

num_batches = 200
for batch_idx, meta_train_batch in zip(range(num_batches), meta_trainloader):
    start = time.time()

    train_inputs, train_targets = [x.to(device) for x in meta_train_batch["train"]]
    test_inputs, test_targets = [x.to(device) for x in meta_train_batch["test"]]
    
    cum_loss = torch.tensor(0., device=device)
    accuracy = torch.tensor(0., device=device)
    reg = torch.tensor(0., device=device)

    for task_idx, (train_input, train_target, test_input, test_target) in enumerate(
        zip(train_inputs, train_targets, test_inputs, test_targets)):
        optimizer.zero_grad()
        pred, kld = model(train_input, train_target, test_input, test_target, 0.5, 5)
        loss = criterion(pred, test_target)
        (loss + 1e-6 * kld).backward()
        optimizer.step()
        with torch.no_grad():
            cum_loss += loss
            accuracy += torch.sum(pred.argmax(1) == test_target.cuda())
            reg += kld
    cum_loss /= batch_size
    accuracy /= batch_size * 75
    reg /= batch_size

    if batch_idx % 10 == 0:
        print("%3d) loss = %f, kld = %f, acc = %f, time = %.3f sec" %(batch_idx, cum_loss, reg, accuracy, time.time() - start))

Using cache found in C:\Users\KJH/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master


  0) loss = 1.114917, kld = 0.000943, acc = 0.735833, time = 27.221 sec
 10) loss = 0.800171, kld = 0.000491, acc = 0.778333, time = 26.762 sec
 20) loss = 0.936648, kld = 0.000320, acc = 0.732500, time = 25.786 sec
 30) loss = 0.770455, kld = 0.000244, acc = 0.827500, time = 26.028 sec
 40) loss = 0.619009, kld = 0.000210, acc = 0.845833, time = 25.877 sec
 50) loss = 0.615955, kld = 0.000189, acc = 0.830000, time = 26.454 sec
 60) loss = 0.589267, kld = 0.000177, acc = 0.853333, time = 27.509 sec
 70) loss = 0.745112, kld = 0.000165, acc = 0.789167, time = 25.629 sec
 80) loss = 1.067363, kld = 0.000170, acc = 0.745833, time = 25.720 sec
 90) loss = 0.511974, kld = 0.000170, acc = 0.851667, time = 26.368 sec
100) loss = 0.877231, kld = 0.000165, acc = 0.837500, time = 26.684 sec
110) loss = 0.622231, kld = 0.000160, acc = 0.837500, time = 27.811 sec
120) loss = 0.454001, kld = 0.000169, acc = 0.861667, time = 25.911 sec
130) loss = 0.338756, kld = 0.000164, acc = 0.897500, time = 25.

In [3]:
optimizer = optim.Adam(model.parameters(), lr=1e-5)

num_batches = 200
for batch_idx, meta_train_batch in zip(range(num_batches), meta_trainloader):
    start = time.time()

    train_inputs, train_targets = [x.to(device) for x in meta_train_batch["train"]]
    test_inputs, test_targets = [x.to(device) for x in meta_train_batch["test"]]
    
    cum_loss = torch.tensor(0., device=device)
    accuracy = torch.tensor(0., device=device)
    reg = torch.tensor(0., device=device)

    for task_idx, (train_input, train_target, test_input, test_target) in enumerate(
        zip(train_inputs, train_targets, test_inputs, test_targets)):
        optimizer.zero_grad()
        pred, kld = model(train_input, train_target, test_input, test_target, 0.5, 5)
        loss = criterion(pred, test_target)
        (loss + 1e-6 * kld).backward()
        optimizer.step()
        with torch.no_grad():
            cum_loss += loss
            accuracy += torch.sum(pred.argmax(1) == test_target.cuda())
            reg += kld
    cum_loss /= batch_size
    accuracy /= batch_size * 75
    reg /= batch_size

    if batch_idx % 10 == 0:
        print("%3d) loss = %f, kld = %f, acc = %f, time = %.3f sec" %(batch_idx, cum_loss, reg, accuracy, time.time() - start))

  0) loss = 0.609192, kld = 0.000160, acc = 0.850000, time = 25.502 sec
 10) loss = 1.584619, kld = 0.000159, acc = 0.868333, time = 25.921 sec
 20) loss = 0.750604, kld = 0.000155, acc = 0.804167, time = 27.409 sec
 30) loss = 0.798320, kld = 0.000159, acc = 0.827500, time = 25.369 sec
 40) loss = 0.600944, kld = 0.000160, acc = 0.838333, time = 25.494 sec
 50) loss = 0.345966, kld = 0.000160, acc = 0.906667, time = 25.701 sec
 60) loss = 0.504387, kld = 0.000161, acc = 0.853333, time = 25.430 sec
 70) loss = 0.309215, kld = 0.000161, acc = 0.902500, time = 27.540 sec
 80) loss = 0.356428, kld = 0.000158, acc = 0.891667, time = 25.883 sec
 90) loss = 0.472840, kld = 0.000162, acc = 0.875833, time = 25.415 sec
100) loss = 0.558434, kld = 0.000162, acc = 0.871667, time = 25.283 sec
110) loss = 0.192124, kld = 0.000157, acc = 0.949167, time = 25.623 sec
120) loss = 0.934178, kld = 0.000159, acc = 0.915000, time = 26.508 sec
130) loss = 0.412599, kld = 0.000157, acc = 0.874167, time = 26.

In [4]:
optimizer = optim.Adam(model.parameters(), lr=1e-7)

num_batches = 200
for batch_idx, meta_train_batch in zip(range(num_batches), meta_trainloader):
    start = time.time()

    train_inputs, train_targets = [x.to(device) for x in meta_train_batch["train"]]
    test_inputs, test_targets = [x.to(device) for x in meta_train_batch["test"]]
    
    cum_loss = torch.tensor(0., device=device)
    accuracy = torch.tensor(0., device=device)
    reg = torch.tensor(0., device=device)

    for task_idx, (train_input, train_target, test_input, test_target) in enumerate(
        zip(train_inputs, train_targets, test_inputs, test_targets)):
        optimizer.zero_grad()
        pred, kld = model(train_input, train_target, test_input, test_target, 0.5, 5)
        loss = criterion(pred, test_target)
        (loss + 1e-6 * kld).backward()
        optimizer.step()
        with torch.no_grad():
            cum_loss += loss
            accuracy += torch.sum(pred.argmax(1) == test_target.cuda())
            reg += kld
    cum_loss /= batch_size
    accuracy /= batch_size * 75
    reg /= batch_size

    if batch_idx % 10 == 0:
        print("%3d) loss = %f, kld = %f, acc = %f, time = %.3f sec" %(batch_idx, cum_loss, reg, accuracy, time.time() - start))

  0) loss = 0.624367, kld = 0.000154, acc = 0.831667, time = 26.549 sec
 10) loss = 0.366524, kld = 0.000156, acc = 0.901667, time = 26.506 sec
 20) loss = 0.432718, kld = 0.000163, acc = 0.883333, time = 25.655 sec
 30) loss = 0.517042, kld = 0.000157, acc = 0.854167, time = 28.084 sec
 40) loss = 0.727221, kld = 0.000157, acc = 0.869167, time = 26.317 sec
 50) loss = 0.800768, kld = 0.000161, acc = 0.829167, time = 26.177 sec
 60) loss = 0.392935, kld = 0.000156, acc = 0.895000, time = 25.389 sec
 70) loss = 0.988804, kld = 0.000158, acc = 0.835000, time = 25.477 sec
 80) loss = 0.379702, kld = 0.000154, acc = 0.905833, time = 27.630 sec
 90) loss = 0.718591, kld = 0.000157, acc = 0.841667, time = 26.234 sec
100) loss = 0.605882, kld = 0.000157, acc = 0.840000, time = 26.336 sec
110) loss = 0.465253, kld = 0.000155, acc = 0.876667, time = 26.250 sec
120) loss = 0.476437, kld = 0.000156, acc = 0.885833, time = 26.483 sec
130) loss = 0.758112, kld = 0.000160, acc = 0.846667, time = 26.

In [None]:
batch_size = 16
num_batches = 200
optimizer = optim.Adam(model.parameters(), lr=1e-7)
criterion = nn.CrossEntropyLoss()
meta_testset  = Omniglot('./data/',
                   # Number of ways
                   num_classes_per_task=5,
                   # Resize the images to 28x28 and converts them to PyTorch tensors (from Torchvision)
                   transform=Compose([Resize(28), ToTensor()]),
                   # Transform the labels to integers (e.g. ("Glagolitic/character01", "Sanskrit/character14", ...) to (0, 1, ...))
                   target_transform=Categorical(num_classes=5),
                   # Creates new virtual classes with rotated versions of the images (from Santoro et al., 2016)
                   class_augmentations=[Rotation([90, 180, 270])],
                   meta_test=True,
                   download=True)
meta_testset = ClassSplitter(meta_testset, shuffle=True, num_train_per_class=1, num_test_per_class=15)
meta_testloader = BatchMetaDataLoader(meta_testset, batch_size=batch_size, num_workers=0)

tot_loss = torch.tensor(0., device=device)
tot_acc = torch.tensor(0., device=device)
tot_reg = torch.tensor(0., device=device)

for batch_idx, meta_test_batch in zip(range(num_batches), meta_testloader):
    start = time.time()
    train_inputs, train_targets = [x.to(device) for x in meta_test_batch["train"]]
    test_inputs, test_targets = [x.to(device) for x in meta_test_batch["test"]]
    
    cum_loss = torch.tensor(0., device=device)
    accuracy = torch.tensor(0., device=device)
    reg = torch.tensor(0., device=device)

    for task_idx, (train_input, train_target, test_input, test_target) in enumerate(
        zip(train_inputs, train_targets, test_inputs, test_targets)):
        optimizer.zero_grad()
        pred, kld = model(train_input, train_target, test_input, test_target, 0.5, 0)
        with torch.no_grad():            
            loss = criterion(pred, test_target)
            cum_loss += loss
            accuracy += torch.sum(pred.argmax(1) == test_target.cuda())
            reg += kld

    tot_loss += cum_loss / batch_size
    tot_acc += accuracy / (batch_size * 75)
    tot_reg += reg / batch_size
    
    print("%3d) loss = %f, kld = %f, acc = %f, time = %.3f sec" %(batch_idx, cum_loss / batch_size, reg / batch_size, accuracy / (batch_size * 75), time.time() - start))
        
tot_loss /= num_batches
tot_acc /= num_batches
tot_reg /= num_batches
        
print("loss = %f, test_kld = %f, meta_test_acc = %f, time = %.3f sec" %(tot_loss, tot_reg, tot_acc, time.time() - start))

  0) loss = 1.612343, kld = 0.000159, acc = 0.204167, time = 1.447 sec
  1) loss = 1.614228, kld = 0.000161, acc = 0.206667, time = 1.021 sec
  2) loss = 1.611155, kld = 0.000166, acc = 0.205000, time = 1.084 sec
  3) loss = 1.617296, kld = 0.000155, acc = 0.180833, time = 1.039 sec
  4) loss = 1.615215, kld = 0.000158, acc = 0.210833, time = 1.042 sec
  5) loss = 1.613404, kld = 0.000156, acc = 0.204167, time = 1.017 sec
  6) loss = 1.608475, kld = 0.000160, acc = 0.201667, time = 1.024 sec
  7) loss = 1.611615, kld = 0.000155, acc = 0.188333, time = 1.030 sec
  8) loss = 1.612790, kld = 0.000157, acc = 0.200000, time = 1.049 sec
  9) loss = 1.613554, kld = 0.000157, acc = 0.198333, time = 1.066 sec
 10) loss = 1.614281, kld = 0.000160, acc = 0.170833, time = 1.036 sec
 11) loss = 1.614609, kld = 0.000160, acc = 0.190000, time = 1.055 sec
 12) loss = 1.607516, kld = 0.000157, acc = 0.202500, time = 1.057 sec
 13) loss = 1.613862, kld = 0.000158, acc = 0.214167, time = 1.082 sec
 14) l

In [7]:
torch.save(model.state_dict(), "./save/thesis06_omniglot5w1s_5step")

In [2]:
model = net(5).cuda()
model.load_state_dict(torch.load("./save/thesis06_omniglot5w1s_5step"))

Using cache found in C:\Users\KJH/.cache\torch\hub\rwightman_gen-efficientnet-pytorch_master


<All keys matched successfully>