In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torch.nn.functional as F
import torch.utils as utils
import torch.utils.data
import torchvision
import torchvision.transforms as transforms
import torchvision.transforms.functional as TF
import torchvision.datasets as dsets
from torchvision import models
from torchvision.utils import save_image
import pdb


from tqdm import tqdm

from flows import PlanarFlow
from utils import Binarize
from codes import Linear_flipout, Flatten, count_parameters

from torchmeta.datasets.helpers import cifar_fs
from torchmeta.utils.data import BatchMetaDataLoader


#from __future__ import print_function
import argparse
import cv2
import matplotlib.pyplot as plt

import os
cur_dir = "C:/Users/KJH/OneDrive - skku.edu/KJH/Projects/2019winter_research"
#cur_dir = "C:/Users/KJH-Laptop/OneDrive - skku.edu/KJH/Projects/2019winter_research/"
os.chdir(cur_dir)
#os.environ["CUDA_VISIBLE_DEVICES"] = "1"
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

import time
import copy
import random as rd

device = torch.device('cuda')

batch_size_train = 256
batch_size_test = 256

trainset = torchvision.datasets.MNIST('./data/', train=True, download=True,
                                      transform=torchvision.transforms.Compose([
                                          torchvision.transforms.ToTensor(),
                                          torchvision.transforms.Normalize(
                                              (0.1307,), (0.3081,))
                                      ]))
trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size_train, shuffle=True)
testset = torchvision.datasets.MNIST('./data/', train=False, download=True,
                                     transform=torchvision.transforms.Compose([
                                         torchvision.transforms.ToTensor(),
                                         torchvision.transforms.Normalize(
                                             (0.1307,), (0.3081,))
                                     ]))
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size_test, shuffle=True)



def loss_fn(pred, label, model, progress):
    loss = F.cross_entropy(pred, label, weight=None, ignore_index=-100, reduction='mean')
    if progress < 0.5:
        loss += (2 * progress)**2 * model.kld()
    else:
        loss += model.kld()
    return loss

In [2]:
class bnn(nn.Module):
    def __init__(self):
        super(bnn, self).__init__()
        self.flatten = Flatten()
        self.l1 = Linear_flipout(784, 200)
        self.a1 = nn.ELU()
        self.l2 = Linear_flipout(200, 200)
        self.a2 = nn.ELU()
        self.l3 = Linear_flipout(200, 10)        

    def forward(self, x):
        sum_kld = 0.0
        x = self.flatten(x)
        
        x, kld = self.l1(x)
        sum_kld += kld
        x = self.a1(x)
        
        x, kld = self.l2(x)
        sum_kld += kld
        x = self.a2(x)
        
        x, kld = self.l3(x)
        sum_kld += kld
        return x, sum_kld
    
    def kld_backward(self, reg = 1):
        for layer in self.modules():
            if isinstance(layer, Linear_flipout):
                layer.kld_backward(reg)

In [3]:
model = bnn().cuda()
epoch = 100
lr = 1e-4
#optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.5) #, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

def loss_fn(pred, label):
    loss = F.cross_entropy(pred, label, weight=None, ignore_index=-100, reduction='mean')
    return loss

#0.1 for epoch [0,150)
#0.01 for epoch [150,250)
#0.001 for epoch [250,350)

for run in range(epoch):
    start = time.time()
    
    #Training
    model.train()
    train_loss = 0.0
    train_kld = 0.0
    for ind, data in enumerate(trainloader):
        optimizer.zero_grad()
        img, label = data
        pred, kld = model(img.cuda())
        loss = F.cross_entropy(pred, label.cuda()) + kld
        loss.backward()
        model.kld_backward()
        optimizer.step()
        train_loss += loss.detach() * img.shape[0]
        train_kld += kld.detach() * img.shape[0]
    train_loss /= len(trainset)
    train_kld /= len(trainset)
    
    #Test
    model.eval()
    with torch.no_grad():
        test_loss = 0.0
        acc = 0.0
        test_kld = 0.0
        for ind, data in enumerate(testloader):
            img, label = data
            pred, kld = model(img.cuda())
            test_kld += kld.detach() * img.shape[0] 
            test_loss += (F.cross_entropy(pred, label.cuda()) + kld) * img.shape[0]
            acc += sum(pred.argmax(1) == label.cuda()).item()
        test_loss /= len(testset)
        test_kld /= len(testset)
        acc /= len(testset)
    print("epoch : %d, train loss = %5.6f, test loss = %5.6f, train kld = %5.6f, test kld = %.4f, acc = %.3f, time: %f sec"
          %(run, train_loss, test_loss, train_kld, test_kld, acc, time.time() - start))

epoch : 0, train loss = 0.972902, test loss = 0.457351, train kld = 0.005352, test kld = 0.0057, acc = 0.866, time: 18.449459 sec


KeyboardInterrupt: 

In [None]:
model.eval()
    with torch.no_grad():
        test_loss = 0.0
        acc = 0.0
        test_kld = 0.0
        for ind, data in enumerate(testloader):
            img, label = data
            for sample in range(30):
                if sample:
                    pred = model(img.cuda())
                else:
                    pred = torch.cat((pred, model(img.cuda()).unsqueeze(0)), 0
            acc += sum(pred.argmax(1) == label.cuda()).item()
        test_loss /= len(testset)
        acc /= len(testset)

In [6]:
class base_net(nn.Module):
    def __init__(self):
        super(base_net, self).__init__()
        self.layers = nn.Sequential(
            Flatten(),
            nn.Linear(784, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 10),
        )
        
    def forward(self, x):
        return self.layers(x)

In [7]:
model = base_net().cuda()
epoch = 30
lr = 3e-4
#optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=5e-4)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
CE_loss = nn.CrossEntropyLoss()

#0.1 for epoch [0,150)
#0.01 for epoch [150,250)
#0.001 for epoch [250,350)

running_test_loss = 0.0
for run in range(epoch):
    start = time.time()
    
    #Training
    model.train()
    train_loss = 0.0
    for ind, data in enumerate(trainloader):
        optimizer.zero_grad()
        img, label = data
        pred = model(img.cuda())
        loss = CE_loss(pred, label.cuda())
        train_loss += loss.detach() * img.shape[0] 
        loss.backward()
        optimizer.step()
    train_loss /= len(trainset)
    
    #Test
    model.eval()
    with torch.no_grad():
        test_loss = 0.0
        acc = 0.0
        for ind, data in enumerate(testloader):
            img, label = data
            pred = model(img.cuda())
            #test_loss += CE_loss(pred, label.cuda()).detach() * img.shape[0]
            acc += sum(pred.argmax(1) == label.cuda()).item()
        test_loss /= len(testset)
        acc /= len(testset)
        running_test_loss += test_loss
    #print("epoch : %d, train loss = %5.6f, test loss = %5.6f, running_test_loss = %5.6f, acc = %.3f, reg = %.4f, time: %f sec"
    #      %(run, train_loss, test_loss, running_test_loss / (run + 1), acc, sum_kl/(ind + 1), time.time() - start))
    print("epoch : %d, train loss = %5.6f, acc = %.3f, time: %f sec"
          %(run, train_loss, acc, time.time() - start))

epoch : 0, train loss = 0.528034, acc = 0.928, time: 11.999602 sec
epoch : 1, train loss = 0.213323, acc = 0.948, time: 12.046666 sec
epoch : 2, train loss = 0.156956, acc = 0.959, time: 11.969157 sec
epoch : 3, train loss = 0.124217, acc = 0.964, time: 11.964155 sec
epoch : 4, train loss = 0.100775, acc = 0.967, time: 11.942140 sec
epoch : 5, train loss = 0.082867, acc = 0.971, time: 11.881150 sec
epoch : 6, train loss = 0.069517, acc = 0.973, time: 12.075167 sec
epoch : 7, train loss = 0.060014, acc = 0.974, time: 11.900100 sec
epoch : 8, train loss = 0.050877, acc = 0.975, time: 12.088189 sec
epoch : 9, train loss = 0.043593, acc = 0.978, time: 12.276212 sec
epoch : 10, train loss = 0.037962, acc = 0.977, time: 12.066173 sec
epoch : 11, train loss = 0.032076, acc = 0.979, time: 12.031160 sec
epoch : 12, train loss = 0.027815, acc = 0.979, time: 12.004169 sec
epoch : 13, train loss = 0.024346, acc = 0.978, time: 11.858126 sec
epoch : 14, train loss = 0.020935, acc = 0.979, time: 11.9

KeyboardInterrupt: 