In [None]:
import random
import os
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from data_loader import GetLoader
from torchvision import datasets
from torchvision import transforms
from model import CNNModel
import numpy as np
from test import test

source_dataset_name = 'MNIST'
target_dataset_name = 'mnist_m'
source_image_root = os.path.join('dataset', source_dataset_name)
target_image_root = os.path.join('dataset', target_dataset_name)
model_root = 'models'
cuda = True
cudnn.benchmark = True
lr = 1e-3
batch_size = 128
image_size = 28
n_epoch = 100

manual_seed = random.randint(1, 10000)
random.seed(manual_seed)
torch.manual_seed(manual_seed)

# load data

img_transform_source = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,))
])

img_transform_target = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

dataset_source = datasets.MNIST(
    root='dataset',
    train=True,
    transform=img_transform_source,
    download=True
)

dataloader_source = torch.utils.data.DataLoader(
    dataset=dataset_source,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8)

train_list = os.path.join(target_image_root, 'mnist_m_train_labels.txt')

dataset_target = GetLoader(
    data_root=os.path.join(target_image_root, 'mnist_m_train'),
    data_list=train_list,
    transform=img_transform_target
)

dataloader_target = torch.utils.data.DataLoader(
    dataset=dataset_target,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8)

# load model

my_net = CNNModel()

# setup optimizer

optimizer = optim.Adam(my_net.parameters(), lr=lr)

loss_class = torch.nn.NLLLoss()
loss_domain = torch.nn.NLLLoss()

if cuda:
    my_net = my_net.cuda()
    loss_class = loss_class.cuda()
    loss_domain = loss_domain.cuda()

for p in my_net.parameters():
    p.requires_grad = True

# training

for epoch in range(n_epoch):

    len_dataloader = min(len(dataloader_source), len(dataloader_target))
    data_source_iter = iter(dataloader_source)
    data_target_iter = iter(dataloader_target)

    i = 0
    while i < len_dataloader:

        p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
        alpha = 2. / (1. + np.exp(-10 * p)) - 1

        # training model using source data
        data_source = data_source_iter.next()
        s_img, s_label = data_source

        my_net.zero_grad()
        batch_size = len(s_label)

        input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
        class_label = torch.LongTensor(batch_size)
        domain_label = torch.zeros(batch_size)
        domain_label = domain_label.long()

        if cuda:
            s_img = s_img.cuda()
            s_label = s_label.cuda()
            input_img = input_img.cuda()
            class_label = class_label.cuda()
            domain_label = domain_label.cuda()

        input_img.resize_as_(s_img).copy_(s_img)
        class_label.resize_as_(s_label).copy_(s_label)

        class_output, domain_output = my_net(input_data=input_img, alpha=alpha)
        err_s_label = loss_class(class_output, class_label)
        err_s_domain = loss_domain(domain_output, domain_label)

        # training model using target data
        data_target = data_target_iter.next()
        t_img, _ = data_target

        batch_size = len(t_img)

        input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
        domain_label = torch.ones(batch_size)
        domain_label = domain_label.long()

        if cuda:
            t_img = t_img.cuda()
            input_img = input_img.cuda()
            domain_label = domain_label.cuda()

        input_img.resize_as_(t_img).copy_(t_img)

        _, domain_output = my_net(input_data=input_img, alpha=alpha)
        err_t_domain = loss_domain(domain_output, domain_label)
        err = err_t_domain + err_s_domain + err_s_label
        err.backward()
        optimizer.step()

        i += 1

        print ('epoch: %d, [iter: %d / all %d], err_s_label: %f, err_s_domain: %f, err_t_domain: %f' \
              % (epoch, i, len_dataloader, err_s_label.data.cpu().numpy(),
                 err_s_domain.data.cpu().numpy(), err_t_domain.data.cpu().item()))

    torch.save(my_net, '{0}/mnist_mnistm_model_epoch_{1}.pth'.format(model_root, epoch))
    test(source_dataset_name, epoch)
    test(target_dataset_name, epoch)

print('done')


In [1]:
import sys
sys.path.insert(1, "DANN_py3")

In [2]:
import random
import os
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from data_loader import GetLoader
from torchvision import datasets
from torchvision import transforms
from model import CNNModel
import numpy as np
from test import test
from pytorchtools import count_parameters
import torch.nn as nn
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
import pickle

dataset_name = "POLLUTION"
task_size = 50
window_size = 5

train_data = pickle.load(  open( "../Data/TRAIN-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )
validation_data = pickle.load( open( "../Data/VAL-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )


In [4]:
train_data.x.shape

(119483, 5, 14)

In [5]:
from VariationalRecurrentNeuralNetwork.model import VRNN

In [6]:
x_dim = 14
h_dim = 50
z_dim = 16
n_layers =  1
n_epochs = 100
clip = 10
learning_rate = 1e-3
batch_size = 128
seed = 128
print_every = 100
save_every = 10

In [7]:
device

device(type='cuda', index=0)

In [8]:
sample = torch.tensor(train_data.x[0:5]).float()

sample = sample.squeeze().transpose(0, 1)
sample.shape

torch.Size([5, 5, 14])

In [9]:
model = VRNN(x_dim, h_dim, z_dim, n_layers, device)
model.cuda()
count_parameters(model)

49092

In [10]:
model.device

device(type='cuda', index=0)

In [11]:
kld_loss, nll_loss, (all_enc_mean, all_enc_std), (all_dec_mean, all_dec_std), x1, h = model(sample)

In [12]:
class VRADA(nn.Module):
    
    def __init__(self, x_dim, h_dim, z_dim, out_dim, n_layers, device, bias=False):
        super().__init__()
        
        self.device = device
        self.vrnn = VRNN(x_dim, h_dim, z_dim, n_layers, device)
        self.linear = nn.Linear (h_dim, out_dim)
        
    def forward(self, x):
        
        x = x.to(self.device)
        x = x.squeeze().transpose(0, 1)
        kld_loss, nll_loss, (all_enc_mean, all_enc_std), (all_dec_mean, all_dec_std), x1, h = self.vrnn(x)
        out = self.linear(h.squeeze())
        
        return out, kld_loss, nll_loss
        
    def cuda(self):
        
        self.vrnn.cuda()
        
        super().cuda()
        

In [45]:
out_dim = 1
model = VRADA(x_dim, h_dim, z_dim, out_dim, n_layers, device)


In [46]:
model.cuda()
model

VRADA(
  (vrnn): VRNN(
    (phi_x): Sequential(
      (0): Linear(in_features=14, out_features=50, bias=True)
      (1): ReLU()
      (2): Linear(in_features=50, out_features=50, bias=True)
      (3): ReLU()
    )
    (phi_z): Sequential(
      (0): Linear(in_features=16, out_features=50, bias=True)
      (1): ReLU()
    )
    (enc): Sequential(
      (0): Linear(in_features=100, out_features=50, bias=True)
      (1): ReLU()
      (2): Linear(in_features=50, out_features=50, bias=True)
      (3): ReLU()
    )
    (enc_mean): Linear(in_features=50, out_features=16, bias=True)
    (enc_std): Sequential(
      (0): Linear(in_features=50, out_features=16, bias=True)
      (1): Softplus(beta=1, threshold=20)
    )
    (prior): Sequential(
      (0): Linear(in_features=50, out_features=50, bias=True)
      (1): ReLU()
    )
    (prior_mean): Linear(in_features=50, out_features=16, bias=True)
    (prior_std): Sequential(
      (0): Linear(in_features=50, out_features=16, bias=True)
      (1):

In [47]:
out = model(sample)

In [48]:
device

device(type='cuda', index=0)

In [49]:
from torch.utils.data import Dataset, DataLoader
from metrics import torch_mae as mae

params = {'batch_size': batch_size,
          'shuffle': True,
          'num_workers': 0}

n_epochs = 10

train_loader = DataLoader(train_data, **params)
train_iter = iter(train_loader)

val_loader = DataLoader(validation_data, **params)
val_iter = iter(val_loader)

optimizer = optim.Adam(model.parameters(), lr=0.0001)


In [87]:
def step (model, data_loader, loss_function, train =False, optimizer=None):

    data_iter = iter(data_loader)
    total_loss = 0.0
    for x, y in data_iter:

        model.zero_grad()
        x, y = torch.tensor(x).float().to(device), torch.tensor(y).float().to(device)
        y_pred, kld_loss, nll_loss = model(x)
        loss = loss_function(y_pred, y)

        loss += 0.00001*kld_loss+ 0.00001*nll_loss
        if train:
            loss.backward()
            optimizer.step()
        
        current_loss = loss.cpu().data.numpy()*x.shape[0]
        total_loss += current_loss
        
    return total_loss/len(data_loader)
    

In [None]:
for i in range(n_epochs):
    
    train_loss = step(model, train_loader, mae, train = True, optimizer = optimizer)
    val_loss = step(model, val_loader, mae)
    
    print("Train:", train_loss)
    print("Val:", val_loss)

  


Train: 26.500867009099085
Val: 25.11963867925995


In [56]:
val_loss = step(model, val_loader, mae)


  


In [57]:
print("Train:", train_loss)
print("Val:", val_loss)

Train: nan
Val: nan


In [61]:
sample

tensor([[[-1.6704e+00, -1.6808e+00, -1.6615e+00,  1.3676e+00, -5.0900e-01,
          -3.6489e-01,  8.3746e-01, -2.6723e+00,  1.3031e+00, -3.4612e-01,
          -2.9373e-03, -3.1029e-03, -6.4236e-02, -5.3738e-01],
         [-1.6704e+00, -1.6808e+00, -1.5170e+00,  1.3676e+00, -5.0900e-01,
          -3.0141e-01,  7.3925e-01, -2.7678e+00,  1.3031e+00, -2.5573e-01,
          -2.9373e-03, -3.1029e-03, -6.4236e-02, -5.3738e-01],
         [-1.6704e+00, -1.6808e+00, -1.3726e+00,  1.3676e+00, -5.0900e-01,
          -3.6489e-01,  6.4105e-01, -2.6723e+00,  1.3031e+00, -2.0404e-01,
          -2.9373e-03, -3.1029e-03, -6.4236e-02, -5.3738e-01],
         [-1.6704e+00, -1.6808e+00, -1.2281e+00,  1.3676e+00, -5.0900e-01,
          -1.7447e-01,  6.4105e-01, -2.9588e+00,  1.3031e+00, -1.1364e-01,
          -2.9373e-03, -3.1029e-03, -6.4236e-02, -5.3738e-01],
         [-1.6704e+00, -1.6808e+00, -1.0836e+00,  1.3676e+00, -4.9214e-01,
          -2.3794e-01,  5.4284e-01, -2.7678e+00,  1.3031e+00, -2.3253e-02

In [65]:
model = VRADA(x_dim, h_dim, z_dim, out_dim, n_layers, device)

model.cuda()

y_pred, kld_loss, nll_loss = model(sample)

In [72]:
loss = mae(y_pred, torch.tensor(train_data.y[:5]).to(device))

In [75]:
loss +=  kld_loss + nll_loss
loss

tensor(503.8247, device='cuda:0', dtype=torch.float64, grad_fn=<AddBackward0>)