In [None]:
import random
import os
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from data_loader import GetLoader
from torchvision import datasets
from torchvision import transforms
from model import CNNModel
import numpy as np
from test import test

source_dataset_name = 'MNIST'
target_dataset_name = 'mnist_m'
source_image_root = os.path.join('dataset', source_dataset_name)
target_image_root = os.path.join('dataset', target_dataset_name)
model_root = 'models'
cuda = True
cudnn.benchmark = True
lr = 1e-3
batch_size = 128
image_size = 28
n_epoch = 100

manual_seed = random.randint(1, 10000)
random.seed(manual_seed)
torch.manual_seed(manual_seed)

# load data

img_transform_source = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,))
])

img_transform_target = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
])

dataset_source = datasets.MNIST(
    root='dataset',
    train=True,
    transform=img_transform_source,
    download=True
)

dataloader_source = torch.utils.data.DataLoader(
    dataset=dataset_source,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8)

train_list = os.path.join(target_image_root, 'mnist_m_train_labels.txt')

dataset_target = GetLoader(
    data_root=os.path.join(target_image_root, 'mnist_m_train'),
    data_list=train_list,
    transform=img_transform_target
)

dataloader_target = torch.utils.data.DataLoader(
    dataset=dataset_target,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8)

# load model

my_net = CNNModel()

# setup optimizer

optimizer = optim.Adam(my_net.parameters(), lr=lr)

loss_class = torch.nn.NLLLoss()
loss_domain = torch.nn.NLLLoss()

if cuda:
    my_net = my_net.cuda()
    loss_class = loss_class.cuda()
    loss_domain = loss_domain.cuda()

for p in my_net.parameters():
    p.requires_grad = True

# training

for epoch in range(n_epoch):

    len_dataloader = min(len(dataloader_source), len(dataloader_target))
    data_source_iter = iter(dataloader_source)
    data_target_iter = iter(dataloader_target)

    i = 0
    while i < len_dataloader:

        p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
        alpha = 2. / (1. + np.exp(-10 * p)) - 1

        # training model using source data
        data_source = data_source_iter.next()
        s_img, s_label = data_source

        my_net.zero_grad()
        batch_size = len(s_label)

        input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
        class_label = torch.LongTensor(batch_size)
        domain_label = torch.zeros(batch_size)
        domain_label = domain_label.long()

        if cuda:
            s_img = s_img.cuda()
            s_label = s_label.cuda()
            input_img = input_img.cuda()
            class_label = class_label.cuda()
            domain_label = domain_label.cuda()

        input_img.resize_as_(s_img).copy_(s_img)
        class_label.resize_as_(s_label).copy_(s_label)

        class_output, domain_output = my_net(input_data=input_img, alpha=alpha)
        err_s_label = loss_class(class_output, class_label)
        err_s_domain = loss_domain(domain_output, domain_label)

        # training model using target data
        data_target = data_target_iter.next()
        t_img, _ = data_target

        batch_size = len(t_img)

        input_img = torch.FloatTensor(batch_size, 3, image_size, image_size)
        domain_label = torch.ones(batch_size)
        domain_label = domain_label.long()

        if cuda:
            t_img = t_img.cuda()
            input_img = input_img.cuda()
            domain_label = domain_label.cuda()

        input_img.resize_as_(t_img).copy_(t_img)

        _, domain_output = my_net(input_data=input_img, alpha=alpha)
        err_t_domain = loss_domain(domain_output, domain_label)
        err = err_t_domain + err_s_domain + err_s_label
        err.backward()
        optimizer.step()

        i += 1

        print ('epoch: %d, [iter: %d / all %d], err_s_label: %f, err_s_domain: %f, err_t_domain: %f' \
              % (epoch, i, len_dataloader, err_s_label.data.cpu().numpy(),
                 err_s_domain.data.cpu().numpy(), err_t_domain.data.cpu().item()))

    torch.save(my_net, '{0}/mnist_mnistm_model_epoch_{1}.pth'.format(model_root, epoch))
    test(source_dataset_name, epoch)
    test(target_dataset_name, epoch)

print('done')


In [1]:
import sys
sys.path.insert(1, "DANN_py3")

In [2]:
import random
import os
import torch.backends.cudnn as cudnn
import torch.optim as optim
import torch.utils.data
from data_loader import GetLoader
from torchvision import datasets
from torchvision import transforms
from model import CNNModel
import numpy as np
from test import test
from pytorchtools import count_parameters
import torch.nn as nn
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
import pickle

dataset_name = "POLLUTION"
task_size = 50
window_size = 5

train_data = pickle.load(  open( "../Data/TRAIN-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )
validation_data = pickle.load( open( "../Data/VAL-"+dataset_name+"-W"+str(window_size)+"-T"+str(task_size)+"-NOML.pickle", "rb" ) )


In [4]:
train_data.x.shape

(119483, 5, 14)

In [5]:
from VariationalRecurrentNeuralNetwork.model import VRNN

In [6]:
x_dim = 14
h_dim = 50
z_dim = 16
n_layers =  1
n_epochs = 100
clip = 10
learning_rate = 1e-3
batch_size = 128
seed = 128
print_every = 100
save_every = 10

In [7]:
device

device(type='cuda', index=0)

In [8]:
sample = torch.tensor(train_data.x[0:5]).float()

sample = sample.squeeze().transpose(0, 1)
sample.shape

torch.Size([5, 5, 14])

In [9]:
model = VRNN(x_dim, h_dim, z_dim, n_layers, device)
model.cuda()
count_parameters(model)

49092

In [11]:
model.device

device(type='cuda', index=0)

In [12]:
kld_loss, nll_loss, (all_enc_mean, all_enc_std), (all_dec_mean, all_dec_std), x1, h = model(sample)

In [52]:
class VRADA(nn.Module):
    
    def __init__(self, x_dim, h_dim, z_dim, out_dim, n_layers, device, bias=False):
        super().__init__()
        
        self.device = device
        self.vrnn = VRNN(x_dim, h_dim, z_dim, n_layers, device)
        self.linear = nn.Linear (h_dim, out_dim)
        
    def forward(self, x):
        
        x = x.to(self.device)
        kld_loss, nll_loss, (all_enc_mean, all_enc_std), (all_dec_mean, all_dec_std), x1, h = self.vrnn(x)
        out = self.linear(h.squeeze())
        
        return out, kld_loss, nll_loss
        
    def cuda(self):
        
        self.vrnn.cuda()
        
        super().cuda()
        

In [53]:
out_dim = 1


In [54]:
model

VRADA(
  (vrnn): VRNN(
    (phi_x): Sequential(
      (0): Linear(in_features=14, out_features=50, bias=True)
      (1): ReLU()
      (2): Linear(in_features=50, out_features=50, bias=True)
      (3): ReLU()
    )
    (phi_z): Sequential(
      (0): Linear(in_features=16, out_features=50, bias=True)
      (1): ReLU()
    )
    (enc): Sequential(
      (0): Linear(in_features=100, out_features=50, bias=True)
      (1): ReLU()
      (2): Linear(in_features=50, out_features=50, bias=True)
      (3): ReLU()
    )
    (enc_mean): Linear(in_features=50, out_features=16, bias=True)
    (enc_std): Sequential(
      (0): Linear(in_features=50, out_features=16, bias=True)
      (1): Softplus(beta=1, threshold=20)
    )
    (prior): Sequential(
      (0): Linear(in_features=50, out_features=50, bias=True)
      (1): ReLU()
    )
    (prior_mean): Linear(in_features=50, out_features=16, bias=True)
    (prior_std): Sequential(
      (0): Linear(in_features=50, out_features=16, bias=True)
      (1):

In [55]:
count_parameters(model)

49143

In [56]:
device

device(type='cuda', index=0)

In [10]:
from torch.utils.data import Dataset, DataLoader
from metrics import torch_mae as mae

batch_size = 64
params = {'batch_size': batch_size,
          'shuffle': True,
          'num_workers': 0}

n_epochs = 100

train_loader = DataLoader(train_data, **params)
train_iter = iter(train_loader)

val_loader = DataLoader(validation_data, **params)
val_iter = iter(val_loader)




In [64]:
optimizer = optim.Adam(model.parameters(), lr=0.000001)
model = VRADA(x_dim, h_dim, z_dim, out_dim, n_layers, device)
model.cuda()

TypeError: __init__() missing 2 required positional arguments: 'n_layers' and 'device'

In [71]:
from torch.autograd import Variable
def step (model, data_loader, loss_function, train =False, optimizer=None):

    data_iter = iter(data_loader)
    total_loss = 0.0
    for x, y in data_iter:

        model.zero_grad()
        x, y = torch.tensor(x).float().to(device), torch.tensor(y).float().to(device)
        x = Variable(x.squeeze().transpose(0, 1))
        y_pred, kld_loss, nll_loss = model(x)
        loss = loss_function(y_pred, y)
        #print(kld_loss)
        #print(nll_loss)
        loss += 0.00001*kld_loss+ 0.00001*nll_loss
        if train:
            loss.backward()
            optimizer.step()
        
        current_loss = loss.cpu().data.numpy()*x.shape[0]
        total_loss += current_loss
        #print(len(data_loader))
    return total_loss/data_loader.dataset.x.shape[0]
    

In [72]:
for i in range(n_epochs):
    
    train_loss = step(model, train_loader, mae, train = True, optimizer = optimizer)
    val_loss = step(model, val_loader, mae)
    
    print("Train:", train_loss)
    print("Val:", val_loss)

  if __name__ == '__main__':


KeyboardInterrupt: 

In [None]:

class CNNModel(nn.Module):

    def __init__(self):
        super(CNNModel, self).__init__()
        self.feature = nn.Sequential()
        self.feature.add_module('f_conv1', nn.Conv2d(3, 64, kernel_size=5))
        self.feature.add_module('f_bn1', nn.BatchNorm2d(64))
        self.feature.add_module('f_pool1', nn.MaxPool2d(2))
        self.feature.add_module('f_relu1', nn.ReLU(True))
        self.feature.add_module('f_conv2', nn.Conv2d(64, 50, kernel_size=5))
        self.feature.add_module('f_bn2', nn.BatchNorm2d(50))
        self.feature.add_module('f_drop1', nn.Dropout2d())
        self.feature.add_module('f_pool2', nn.MaxPool2d(2))
        self.feature.add_module('f_relu2', nn.ReLU(True))

        self.class_classifier = nn.Sequential()
        self.class_classifier.add_module('c_fc1', nn.Linear(50 * 4 * 4, 100))
        self.class_classifier.add_module('c_bn1', nn.BatchNorm1d(100))
        self.class_classifier.add_module('c_relu1', nn.ReLU(True))
        self.class_classifier.add_module('c_drop1', nn.Dropout2d())
        self.class_classifier.add_module('c_fc2', nn.Linear(100, 100))
        self.class_classifier.add_module('c_bn2', nn.BatchNorm1d(100))
        self.class_classifier.add_module('c_relu2', nn.ReLU(True))
        self.class_classifier.add_module('c_fc3', nn.Linear(100, 10))
        self.class_classifier.add_module('c_softmax', nn.LogSoftmax(dim=1))

        self.domain_classifier = nn.Sequential()
        self.domain_classifier.add_module('d_fc1', nn.Linear(50 * 4 * 4, 100))
        self.domain_classifier.add_module('d_bn1', nn.BatchNorm1d(100))
        self.domain_classifier.add_module('d_relu1', nn.ReLU(True))
        self.domain_classifier.add_module('d_fc2', nn.Linear(100, 2))
        self.domain_classifier.add_module('d_softmax', nn.LogSoftmax(dim=1))

    def forward(self, input_data, alpha):
        input_data = input_data.expand(input_data.data.shape[0], 3, 28, 28)
        feature = self.feature(input_data)
        feature = feature.view(-1, 50 * 4 * 4)
        reverse_feature = ReverseLayerF.apply(feature, alpha)
        class_output = self.class_classifier(feature)
        domain_output = self.domain_classifier(reverse_feature)

        return class_output, domain_output


In [32]:
from functions import ReverseLayerF

class VRADA(nn.Module):
    
    def __init__(self, x_dim, h_dim, h_dim_reg, z_dim, out_dim, n_domains, n_layers, device, bias=False):
        super().__init__()
        
        self.device = device
        self.vrnn = VRNN(x_dim, h_dim, z_dim, n_layers, device)
        self.linear = nn.Linear (h_dim, out_dim)
        
        self.regressor = nn.Sequential()
        self.regressor.add_module('c_fc1', nn.Linear(h_dim, h_dim_reg))
        self.regressor.add_module('c_bn1', nn.BatchNorm1d(h_dim_reg))
        self.regressor.add_module('c_relu1', nn.ReLU(True))
        self.regressor.add_module('c_drop1', nn.Dropout2d())
        self.regressor.add_module('c_fc2', nn.Linear(h_dim_reg, out_dim))
        
        
        self.domain_classifier = nn.Sequential()
        self.domain_classifier = nn.Sequential()
        self.domain_classifier.add_module('c_fc1', nn.Linear(h_dim, h_dim_reg))
        self.domain_classifier.add_module('c_bn1', nn.BatchNorm1d(h_dim_reg))
        self.domain_classifier.add_module('c_relu1', nn.ReLU(True))
        self.domain_classifier.add_module('c_drop1', nn.Dropout2d())
        self.domain_classifier.add_module('c_fc2', nn.Linear(h_dim_reg, n_domains))
        self.domain_classifier.add_module('d_softmax', nn.LogSoftmax(dim=1))
        
    def forward(self, x, alpha):
        
        x = x.to(self.device)
        x = x.squeeze().transpose(0, 1)
        kld_loss, nll_loss, (all_enc_mean, all_enc_std), (all_dec_mean, all_dec_std), x1, h = self.vrnn(x)
        reverse_feature = ReverseLayerF.apply(h.squeeze(), alpha)
        regressor_output = self.regressor(reverse_feature)
        domain_class_output = self.domain_classifier(reverse_feature)
        
        return regressor_output, domain_class_output, kld_loss, nll_loss
        
    def cuda(self):
        
        self.vrnn.cuda()
        
        super().cuda()

In [62]:
h_dim = 50
z_dim = 16
h_dim_reg = 50
n_domains = np.max(train_data.file_idx)+1
out_dim=1
vrada = VRADA(x_dim, h_dim, h_dim_reg, z_dim, out_dim, n_domains, n_layers, device)

In [63]:
count_parameters(vrada)

54647

In [38]:
vrada.cuda()
#vrada(sample)

In [39]:
optimizer = optim.Adam(vrada.parameters(), lr=0.0001)

loss_regression = mae
loss_domain = torch.nn.NLLLoss()

In [40]:
x_sample = torch.tensor(train_data.x[0:5]).float()
y_sample = torch.tensor(train_data.y[0:5]).float()
d_sample = torch.tensor(train_data.file_idx[0:5]).float()



In [42]:
#regressor_output, domain_class_output, kld_loss, nll_loss = vrada(x_sample)

In [43]:
err_reg = loss_regression(regressor_output, y_sample.to(device) )
err_dom = loss_domain(domain_class_output.float(), d_sample.long().to(device))

RuntimeError: The size of tensor a (63) must match the size of tensor b (5) at non-singleton dimension 0

In [20]:
err_reg

tensor(0.1996, device='cuda:0', grad_fn=<MeanBackward0>)

In [21]:
d_sample

tensor([0., 0., 0., 0., 0.])

In [44]:
class DomainTSDataset:
    
    def __init__(self, dataset):
        
        self.x = dataset.x
        self.y = dataset.y
        self.d = dataset.file_idx
        
    def __getitem__(self, index):
        return self.x[index], self.y[index], self.d[index]

    def __len__(self):
        return self.y.shape[0]
        

In [45]:
domain_train_data = DomainTSDataset(train_data)
domain_train_loader = DataLoader(domain_train_data, **params)

domain_val_data = DomainTSDataset(validation_data)
domain_val_loader = DataLoader(domain_val_data, **params)

In [47]:
n_epoch = 10
for epoch in range(n_epoch):

    len_dataloader = len(domain_train_loader)
    train_iter = iter(domain_train_loader)


    i = 0
    while i < len_dataloader:

        p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
        alpha = 2. / (1. + np.exp(-10 * p)) - 1

        # training model using source data
        data_source = train_iter.next()
        x, y, d = data_source

        vrada.zero_grad()
        batch_size = len(x)
        
        x = torch.tensor(x).float().to(device)
        y = torch.tensor(y).float().to(device)
        d = torch.tensor(d).long().to(device)


        regressor_output, domain_class_output, kld_loss, nll_loss = vrada(x, alpha)       
        err_reg = loss_regression(regressor_output, y.to(device) )
        err_dom = loss_domain(domain_class_output.float(), d.long().to(device))

        # training model using target data
        err = err_reg + 0.1*err_dom + 0.000001*kld_loss+ 0.000001*nll_loss
        err.backward()
        optimizer.step()
        print(err.data, err_reg.data, err_dom.data, kld_loss.data, nll_loss.data)
        
        i += 1

    print ('epoch: %d, [iter: %d / all %d], err_reg: %f, err_domain: %f' \
              % (epoch, i, len_dataloader, err_reg.data.cpu().numpy(),
                 err_dom.data.cpu().numpy()))

    #torch.save(my_net, '{0}/mnist_mnistm_model_epoch_{1}.pth'.format(model_root, epoch))
    #test(source_dataset_name, epoch)
    #test(target_dataset_name, epoch)

print('done')




tensor(0.6507, device='cuda:0') tensor(0.5446, device='cuda:0') tensor(1.0353, device='cuda:0') tensor(30.7617, device='cuda:0') tensor(2544.3591, device='cuda:0')
tensor(0.5966, device='cuda:0') tensor(0.4843, device='cuda:0') tensor(1.0973, device='cuda:0') tensor(30.4367, device='cuda:0') tensor(2541.8091, device='cuda:0')
tensor(0.5496, device='cuda:0') tensor(0.4347, device='cuda:0') tensor(1.1236, device='cuda:0') tensor(29.7159, device='cuda:0') tensor(2526.8228, device='cuda:0')
tensor(0.6464, device='cuda:0') tensor(0.5346, device='cuda:0') tensor(1.0917, device='cuda:0') tensor(29.4133, device='cuda:0') tensor(2546.4604, device='cuda:0')
tensor(0.6829, device='cuda:0') tensor(0.5689, device='cuda:0') tensor(1.1144, device='cuda:0') tensor(29.0139, device='cuda:0') tensor(2499.1270, device='cuda:0')
tensor(0.6198, device='cuda:0') tensor(0.5074, device='cuda:0') tensor(1.0982, device='cuda:0') tensor(29.3720, device='cuda:0') tensor(2546.7896, device='cuda:0')
tensor(0.5863, d

tensor(0.4943, device='cuda:0') tensor(0.3902, device='cuda:0') tensor(1.0204, device='cuda:0') tensor(19.6868, device='cuda:0') tensor(2032.3333, device='cuda:0')
tensor(0.5737, device='cuda:0') tensor(0.4578, device='cuda:0') tensor(1.1391, device='cuda:0') tensor(19.1999, device='cuda:0') tensor(1958.7539, device='cuda:0')
tensor(0.4767, device='cuda:0') tensor(0.3745, device='cuda:0') tensor(1.0021, device='cuda:0') tensor(19.7890, device='cuda:0') tensor(1965.4971, device='cuda:0')
tensor(0.4548, device='cuda:0') tensor(0.3396, device='cuda:0') tensor(1.1322, device='cuda:0') tensor(20.3872, device='cuda:0') tensor(1977.8501, device='cuda:0')
tensor(0.5603, device='cuda:0') tensor(0.4447, device='cuda:0') tensor(1.1360, device='cuda:0') tensor(20.0589, device='cuda:0') tensor(1966.9868, device='cuda:0')
tensor(0.5542, device='cuda:0') tensor(0.4414, device='cuda:0') tensor(1.1086, device='cuda:0') tensor(19.9498, device='cuda:0') tensor(1976.1074, device='cuda:0')
tensor(0.5407, d

tensor(0.4077, device='cuda:0') tensor(0.2961, device='cuda:0') tensor(1.0990, device='cuda:0') tensor(19.1671, device='cuda:0') tensor(1679.6035, device='cuda:0')
tensor(0.4171, device='cuda:0') tensor(0.3145, device='cuda:0') tensor(1.0129, device='cuda:0') tensor(16.1328, device='cuda:0') tensor(1256.7173, device='cuda:0')
tensor(0.4764, device='cuda:0') tensor(0.3617, device='cuda:0') tensor(1.1338, device='cuda:0') tensor(17.2803, device='cuda:0') tensor(1296.9004, device='cuda:0')
tensor(0.4971, device='cuda:0') tensor(0.3871, device='cuda:0') tensor(1.0877, device='cuda:0') tensor(17.3304, device='cuda:0') tensor(1245.4982, device='cuda:0')
tensor(0.5024, device='cuda:0') tensor(0.3963, device='cuda:0') tensor(1.0480, device='cuda:0') tensor(17.5884, device='cuda:0') tensor(1287.6332, device='cuda:0')
tensor(0.4563, device='cuda:0') tensor(0.3472, device='cuda:0') tensor(1.0757, device='cuda:0') tensor(16.1023, device='cuda:0') tensor(1509.2019, device='cuda:0')
tensor(0.4897, d

tensor(0.4347, device='cuda:0') tensor(0.3310, device='cuda:0') tensor(1.0266, device='cuda:0') tensor(19.0679, device='cuda:0') tensor(1028.2311, device='cuda:0')
tensor(0.4817, device='cuda:0') tensor(0.3787, device='cuda:0') tensor(1.0227, device='cuda:0') tensor(17.0855, device='cuda:0') tensor(720.2743, device='cuda:0')
tensor(0.4151, device='cuda:0') tensor(0.3097, device='cuda:0') tensor(1.0464, device='cuda:0') tensor(18.7512, device='cuda:0') tensor(791.8324, device='cuda:0')
tensor(0.4460, device='cuda:0') tensor(0.3466, device='cuda:0') tensor(0.9835, device='cuda:0') tensor(19.5496, device='cuda:0') tensor(1053.2744, device='cuda:0')
tensor(0.4485, device='cuda:0') tensor(0.3477, device='cuda:0') tensor(1.0016, device='cuda:0') tensor(14.6929, device='cuda:0') tensor(624.2899, device='cuda:0')
tensor(0.4261, device='cuda:0') tensor(0.3223, device='cuda:0') tensor(1.0293, device='cuda:0') tensor(18.4092, device='cuda:0') tensor(790.3925, device='cuda:0')
tensor(0.4560, devic

tensor(0.4251, device='cuda:0') tensor(0.3218, device='cuda:0') tensor(1.0302, device='cuda:0') tensor(18.7428, device='cuda:0') tensor(249.9663, device='cuda:0')
tensor(0.3959, device='cuda:0') tensor(0.2842, device='cuda:0') tensor(1.1111, device='cuda:0') tensor(20.9118, device='cuda:0') tensor(629.7978, device='cuda:0')
tensor(0.4097, device='cuda:0') tensor(0.3082, device='cuda:0') tensor(1.0061, device='cuda:0') tensor(27.1037, device='cuda:0') tensor(857.8125, device='cuda:0')
tensor(0.4201, device='cuda:0') tensor(0.3096, device='cuda:0') tensor(1.1035, device='cuda:0') tensor(19.6882, device='cuda:0') tensor(159.1277, device='cuda:0')
tensor(0.4204, device='cuda:0') tensor(0.3193, device='cuda:0') tensor(1.0052, device='cuda:0') tensor(19.4393, device='cuda:0') tensor(543.3922, device='cuda:0')
tensor(0.3825, device='cuda:0') tensor(0.2814, device='cuda:0') tensor(1.0063, device='cuda:0') tensor(22.4870, device='cuda:0') tensor(402.0552, device='cuda:0')
tensor(0.3709, device=

tensor(0.3670, device='cuda:0') tensor(0.2563, device='cuda:0') tensor(1.1067, device='cuda:0') tensor(13.3985, device='cuda:0') tensor(22.3127, device='cuda:0')
tensor(0.4116, device='cuda:0') tensor(0.3083, device='cuda:0') tensor(1.0283, device='cuda:0') tensor(15.7482, device='cuda:0') tensor(486.8362, device='cuda:0')
tensor(0.3642, device='cuda:0') tensor(0.2558, device='cuda:0') tensor(1.0842, device='cuda:0') tensor(17.3601, device='cuda:0') tensor(-73.8328, device='cuda:0')
tensor(0.4049, device='cuda:0') tensor(0.3006, device='cuda:0') tensor(1.0430, device='cuda:0') tensor(13.9908, device='cuda:0') tensor(6.3181, device='cuda:0')
tensor(0.3995, device='cuda:0') tensor(0.2932, device='cuda:0') tensor(1.0641, device='cuda:0') tensor(15.1495, device='cuda:0') tensor(-46.8207, device='cuda:0')
tensor(0.3646, device='cuda:0') tensor(0.2618, device='cuda:0') tensor(1.0281, device='cuda:0') tensor(15.9417, device='cuda:0') tensor(-111.1419, device='cuda:0')
tensor(0.4413, device='c

tensor(0.4258, device='cuda:0') tensor(0.3224, device='cuda:0') tensor(1.0310, device='cuda:0') tensor(9.8387, device='cuda:0') tensor(229.2126, device='cuda:0')
tensor(0.3999, device='cuda:0') tensor(0.3091, device='cuda:0') tensor(0.9082, device='cuda:0') tensor(12.3042, device='cuda:0') tensor(12.7034, device='cuda:0')
tensor(0.4048, device='cuda:0') tensor(0.3034, device='cuda:0') tensor(1.0080, device='cuda:0') tensor(14.2401, device='cuda:0') tensor(599.9667, device='cuda:0')
tensor(0.4204, device='cuda:0') tensor(0.3064, device='cuda:0') tensor(1.1362, device='cuda:0') tensor(10.5734, device='cuda:0') tensor(379.6886, device='cuda:0')
tensor(0.3867, device='cuda:0') tensor(0.2883, device='cuda:0') tensor(0.9834, device='cuda:0') tensor(11.0037, device='cuda:0') tensor(97.8141, device='cuda:0')
tensor(0.4065, device='cuda:0') tensor(0.3169, device='cuda:0') tensor(0.9942, device='cuda:0') tensor(599.0852, device='cuda:0') tensor(-10507.3174, device='cuda:0')
tensor(0.3459, device

tensor(0.3352, device='cuda:0') tensor(0.2393, device='cuda:0') tensor(0.9584, device='cuda:0') tensor(11.2992, device='cuda:0') tensor(19.0318, device='cuda:0')
tensor(0.3964, device='cuda:0') tensor(0.2914, device='cuda:0') tensor(1.0518, device='cuda:0') tensor(9.7607, device='cuda:0') tensor(-237.4644, device='cuda:0')
tensor(0.3789, device='cuda:0') tensor(0.2738, device='cuda:0') tensor(1.0532, device='cuda:0') tensor(10.6231, device='cuda:0') tensor(-225.6325, device='cuda:0')
tensor(0.3948, device='cuda:0') tensor(0.2969, device='cuda:0') tensor(0.9795, device='cuda:0') tensor(10.4378, device='cuda:0') tensor(-45.8769, device='cuda:0')
tensor(0.3933, device='cuda:0') tensor(0.2870, device='cuda:0') tensor(1.0634, device='cuda:0') tensor(11.6469, device='cuda:0') tensor(-133.6615, device='cuda:0')
tensor(0.3863, device='cuda:0') tensor(0.2850, device='cuda:0') tensor(1.0120, device='cuda:0') tensor(10.4068, device='cuda:0') tensor(124.0733, device='cuda:0')
tensor(0.3812, device

tensor(0.3702, device='cuda:0') tensor(0.2720, device='cuda:0') tensor(0.9780, device='cuda:0') tensor(14.9619, device='cuda:0') tensor(400.1950, device='cuda:0')
tensor(0.3252, device='cuda:0') tensor(0.2213, device='cuda:0') tensor(1.0369, device='cuda:0') tensor(10.5181, device='cuda:0') tensor(288.7343, device='cuda:0')
tensor(0.4069, device='cuda:0') tensor(0.3049, device='cuda:0') tensor(1.0171, device='cuda:0') tensor(11.5290, device='cuda:0') tensor(294.9846, device='cuda:0')
tensor(0.4299, device='cuda:0') tensor(0.3274, device='cuda:0') tensor(1.0205, device='cuda:0') tensor(14.6725, device='cuda:0') tensor(480.5697, device='cuda:0')
tensor(0.4524, device='cuda:0') tensor(0.3572, device='cuda:0') tensor(0.9560, device='cuda:0') tensor(10.3300, device='cuda:0') tensor(-433.1707, device='cuda:0')
tensor(0.3737, device='cuda:0') tensor(0.2795, device='cuda:0') tensor(0.9411, device='cuda:0') tensor(12.8222, device='cuda:0') tensor(58.4994, device='cuda:0')
tensor(0.4486, device=

tensor(0.3795, device='cuda:0') tensor(0.2776, device='cuda:0') tensor(1.0218, device='cuda:0') tensor(11.7973, device='cuda:0') tensor(-261.6412, device='cuda:0')
tensor(0.3276, device='cuda:0') tensor(0.2354, device='cuda:0') tensor(0.9304, device='cuda:0') tensor(12.4194, device='cuda:0') tensor(-868.6688, device='cuda:0')
tensor(0.4009, device='cuda:0') tensor(0.2982, device='cuda:0') tensor(1.0292, device='cuda:0') tensor(11.6859, device='cuda:0') tensor(-184.0985, device='cuda:0')
tensor(0.4049, device='cuda:0') tensor(0.3031, device='cuda:0') tensor(1.0171, device='cuda:0') tensor(14.8609, device='cuda:0') tensor(72.9417, device='cuda:0')
tensor(0.4048, device='cuda:0') tensor(0.3051, device='cuda:0') tensor(0.9937, device='cuda:0') tensor(10.9829, device='cuda:0') tensor(335.7043, device='cuda:0')
tensor(0.4103, device='cuda:0') tensor(0.3126, device='cuda:0') tensor(0.9793, device='cuda:0') tensor(12.5477, device='cuda:0') tensor(-234.7206, device='cuda:0')
tensor(0.3567, devi

tensor(0.3310, device='cuda:0') tensor(0.2411, device='cuda:0') tensor(0.9075, device='cuda:0') tensor(13.0248, device='cuda:0') tensor(-831.6759, device='cuda:0')
tensor(0.4255, device='cuda:0') tensor(0.3188, device='cuda:0') tensor(1.0649, device='cuda:0') tensor(23.5644, device='cuda:0') tensor(153.6788, device='cuda:0')
tensor(0.3718, device='cuda:0') tensor(0.2741, device='cuda:0') tensor(0.9832, device='cuda:0') tensor(14.0437, device='cuda:0') tensor(-660.3264, device='cuda:0')
tensor(0.3518, device='cuda:0') tensor(0.2484, device='cuda:0') tensor(1.0338, device='cuda:0') tensor(14.8404, device='cuda:0') tensor(-12.8961, device='cuda:0')
tensor(0.3858, device='cuda:0') tensor(0.2947, device='cuda:0') tensor(0.9202, device='cuda:0') tensor(11.7229, device='cuda:0') tensor(-950.4355, device='cuda:0')
tensor(0.4175, device='cuda:0') tensor(0.3175, device='cuda:0') tensor(1.0054, device='cuda:0') tensor(11.5809, device='cuda:0') tensor(-473.4198, device='cuda:0')
tensor(0.3853, dev

KeyboardInterrupt: 

In [73]:
float(err)

0.3430388569831848

In [28]:
n_epoch = 1
for epoch in range(n_epoch):

    len_dataloader = len(domain_val_loader)
    train_iter = iter(domain_val_loader)


    i = 0
    while i < len_dataloader:

        p = float(i + epoch * len_dataloader) / n_epoch / len_dataloader
        alpha = 2. / (1. + np.exp(-10 * p)) - 1

        # training model using source data
        data_source = train_iter.next()
        x, y, d = data_source

        vrada.zero_grad()
        batch_size = len(x)
        
        x = torch.tensor(x).float().to(device)
        y = torch.tensor(y).float().to(device)
        d = torch.tensor(d).long().to(device)


        regressor_output, domain_class_output, kld_loss, nll_loss = vrada(x)       
        err_reg = loss_regression(regressor_output, y.to(device) )




        i += 1

    print ('epoch: %d, [iter: %d / all %d], err_reg: %f' \
              % (epoch, i, len_dataloader, err_reg.data.cpu().numpy()))

    #torch.save(my_net, '{0}/mnist_mnistm_model_epoch_{1}.pth'.format(model_root, epoch))
    #test(source_dataset_name, epoch)
    #test(target_dataset_name, epoch)

print('done')




epoch: 0, [iter: 553 / all 553], err_reg: 0.031666
done


In [120]:
regressor_output.shape

torch.Size([64, 1])

In [121]:
y.shape

torch.Size([64, 1])

In [30]:
count_parameters(vrada)

51897

In [31]:
count_parameters(vrada.vrnn)

49092