# Import Modules

In [1]:
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

import utils

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Prepare Data

In [3]:
# source_X: shape of (N_source, D)
# source_y_domain: (N_source, 1)
# source_y_task: (N_source, 1)
# source_Y: (N_source, 2)

# target_X: (N_target, D)
# target_y_domain: (N_target, )
# target_y_task: (N_target, )

source_X, target_X, source_y_task, target_y_task, _, _, _ = utils.get_source_target()
source_y_domain = np.zeros_like(source_y_task).reshape(-1, 1)
source_y_task = source_y_task.reshape(-1, 1)

source_Y = np.concatenate([source_y_task, source_y_domain], axis=1)
target_y_domain = np.ones_like(target_y_task)

print(f"N_source: {source_X.shape[0]}")
print(f"D: {source_X.shape[1]}\n")

print(f"source_X: {source_X.shape}")
print(f"source_y_domain: {source_y_domain.shape}")
print(f"source_y_task: {source_y_task.shape}")
print(f"source_Y: {source_Y.shape}\n")

print(f"N_target: {target_X.shape[0]}")
print(f"target_X: {target_X.shape}")
print(f"target_y_domain: {target_y_domain.shape}")
print(f"target_y_task: {target_y_task.shape}")

N_source: 100
D: 2

source_X: (100, 2)
source_y_domain: (100, 1)
source_y_task: (100, 1)
source_Y: (100, 2)

N_target: 100
target_X: (100, 2)
target_y_domain: (100,)
target_y_task: (100,)


In [4]:
source_X = torch.tensor(source_X, dtype=torch.float32)
source_Y = torch.tensor(source_Y, dtype=torch.float32)
target_X = torch.tensor(target_X, dtype=torch.float32)
target_y_domain = torch.tensor(target_y_domain, dtype=torch.float32)
target_y_task = torch.tensor(target_y_task, dtype=torch.float32)

source_X = source_X.to(device)
source_Y = source_Y.to(device)
target_X = target_X.to(device)
target_y_domain = target_y_domain.to(device)
target_y_task = target_y_task.to(device)

source_ds = TensorDataset(source_X, source_Y)
target_ds = TensorDataset(target_X, target_y_domain)

source_loader = DataLoader(source_ds, batch_size=16, shuffle=True)
target_loader = DataLoader(target_ds, batch_size=16, shuffle=True)

# Instantiate Feature Extractor, Domain Classifier, Task Classifier

In [5]:
class MLP(nn.Module):
    def __init__(self, input_size, output_size, dropout_ratio):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 10)
        self.fc2 = nn.Linear(10, 15)
        self.fc3 = nn.Linear(15, output_size)
        self.bn1 = nn.BatchNorm1d(10)
        self.dropout = nn.Dropout(dropout_ratio)

    def forward(self, x):
        x = self.dropout(F.relu(self.fc1(x)))
        x = self.bn1(x)
        x = self.dropout(F.relu(self.fc2(x)))
        x = self.fc3(x)
        return x

In [6]:
hidden_size = 100
num_domains = 1
num_classes = 1
dropout_ratio = 0.5

feature_extractor = MLP(input_size=source_X.shape[1], output_size=hidden_size, dropout_ratio=0.5).to(device)
domain_classifier = MLP(input_size=hidden_size, output_size=num_domains, dropout_ratio=0.5).to(device)
task_classifier = MLP(input_size=hidden_size, output_size=num_classes, dropout_ratio=0.5).to(device)

In [7]:
learning_rate = 0.01

criterion = nn.BCELoss()
feature_optimizer = optim.Adam(feature_extractor.parameters(), lr=learning_rate)
domain_optimizer = optim.Adam(domain_classifier.parameters(), lr=learning_rate)
task_optimizer = optim.Adam(task_classifier.parameters(), lr=learning_rate)

# Feature Invariant Learning

In [9]:
num_epochs = 1
alpha = 0.1
# Parameters: source_loader, target_loader, num_epochs

for _ in range(num_epochs):
    for (source_X, source_Y), (target_X, target_y_domain) in zip(source_loader, target_loader):
        # 0. Data
        source_X = source_X.to(device)
        source_y_task = source_Y[:, 0].to(device)
        source_y_domain = source_Y[:, 1].to(device)
        target_X = target_X.to(device)
        target_y_domain = target_y_domain.to(device)

        # 1. Forward
        # 1.1 Feature Extractor
        source_X, target_X = feature_extractor(source_X), feature_extractor(target_X)

        # 1.2. Domain Classifier
        pred_source_y_domain, pred_target_y_domain = domain_classifier(source_X), domain_classifier(target_X)
        pred_source_y_domain, pred_target_y_domain = torch.sigmoid(pred_source_y_domain), torch.sigmoid(pred_target_y_domain)

        loss_domain = criterion(pred_source_y_domain.reshape(-1), source_y_domain.reshape(-1))
        loss_domain += criterion(pred_target_y_domain.reshape(-1), target_y_domain.reshape(-1))

        # 1.3. Task Classifier
        pred_y_task = task_classifier(source_X)
        pred_y_task = torch.sigmoid(pred_y_task)
        loss_task = criterion(pred_y_task.reshape(-1), source_y_task.reshape(-1))

        # 2. Backward, Update Params
        loss_domain.backward(retain_graph=True)
        domain_optimizer.step()

        loss_task.backward(retain_graph=True)
        task_optimizer.step()

        loss_feature = loss_task - alpha * loss_domain
        loss_feature.backward()
        feature_optimizer.step()
        break
    # 4. Evaluation

RuntimeError: one of the variables needed for gradient computation has been modified by an inplace operation: [torch.cuda.FloatTensor [15, 1]], which is output 0 of TBackward, is at version 3; expected version 2 instead. Hint: enable anomaly detection to find the operation that failed to compute its gradient, with torch.autograd.set_detect_anomaly(True).