In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models
from tqdm import tqdm
from PIL import Image
import os
import torch.nn.functional as F
from torch.nn.modules.loss import _WeightedLoss
from sklearn.model_selection import train_test_split
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from torch.optim.swa_utils import AveragedModel, SWALR
from torch.autograd import Function

In [32]:
import random
import numpy as np

def set_random_seeds(seed_value=42):
    """Seed every random number generator used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU generator, the current CUDA device, and all CUDA devices), then
    configures cuDNN for deterministic behaviour.

    Args:
        seed_value: Integer seed applied to every generator.
    """
    # Python and NumPy generators.
    random.seed(seed_value)
    np.random.seed(seed_value)

    # PyTorch: CPU generator, current CUDA device, then every CUDA device
    # (the last one matters for multi-GPU runs).
    for seed_fn in (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    ):
        seed_fn(seed_value)

    # Trade cuDNN autotuning for repeatable kernel selection.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False


set_random_seeds()

In [33]:
class PACSDataset(Dataset):
    """Image-folder dataset spanning one or more domain directories.

    The expected layout is ``root_dir/<domain>/<class_name>/<image file>``.
    Class indices are assigned per domain from the alphabetically sorted
    class-directory names, so every domain must contain the same set of class
    directories for labels to be consistent across domains.

    Args:
        root_dir: Directory that contains one sub-directory per domain.
        domains: Iterable of domain directory names to load.
        transform: Optional callable applied to each PIL image.

    Attributes:
        images: List of image file paths.
        labels: List of integer class indices, aligned with ``images``.
    """

    # Accepted image file extensions (matched case-insensitively).
    IMAGE_EXTENSIONS = (".png", ".jpg", ".jpeg")

    def __init__(self, root_dir, domains, transform=None):
        self.root_dir = root_dir
        self.domains = domains
        self.transform = transform
        self.images = []
        self.labels = []
        self._load_images_labels()

    def _load_images_labels(self):
        """Populate ``self.images`` and ``self.labels`` by scanning the disk."""
        for domain in self.domains:
            domain_dir = os.path.join(self.root_dir, domain)
            classes = sorted(
                d
                for d in os.listdir(domain_dir)
                if os.path.isdir(os.path.join(domain_dir, d))
            )

            for label, class_name in enumerate(classes):
                class_dir = os.path.join(domain_dir, class_name)
                # os.listdir returns entries in arbitrary order; sorting keeps
                # the sample order (and any seeded split built on top of it)
                # reproducible across machines and filesystems.
                for image_name in sorted(os.listdir(class_dir)):
                    if image_name.lower().endswith(self.IMAGE_EXTENSIONS):
                        self.images.append(os.path.join(class_dir, image_name))
                        self.labels.append(label)

    def __len__(self):
        """Return the number of images found."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the sample at ``idx``.

        Raises:
            ValueError: If the transformed image is a tensor containing NaN.
        """
        image_path = self.images[idx]
        # Close the file handle as soon as the pixel data has been converted.
        with Image.open(image_path) as raw_image:
            image = raw_image.convert("RGB")
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        # Check for NaN values; only meaningful once the image is a tensor
        # (without a transform it is still a PIL image).
        if torch.is_tensor(image) and torch.isnan(image).any():
            raise ValueError(f"Image at index {idx} contains NaN values.")

        return image, label

def get_transform(train=True):
    """Build the image preprocessing pipeline.

    Args:
        train: When True (the default) return the augmenting pipeline used for
            training. When False return a deterministic pipeline (resize,
            tensor conversion, normalisation) suitable for validation/testing.

    Returns:
        A ``transforms.Compose`` that maps a PIL image to a normalised
        3x224x224 tensor.
    """
    resize = transforms.Resize((224, 224))
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
    )

    if not train:
        # No random augmentation: evaluation metrics must be repeatable.
        return transforms.Compose([resize, to_tensor, normalize])

    return transforms.Compose([
        resize,
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        to_tensor,
        normalize,
        # RandomErasing operates on tensors, so it must come after ToTensor.
        transforms.RandomErasing(p=0.5, scale=(0.02, 0.33), ratio=(0.3, 3.3), value=0),
    ])

In [34]:
def get_dataloader(root_dir, train_domains, test_domain, batch_size=16):
    """Create the train / validation / test data loaders.

    The source-domain images are split 90% / 10% into training and validation
    subsets. Training samples use the augmenting pipeline from
    ``get_transform()``; validation and test samples use a deterministic
    pipeline (resize, tensor conversion, normalisation) so that evaluation is
    not perturbed by random augmentation.

    Args:
        root_dir: Dataset root passed to ``PACSDataset``.
        train_domains: List of source-domain directory names.
        test_domain: Name of the held-out target-domain directory.
        batch_size: Batch size for all three loaders.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.
    """
    import copy  # local import: the notebook only imports copy in a later cell

    eval_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    train_dataset = PACSDataset(root_dir, train_domains, transform=get_transform())
    # Shallow copy: shares the file/label lists (so indices line up exactly and
    # the disk is scanned only once) but carries its own transform.
    val_dataset = copy.copy(train_dataset)
    val_dataset.transform = eval_transform
    test_dataset = PACSDataset(root_dir, [test_domain], transform=eval_transform)

    # Split train and validation by index so each subset keeps its own transform.
    num_samples = len(train_dataset)
    train_size = int(0.9 * num_samples)
    shuffled = torch.randperm(num_samples).tolist()
    train_subset = torch.utils.data.Subset(train_dataset, shuffled[:train_size])
    val_subset = torch.utils.data.Subset(val_dataset, shuffled[train_size:])

    train_loader = DataLoader(train_subset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_subset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

In [35]:
from torchvision.models import efficientnet_b7

class Encoder(nn.Module):
    """Variational encoder built on an EfficientNet-B7 feature extractor.

    The backbone is created with ``weights=None`` (random initialisation, no
    pretrained weights). Its pooled features go through dropout and a
    squeeze-and-excitation style gate before being projected to the mean and
    log-variance of the latent distribution.

    Args:
        latent_dim: Size of the latent vector.
    """

    def __init__(self, latent_dim):
        super().__init__()

        # EfficientNet-B7 backbone without pretrained weights.
        self.efficientnet = efficientnet_b7(weights=None)

        # Width of the pooled feature vector, read from the backbone's own
        # classification head.
        feature_dim = self.efficientnet.classifier[1].in_features
        bottleneck_dim = feature_dim // 16

        # Channel gate: squeeze to a bottleneck, expand back, squash to (0, 1).
        squeeze = nn.Linear(feature_dim, bottleneck_dim)
        excite = nn.Linear(bottleneck_dim, feature_dim)
        self.attention = nn.Sequential(squeeze, nn.ReLU(), excite, nn.Sigmoid())

        # Heads for the latent mean (mu) and log-variance (logvar).
        self.fc_mu = nn.Linear(feature_dim, latent_dim)
        self.fc_logvar = nn.Linear(feature_dim, latent_dim)

        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Encode an image batch and return ``(mu, logvar)``."""
        backbone = self.efficientnet

        # Convolutional features -> global pooling -> flat feature vector.
        pooled = torch.flatten(backbone.avgpool(backbone.features(x)), 1)
        pooled = self.dropout(pooled)

        # Re-weight every feature by its learned gate value.
        gated = pooled * self.attention(pooled)

        return self.fc_mu(gated), self.fc_logvar(gated)

In [36]:
class Classifier(nn.Module):
    """Linear classification head operating on latent vectors.

    Args:
        latent_dim: Size of the incoming latent vector.
        num_classes: Number of output logits.
    """

    def __init__(self, latent_dim, num_classes):
        super().__init__()
        self.fc = nn.Linear(latent_dim, num_classes)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, z):
        """Return class logits for ``z`` (dropout is applied before the linear layer)."""
        return self.fc(self.dropout(z))

In [37]:
class Decoder(nn.Module):
    """Domain-conditioned convolutional decoder.

    A learned domain embedding is added to the latent vector, the result is
    broadcast to a 7x7 grid and passed through five ``UNetUpBlock`` stages
    (512 -> 256 -> 128 -> 64 -> 32 -> 16 channels). A final 3-channel
    convolution produces the image, which is then scaled by a per-pixel
    sigmoid gate.

    Args:
        latent_dim: Size of the latent vector (and of the domain embedding).
        num_domains: Number of rows in the domain embedding table.
    """

    def __init__(self, latent_dim, num_domains):
        super().__init__()
        self.domain_embedding = nn.Embedding(num_domains, latent_dim)

        self.init_conv = nn.Conv2d(latent_dim, 512, kernel_size=3, padding=1)

        # Channel widths at the boundaries of the five up-sampling stages.
        widths = (512, 256, 128, 64, 32, 16)
        stages = [
            UNetUpBlock(c_in, c_out) for c_in, c_out in zip(widths[:-1], widths[1:])
        ]
        self.up1, self.up2, self.up3, self.up4, self.up5 = stages

        self.final_conv = nn.Conv2d(16, 3, kernel_size=3, padding=1)
        self.attention = nn.Sequential(nn.Conv2d(3, 1, kernel_size=1), nn.Sigmoid())

    def forward(self, z, domain_label):
        """Decode latent batch ``z`` conditioned on ``domain_label`` indices."""
        conditioned = z + self.domain_embedding(domain_label)

        # (batch, latent) -> (batch, latent, 1, 1) -> (batch, latent, 7, 7)
        grid = conditioned.view(-1, conditioned.size(1), 1, 1)
        grid = F.interpolate(grid, size=(7, 7), mode='bilinear', align_corners=False)
        hidden = self.init_conv(grid)

        for stage in (self.up1, self.up2, self.up3, self.up4, self.up5):
            hidden = stage(hidden)

        image = self.final_conv(hidden)

        # Per-pixel gate computed from the image itself.
        return image * self.attention(image)

class UNetUpBlock(nn.Module):
    """Upsample by 2x, then apply two Conv-BatchNorm-ReLU layers.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        layers = []
        # First conv maps in_channels -> out_channels, second keeps out_channels.
        for conv_in in (in_channels, out_channels):
            layers += [
                nn.Conv2d(conv_in, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
        self.conv = nn.Sequential(*layers)
        self.upsample = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True)

    def forward(self, x):
        """Return the refined feature map at twice the input resolution."""
        upsampled = self.upsample(x)
        return self.conv(upsampled)

In [38]:
class DomainClassifier(nn.Module):
    """Three-layer MLP that predicts the domain of a latent vector.

    Args:
        latent_dim: Size of the incoming latent vector.
        num_domains: Number of output logits.
    """

    def __init__(self, latent_dim, num_domains=3):
        super().__init__()
        self.fc1 = nn.Linear(latent_dim, 1024)
        self.fc2 = nn.Linear(1024, 1024)
        self.fc3 = nn.Linear(1024, num_domains)
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return domain logits for ``x``."""
        hidden = x
        # Two hidden layers, each followed by ReLU and dropout.
        for layer in (self.fc1, self.fc2):
            hidden = self.dropout(F.relu(layer(hidden)))
        return self.fc3(hidden)


class GradientReversalLayer(Function):
    """Autograd function: identity in the forward pass, negated and scaled
    gradient in the backward pass."""

    @staticmethod
    def forward(ctx, x, alpha):
        """Return ``x`` unchanged and remember the scaling factor ``alpha``."""
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``-alpha * grad_output`` for ``x`` and no gradient for ``alpha``."""
        reversed_grad = grad_output.neg() * ctx.alpha
        return reversed_grad, None

def grad_reverse(x, alpha):
    """Pass ``x`` through ``GradientReversalLayer`` with scaling factor ``alpha``."""
    reversal_op = GradientReversalLayer.apply
    return reversal_op(x, alpha)

In [39]:
class LabelSmoothingLoss(_WeightedLoss):
    """Cross-entropy against label-smoothed one-hot targets.

    Args:
        weight: Optional per-class weights multiplied into the log-probabilities.
        reduction: ``"mean"``, ``"sum"``; any other value returns per-sample losses.
        smoothing: Probability mass moved from the true class and spread evenly
            over the remaining classes; must satisfy ``0 <= smoothing < 1``.
    """

    def __init__(self, weight=None, reduction="mean", smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    def k_one_hot(self, targets: torch.Tensor, n_classes: int, smoothing=0.0):
        """Turn class indices into smoothed one-hot rows of width ``n_classes``."""
        with torch.no_grad():
            # Every wrong class receives an equal share of the smoothing mass.
            off_value = smoothing / (n_classes - 1)
            smoothed = torch.full(
                (targets.size(0), n_classes), off_value, device=targets.device
            )
            smoothed.scatter_(1, targets.data.unsqueeze(1), 1.0 - smoothing)
        return smoothed

    def reduce_loss(self, loss):
        """Apply the configured reduction to per-sample losses."""
        if self.reduction == "mean":
            return loss.mean()
        if self.reduction == "sum":
            return loss.sum()
        return loss

    def forward(self, inputs, targets):
        """Return the smoothed cross-entropy between logits and class indices."""
        assert 0 <= self.smoothing < 1

        soft_targets = self.k_one_hot(targets, inputs.size(-1), self.smoothing)
        log_probs = F.log_softmax(inputs, -1)

        if self.weight is not None:
            log_probs = log_probs * self.weight.unsqueeze(0)

        per_sample = -(soft_targets * log_probs).sum(dim=-1)
        return self.reduce_loss(per_sample)

class DynamicWeightBalancer:
    """Plateau-driven re-weighting of the four loss terms.

    When the monitored loss has not improved for ``patience`` consecutive
    updates, the classification weight is increased and the other weights are
    decreased by ``scaling_factor``. After every update the weights are
    normalised and clamped so that classification always dominates.

    Args:
        init_alpha: Initial reconstruction-loss weight.
        init_beta: Initial classification-loss weight.
        init_gamma: Initial KL-divergence weight.
        init_delta: Initial domain-loss weight.
        patience: Number of non-improving updates before re-scaling.
        scaling_factor: Multiplier (< 1 shrinks) applied on a plateau.
    """

    def __init__(self, init_alpha=1.0, init_beta=1.0, init_gamma=1.0, init_delta=1.0, patience=5, scaling_factor=0.7):
        self.alpha = init_alpha  # Reconstruction loss weight
        self.beta = init_beta    # Classification loss weight
        self.gamma = init_gamma  # KL divergence weight
        self.delta = init_delta  # Domain loss weight

        self.patience = patience
        self.scaling_factor = scaling_factor
        self.best_loss = float('inf')
        self.counter = 0

    def update(self, current_loss, recon_loss, clf_loss, kl_loss, domain_loss):
        """Update and return the weights ``(alpha, beta, gamma, delta)``.

        Only ``current_loss`` drives the schedule; the individual loss terms
        are accepted for interface compatibility but are not used.
        """
        if current_loss < self.best_loss:
            self.best_loss = current_loss
            self.counter = 0
        else:
            self.counter += 1

        if self.counter >= self.patience:
            self.counter = 0
            # Increase classification weight and decrease others
            self.beta /= self.scaling_factor
            self.alpha *= self.scaling_factor
            self.gamma *= self.scaling_factor
            self.delta *= self.scaling_factor

        # Ensure classification loss weight is always significantly larger
        total_weight = self.alpha + self.beta + self.gamma + self.delta
        self.alpha = max(0.1, min(0.3, self.alpha / total_weight))
        self.beta = max(0.6, min(0.8, self.beta / total_weight))
        self.gamma = max(0.05, min(0.15, self.gamma / total_weight))
        # The clamped weights above can add up to more than 1 (up to 1.25);
        # floor the remainder at 0 so the domain term never gets a negative
        # weight, which would silently flip the sign of that loss.
        self.delta = max(0.0, 1 - self.alpha - self.beta - self.gamma)

        return self.alpha, self.beta, self.gamma, self.delta

In [40]:
def reparameterize(mu, logvar, dropout_rate=0.5, training=True):
    """Sample a latent vector with the reparameterisation trick.

    Args:
        mu: Mean of the latent Gaussian.
        logvar: Log-variance of the latent Gaussian (same shape as ``mu``).
        dropout_rate: Dropout probability applied to the sampled latent.
        training: When True (the default, matching the previous behaviour)
            draw ``z = mu + eps * std`` and apply dropout. When False return
            ``mu`` unchanged so that evaluation is deterministic.

    Returns:
        Tensor with the same shape as ``mu``.
    """
    if not training:
        # Inference: use the distribution mean, without noise or dropout.
        return mu

    std = torch.exp(0.5 * logvar)
    eps = torch.randn_like(std)
    z = mu + eps * std
    return F.dropout(z, p=dropout_rate, training=True)

def compute_loss(
    reconstructed_imgs_list,
    original_imgs,
    mu,
    logvar,
    predicted_labels,
    true_labels,
    domain_predictions,
    domain_labels,
    clf_loss_fn,
    domain_loss_fn,
    epoch,
    total_epochs,
    balancer,
):
    """Combine reconstruction, classification, KL and domain losses.

    Args:
        reconstructed_imgs_list: Reconstructions of ``original_imgs``, one per decoder.
        original_imgs: Images the reconstructions are compared against.
        mu, logvar: Parameters of the latent Gaussian.
        predicted_labels, true_labels: Inputs forwarded to ``clf_loss_fn``.
        domain_predictions, domain_labels: Inputs forwarded to ``domain_loss_fn``.
        clf_loss_fn: Callable ``(predictions, targets) -> scalar tensor``.
        domain_loss_fn: Callable ``(predictions, targets) -> scalar tensor``.
        epoch, total_epochs, balancer: Currently unused; the loss weights are
            fixed constants. Kept so existing callers keep working.

    Returns:
        Tuple ``(total_loss, recon_loss, clf_loss, kld_loss, domain_loss,
        alpha, beta, gamma, delta)`` where ``total_loss`` is a tensor, the four
        individual losses are Python floats and the rest are the weights used.
    """
    if len(reconstructed_imgs_list) > 0:
        # Mean of the per-decoder MSE values.
        recon_loss = sum(
            F.mse_loss(recon, original_imgs, reduction="mean")
            for recon in reconstructed_imgs_list
        ) / len(reconstructed_imgs_list)
    else:
        # No decoder output: contribute nothing instead of dividing by zero.
        recon_loss = original_imgs.new_zeros(())

    kld_loss = -0.5 * torch.mean(1 + logvar - mu.pow(2) - logvar.exp())
    clf_loss = clf_loss_fn(predicted_labels, true_labels)
    domain_loss = domain_loss_fn(domain_predictions, domain_labels)

    # Fixed loss weights.
    alpha = 0.1  # reconstruction
    beta = 1     # classification
    gamma = 0.1  # KL divergence
    delta = 0.2  # domain

    # The domain loss is ADDED. The training loop computes the domain
    # predictions behind a gradient-reversal layer, which already negates the
    # gradient reaching the encoder; subtracting the loss here as well would
    # cancel that reversal and push the domain classifier to maximise its own
    # loss.
    total_loss = (
        alpha * recon_loss + beta * clf_loss + gamma * kld_loss + delta * domain_loss
    )
    return (
        total_loss,
        recon_loss.item(),
        clf_loss.item(),
        kld_loss.item(),
        domain_loss.item(),
        alpha,
        beta,
        gamma,
        delta,
    )


In [41]:
def mixup_data(x, y, alpha=1.0, device="cuda"):
    """Mix every sample in a batch with a randomly chosen partner (mixup).

    Args:
        x: Input batch; the first dimension is the batch dimension.
        y: Labels aligned with ``x``.
        alpha: Beta-distribution parameter; ``alpha <= 0`` disables mixing
            (``lam`` is 1 and ``mixed_x`` equals ``x``).
        device: Retained for backward compatibility. The permutation index is
            always placed on ``x.device`` so CPU batches work even when this
            argument is left at its ``"cuda"`` default.

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)`` where ``y_a`` are the original
        labels, ``y_b`` the partner labels and ``lam`` the mixing coefficient.
    """
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    # Drawn from the default (CPU) generator, then moved next to the data.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Blend the loss against both mixup label sets.

    Returns ``lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

In [42]:
import copy


def train_model_progressive(
    encoder,
    decoders,
    classifier,
    domain_classifier,
    train_domains,
    test_domain,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    scheduler,
    num_epochs=100,
    device="cuda",
    patience=10,
):
    """Train the encoder, decoders and classifier with an adversarial domain head.

    Every epoch runs one pass over ``train_loader`` (with mixup), computes a
    validation loss on ``val_loader``, and every third epoch evaluates on
    ``test_loader``. Training stops early when the validation loss has not
    improved for ``patience`` epochs.

    Args:
        encoder: Module returning ``(mu, logvar)`` for an image batch.
        decoders: Dict mapping each training-domain name to its decoder.
        classifier: Module mapping latent vectors to class logits.
        domain_classifier: Module mapping latent vectors to domain logits.
        train_domains: List of source-domain names.
        test_domain: Name of the held-out domain.
        train_loader, val_loader, test_loader: Data loaders yielding ``(images, labels)``.
        optimizer: Optimizer over the trainable parameters.
        scheduler: Learning-rate scheduler, stepped once per epoch.
        num_epochs: Maximum number of epochs.
        device: Device the batches are moved to.
        patience: Early-stopping patience in epochs.

    Returns:
        Tuple ``(encoder, decoders, classifier)`` holding the weights of the
        best test-accuracy checkpoint, or the final weights if no checkpoint
        was captured.

    Raises:
        ValueError: If ``train_loader`` or ``val_loader`` yields no batches.
    """
    # Local import: only needed to decide how the scheduler must be stepped.
    from torch.optim.lr_scheduler import ReduceLROnPlateau

    if len(train_loader) == 0 or len(val_loader) == 0:
        raise ValueError("train_loader and val_loader must each yield at least one batch")

    print("Training model with progressive domain adaptation")
    print(f"Number of epochs: {num_epochs}")
    print(f"Patience: {patience}")
    print(f"Train domains: {train_domains}")
    print(f"Test domain: {test_domain}")
    print(f"Device: {device}")
    print(f"Number of training samples: {len(train_loader.dataset)}")
    print(f"Number of validation samples: {len(val_loader.dataset)}")
    print(f"Number of test samples: {len(test_loader.dataset)}")

    clf_loss_fn = LabelSmoothingLoss(smoothing=0.1)
    domain_to_idx = {
        domain: idx for idx, domain in enumerate(train_domains + [test_domain])
    }
    domain_loss_fn = nn.CrossEntropyLoss()
    best_loss = float("inf")
    best_test_accuracy = 0.0
    patience_counter = 0
    balancer = DynamicWeightBalancer()

    # Clip exactly the parameters the optimizer updates instead of relying on
    # a notebook-global list.
    trainable_params = [
        param for group in optimizer.param_groups for param in group["params"]
    ]

    # Holds the state dicts of the best checkpoint.
    best_model = {"encoder": None, "decoders": None, "classifier": None, "domain_classifier": None}

    def _decode_all(z, batch_size):
        """Reconstruct ``z`` with the decoder of every training domain."""
        return [
            decoders[domain](
                z,
                torch.full(
                    (batch_size,), domain_to_idx[domain], dtype=torch.long, device=device
                ),
            )
            for domain in train_domains
        ]

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")
        encoder.train()
        classifier.train()
        # evaluate_model() switches the domain classifier to eval mode, so it
        # has to be switched back explicitly at the start of every epoch.
        domain_classifier.train()
        for domain in train_domains:
            decoders[domain].train()

        running_loss = 0.0
        running_recon_loss = 0.0
        running_clf_loss = 0.0
        running_kl_loss = 0.0
        running_domain_loss = 0.0
        total_samples = 0

        # Gradient-reversal strength: ramps from 0 towards 1 over training.
        # It only depends on the epoch, so compute it once per epoch.
        progress = float(epoch) / num_epochs
        grl_alpha = float(2.0 / (1.0 + np.exp(-10 * progress)) - 1.0)

        # Training loop on train dataset
        for inputs, labels in tqdm(train_loader, desc="Training"):
            inputs, labels = inputs.to(device), labels.to(device)
            batch_size = inputs.size(0)
            # CrossEntropyLoss needs class indices of shape (batch,) and dtype
            # long; every training sample is labelled 0 ("source domain").
            # NOTE(review): with a single constant label the adversarial
            # objective is degenerate; per-sample domain indices would need to
            # come from the dataset.
            domain_labels = torch.zeros(batch_size, dtype=torch.long, device=device)
            inputs, labels_a, labels_b, lam = mixup_data(
                inputs, labels, alpha=0.2, device=device
            )

            mu, logvar = encoder(inputs)
            z = reparameterize(mu, logvar)

            # Forward pass through domain classifier with gradient reversal
            domain_predictions = domain_classifier(grad_reverse(z, grl_alpha))

            reconstructed_imgs_list = _decode_all(z, batch_size)
            predicted_labels = classifier(z)

            (
                loss,
                recon_loss,
                clf_loss,
                kl_loss,
                domain_loss,
                alpha,
                beta,
                gamma,
                delta,
            ) = compute_loss(
                reconstructed_imgs_list,
                inputs,
                mu,
                logvar,
                predicted_labels,
                labels,
                domain_predictions,
                domain_labels,
                # The target argument is ignored: mixup supplies both label sets.
                lambda pred, target: mixup_criterion(
                    clf_loss_fn, pred, labels_a, labels_b, lam
                ),
                domain_loss_fn,
                epoch,
                num_epochs,
                balancer,
            )

            optimizer.zero_grad()
            loss.backward()
            # Clip BEFORE the optimizer step; clipping afterwards has no effect
            # on the update that was just applied.
            torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)
            optimizer.step()

            running_loss += loss.item() * batch_size
            running_recon_loss += recon_loss * batch_size
            running_clf_loss += clf_loss * batch_size
            running_kl_loss += kl_loss * batch_size
            running_domain_loss += domain_loss * batch_size
            total_samples += batch_size

        avg_loss = running_loss / total_samples
        avg_recon_loss = running_recon_loss / total_samples
        avg_clf_loss = running_clf_loss / total_samples
        avg_kl_loss = running_kl_loss / total_samples
        avg_domain_loss = running_domain_loss / total_samples

        print(
            f"Epoch {epoch + 1}, Loss: {avg_loss:.4f}, Recon: {avg_recon_loss:.4f}, Clf: {avg_clf_loss:.4f}, KL: {avg_kl_loss:.4f} Domain: {avg_domain_loss:.4f}"
        )
        print(
            f"Weights - Alpha: {alpha:.4f}, Beta: {beta:.4f}, Gamma: {gamma:.4f}, Delta: {delta:.4f}"
        )

        # Validation
        encoder.eval()
        classifier.eval()
        domain_classifier.eval()
        for domain in train_domains:
            decoders[domain].eval()

        val_running_loss = 0.0
        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc="Validating"):
                inputs, labels = inputs.to(device), labels.to(device)
                batch_size = inputs.size(0)

                mu, logvar = encoder(inputs)
                # Use the latent mean so the validation loss is deterministic
                # (no sampling noise, no dropout on the latent).
                z = mu

                # Domain predictions/labels are computed for THIS batch; the
                # tensors left over from the last training batch must not be
                # reused here.
                val_domain_predictions = domain_classifier(z)
                val_domain_labels = torch.zeros(
                    batch_size, dtype=torch.long, device=device
                )

                reconstructed_imgs_list = _decode_all(z, batch_size)
                predicted_labels = classifier(z)

                val_loss, _, _, _, _, _, _, _, _ = compute_loss(
                    reconstructed_imgs_list,
                    inputs,
                    mu,
                    logvar,
                    predicted_labels,
                    labels,
                    val_domain_predictions,
                    val_domain_labels,
                    clf_loss_fn,
                    domain_loss_fn,
                    epoch,
                    num_epochs,
                    balancer,
                )
                val_running_loss += val_loss.item()

        avg_val_loss = val_running_loss / len(val_loader)
        print(f"Validation Loss: {avg_val_loss:.4f}")

        # Evaluate on the test set every third epoch.
        if (epoch + 1) % 3 == 0:
            print(
                f"--- Evaluating on Test Domain ({test_domain}) at Epoch {epoch + 1} ---"
            )
            test_accuracy, test_loss, domain_accuracy = evaluate_model(
                encoder, classifier, domain_classifier, test_loader, device
            )
            print(f"Test Accuracy: {test_accuracy:.2f}%, Test Loss: {test_loss:.4f}, Domain Accuracy: {domain_accuracy:.2f}%")

            # Save best model based on test accuracy
            # NOTE(review): selecting the checkpoint on test accuracy leaks
            # test data into model selection; the reported accuracy is
            # optimistic. Consider selecting on validation loss instead.
            if test_accuracy > best_test_accuracy:
                best_test_accuracy = test_accuracy
                best_model["encoder"] = copy.deepcopy(encoder.state_dict())
                best_model["decoders"] = {
                    domain: copy.deepcopy(decoder.state_dict())
                    for domain, decoder in decoders.items()
                }
                best_model["classifier"] = copy.deepcopy(classifier.state_dict())
                best_model["domain_classifier"] = copy.deepcopy(domain_classifier.state_dict())
                print(
                    f"New best model saved with test accuracy: {best_test_accuracy:.2f}%"
                )

        # Early stopping based on validation loss
        if avg_val_loss < best_loss:
            best_loss = avg_val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered after {epoch + 1} epochs")
                break

        # Only ReduceLROnPlateau takes a metric. For every other scheduler the
        # positional argument of step() is an epoch index, so passing the
        # validation loss would corrupt the schedule.
        if isinstance(scheduler, ReduceLROnPlateau):
            scheduler.step(avg_val_loss)
        else:
            scheduler.step()

    # Load best model (if a checkpoint was ever captured).
    if best_model["encoder"] is not None:
        encoder.load_state_dict(best_model["encoder"])
        for domain, state_dict in best_model["decoders"].items():
            decoders[domain].load_state_dict(state_dict)
        classifier.load_state_dict(best_model["classifier"])
        domain_classifier.load_state_dict(best_model["domain_classifier"])
    else:
        # Happens when training stops before the first test evaluation or the
        # test accuracy never exceeds 0.
        print("No best checkpoint was recorded; keeping the final weights.")

    print(f"Training completed. Best test accuracy: {best_test_accuracy:.2f}%")

    return encoder, decoders, classifier

In [43]:
def evaluate_model(encoder, classifier, domain_classifier, dataloader, device, target_domain_label=1):
    """Evaluate classification and domain prediction on ``dataloader``.

    The latent mean ``mu`` is used directly (no sampling, no dropout) so that
    repeated evaluations of the same weights give the same numbers.

    Args:
        encoder: Module returning ``(mu, logvar)`` for an image batch.
        classifier: Module mapping latent vectors to class logits.
        domain_classifier: Module mapping latent vectors to domain logits.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        device: Device the batches are moved to.
        target_domain_label: Domain index every sample in ``dataloader`` is
            expected to receive (default 1, the value previously hard-coded
            for "target domain").

    Returns:
        Tuple ``(accuracy, avg_loss, domain_accuracy)``: classification
        accuracy in percent, mean cross-entropy per sample, and the percentage
        of samples whose predicted domain equals ``target_domain_label``.

    Raises:
        ValueError: If ``dataloader`` yields no samples.
    """
    encoder.eval()
    classifier.eval()
    domain_classifier.eval()
    correct = 0
    total = 0
    running_loss = 0.0
    domain_correct = 0
    clf_loss_fn = nn.CrossEntropyLoss()

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            mu, _ = encoder(inputs)
            z = mu  # deterministic latent for evaluation
            outputs = classifier(z)
            domain_outputs = domain_classifier(z)

            loss = clf_loss_fn(outputs, labels)
            running_loss += loss.item() * inputs.size(0)

            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            if domain_outputs.dim() > 1 and domain_outputs.size(1) > 1:
                # Multi-class head: one prediction per sample, so the count
                # can never exceed the number of samples.
                domain_pred = domain_outputs.argmax(dim=1)
            else:
                # Single-logit head: sigmoid(logit) > 0.5 is logit > 0.
                domain_pred = (domain_outputs.reshape(-1) > 0).long()
            domain_correct += (domain_pred == target_domain_label).sum().item()

    if total == 0:
        raise ValueError("dataloader yielded no samples to evaluate")

    accuracy = 100 * correct / total
    avg_loss = running_loss / total
    domain_accuracy = 100 * domain_correct / total
    return accuracy, avg_loss, domain_accuracy


In [44]:
# Main training and evaluation script
DATA_PATH = "/kaggle/input/pacs-dataset/kfold"
latent_dim = 256
num_classes = 7
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

train_domains = ["art_painting", "cartoon", "sketch"]
test_domain = "photo"
all_domains = train_domains + [test_domain]

# Initialize models (one decoder per training domain)
encoder = Encoder(latent_dim).to(device)
decoders = {domain: Decoder(latent_dim, len(train_domains)).to(device) for domain in train_domains}
classifier = Classifier(latent_dim, num_classes).to(device)
domain_classifier = DomainClassifier(latent_dim).to(device)

# Optimizer and Scheduler
# The domain classifier has to be optimised as well: a frozen, randomly
# initialised domain head gives the gradient-reversal branch nothing
# meaningful to be adversarial against.
params = list(encoder.parameters()) + list(classifier.parameters())
params += list(domain_classifier.parameters())
for decoder in decoders.values():
    params += list(decoder.parameters())

optimizer = optim.AdamW(params, lr=5e-4, weight_decay=1e-3)
scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2, eta_min=1e-6)
num_epochs = 150  # increased number of epochs

# Get dataloaders
train_loader, val_loader, test_loader = get_dataloader(DATA_PATH, train_domains, test_domain)

# Train model
encoder, decoders, classifier = train_model_progressive(
    encoder,
    decoders,
    classifier,
    domain_classifier,
    train_domains,
    test_domain,
    train_loader,
    val_loader,
    test_loader,
    optimizer,
    scheduler,
    num_epochs,
    device=device,
    patience=10,
)

# Final evaluation on the test domain (run once; the previous second,
# identical evaluation only repeated the same pass over the test set).
print(f"Final evaluation on test domain: {test_domain}")
test_accuracy, test_loss, domain_accuracy = evaluate_model(
    encoder, classifier, domain_classifier, test_loader, device
)
print(f"Test Accuracy: {test_accuracy:.2f}%, Test Loss: {test_loss:.4f}, Domain Accuracy: {domain_accuracy:.2f}%")

Using device: cuda
Training model with progressive domain adaptation
Number of epochs: 150
Patience: 10
Train domains: ['art_painting', 'cartoon', 'sketch']
Test domain: photo
Device: cuda
Number of training samples: 832
Number of validation samples: 7489
Number of test samples: 1670
Epoch 1/150


Training:   0%|          | 0/52 [00:00<?, ?it/s]

domain predictions tensor([[ 0.0439],
        [ 0.1539],
        [-0.2274],
        [-0.4488],
        [-0.0793],
        [ 0.0304],
        [-0.1193],
        [ 0.1158],
        [ 0.1734],
        [-0.3065],
        [-0.0356],
        [ 0.2071],
        [-0.1566],
        [-0.1367],
        [-0.2794],
        [-0.2330]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:   2%|▏         | 1/52 [00:00<00:39,  1.28it/s]

domain predictions tensor([[-0.9284],
        [-0.2196],
        [ 0.3168],
        [ 0.0987],
        [-0.0696],
        [-0.1328],
        [ 0.1236],
        [ 0.0944],
        [-0.0679],
        [ 0.0611],
        [-0.1940],
        [ 0.3545],
        [-0.2802],
        [ 0.2723],
        [ 0.0762],
        [-0.0970]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:   4%|▍         | 2/52 [00:01<00:37,  1.35it/s]

domain predictions tensor([[-0.4311],
        [-0.0381],
        [-0.0926],
        [-0.2520],
        [-0.5113],
        [ 0.1155],
        [-0.0547],
        [-0.1290],
        [-0.1395],
        [-0.2754],
        [ 0.0820],
        [-0.1608],
        [-0.0220],
        [-0.0949],
        [-0.3521],
        [ 0.0710]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:   6%|▌         | 3/52 [00:02<00:36,  1.35it/s]

domain predictions tensor([[-0.4773],
        [ 0.4708],
        [-0.1303],
        [-0.1346],
        [-0.1004],
        [ 0.2344],
        [-0.0475],
        [-0.2334],
        [-0.1991],
        [-1.0036],
        [-0.2289],
        [ 0.4647],
        [ 0.2541],
        [-0.1496],
        [ 0.1841],
        [-0.1302]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:   8%|▊         | 4/52 [00:02<00:35,  1.37it/s]

domain predictions tensor([[ 0.3128],
        [ 0.0053],
        [-0.0914],
        [-0.0499],
        [-0.1627],
        [ 0.2984],
        [ 0.2198],
        [-0.3507],
        [ 0.2543],
        [ 0.0147],
        [ 0.1901],
        [-0.5297],
        [-0.4948],
        [-0.0709],
        [-0.1253],
        [-0.0855]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  10%|▉         | 5/52 [00:03<00:34,  1.37it/s]

domain predictions tensor([[-0.1570],
        [-0.4795],
        [-0.3049],
        [-0.0809],
        [ 0.3754],
        [-0.1718],
        [ 0.1736],
        [ 0.2498],
        [ 0.0696],
        [-0.1413],
        [ 0.0778],
        [-0.5950],
        [ 0.0954],
        [ 0.1709],
        [-0.8241],
        [ 0.1475]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  12%|█▏        | 6/52 [00:04<00:33,  1.37it/s]

domain predictions tensor([[-0.0875],
        [-0.3253],
        [ 0.0970],
        [ 0.7079],
        [-0.2958],
        [ 0.2771],
        [ 0.3537],
        [ 0.1329],
        [-0.0330],
        [ 0.1374],
        [-0.0254],
        [-0.2657],
        [-0.0303],
        [ 0.1240],
        [-0.1484],
        [-0.3071]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  13%|█▎        | 7/52 [00:05<00:32,  1.37it/s]

domain predictions tensor([[ 0.2610],
        [ 0.1348],
        [-0.0796],
        [ 0.1843],
        [-0.1253],
        [-0.2523],
        [ 0.4802],
        [-0.4105],
        [-0.3478],
        [-0.1046],
        [ 0.6479],
        [-0.3465],
        [-0.1650],
        [ 0.0070],
        [-0.1200],
        [ 0.0656]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  15%|█▌        | 8/52 [00:05<00:32,  1.36it/s]

domain predictions tensor([[ 0.5360],
        [-0.1060],
        [-0.1667],
        [ 0.1929],
        [-0.0918],
        [ 0.6101],
        [-0.0099],
        [ 0.0896],
        [-0.1647],
        [-0.2075],
        [-0.1051],
        [-0.4203],
        [-0.3324],
        [-0.1905],
        [ 0.0089],
        [-0.3192]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  17%|█▋        | 9/52 [00:06<00:31,  1.36it/s]

domain predictions tensor([[-0.8141],
        [ 0.0406],
        [-0.0250],
        [ 0.2235],
        [-0.3825],
        [-0.4282],
        [ 0.1551],
        [-0.1742],
        [-0.3457],
        [-0.1173],
        [-0.1648],
        [ 0.0209],
        [ 0.4001],
        [-0.0269],
        [-0.0149],
        [-0.0093]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  19%|█▉        | 10/52 [00:07<00:30,  1.36it/s]

domain predictions tensor([[ 0.0261],
        [ 0.3975],
        [ 0.0359],
        [-0.1767],
        [ 0.2031],
        [-0.2450],
        [-0.0590],
        [-0.0945],
        [-0.2258],
        [-0.0568],
        [-0.0812],
        [-0.6195],
        [ 0.1333],
        [-0.3028],
        [-0.3345],
        [ 0.2901]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  21%|██        | 11/52 [00:08<00:29,  1.37it/s]

domain predictions tensor([[-0.1025],
        [-0.0713],
        [ 0.0090],
        [ 0.1236],
        [ 0.0385],
        [ 0.4355],
        [-0.0805],
        [ 0.0976],
        [-0.0887],
        [ 0.2006],
        [-0.1410],
        [ 0.2026],
        [ 0.0670],
        [ 0.0523],
        [ 0.0760],
        [-0.0147]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  23%|██▎       | 12/52 [00:08<00:29,  1.37it/s]

domain predictions tensor([[ 0.0119],
        [-0.0494],
        [ 0.0987],
        [-0.3474],
        [-0.3411],
        [-0.1155],
        [-0.0866],
        [ 0.0230],
        [ 0.1624],
        [-0.1040],
        [ 0.4110],
        [-0.0535],
        [ 0.0217],
        [-0.0269],
        [ 0.5047],
        [ 0.5321]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  25%|██▌       | 13/52 [00:09<00:28,  1.36it/s]

domain predictions tensor([[-0.1041],
        [-0.0744],
        [-0.3963],
        [-0.3789],
        [-0.0649],
        [ 0.1552],
        [-0.0893],
        [-0.2485],
        [-0.4770],
        [ 0.3260],
        [-0.0558],
        [ 0.0552],
        [ 0.5875],
        [-0.4078],
        [ 0.0563],
        [-0.2464]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  27%|██▋       | 14/52 [00:10<00:27,  1.37it/s]

domain predictions tensor([[ 0.0084],
        [ 0.0737],
        [-0.1374],
        [-0.1678],
        [ 0.1955],
        [-0.2193],
        [-0.0855],
        [-0.2667],
        [-0.2982],
        [-0.4637],
        [-0.3247],
        [-0.0298],
        [ 0.7572],
        [ 0.0015],
        [ 0.1387],
        [ 0.1102]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  29%|██▉       | 15/52 [00:11<00:27,  1.36it/s]

domain predictions tensor([[ 0.0428],
        [-0.6070],
        [ 0.5845],
        [-0.4517],
        [-0.2037],
        [-0.1021],
        [-0.0438],
        [ 0.2342],
        [-0.0598],
        [ 0.1268],
        [-0.1753],
        [-0.2773],
        [-0.1051],
        [ 0.0782],
        [-0.1589],
        [-0.4223]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  31%|███       | 16/52 [00:11<00:26,  1.36it/s]

domain predictions tensor([[ 0.2856],
        [-0.2786],
        [-0.4168],
        [-0.2848],
        [ 0.2392],
        [ 0.1052],
        [-0.2449],
        [-0.2040],
        [ 0.0621],
        [ 0.1092],
        [-0.1049],
        [-0.5002],
        [-0.1030],
        [ 0.0495],
        [-0.1351],
        [-0.2540]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  33%|███▎      | 17/52 [00:12<00:25,  1.36it/s]

domain predictions tensor([[-0.2340],
        [-0.2177],
        [-0.0961],
        [-0.1419],
        [ 0.1652],
        [-0.2511],
        [-0.4240],
        [-0.1598],
        [ 0.2596],
        [ 0.1529],
        [ 0.0675],
        [-1.4376],
        [-0.3648],
        [-0.0170],
        [ 0.0709],
        [ 0.2475]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  35%|███▍      | 18/52 [00:13<00:25,  1.36it/s]

domain predictions tensor([[-0.3366],
        [-0.0379],
        [-0.4659],
        [-0.2282],
        [ 0.2607],
        [ 0.0323],
        [ 0.2288],
        [-0.4854],
        [-0.4098],
        [-0.2662],
        [-0.3131],
        [-0.3707],
        [-0.0648],
        [ 0.3891],
        [ 0.0522],
        [ 0.0233]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  37%|███▋      | 19/52 [00:13<00:24,  1.34it/s]

domain predictions tensor([[ 0.0289],
        [-0.1465],
        [ 0.0533],
        [-0.4699],
        [-0.2300],
        [-0.0651],
        [ 0.1887],
        [-0.3409],
        [-0.2252],
        [-0.2868],
        [-0.4400],
        [-0.2598],
        [-0.1428],
        [ 0.1102],
        [-0.2456],
        [ 0.2204]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  38%|███▊      | 20/52 [00:14<00:24,  1.32it/s]

domain predictions tensor([[-0.2346],
        [-0.7421],
        [ 0.0112],
        [ 0.0670],
        [ 0.0909],
        [ 0.3031],
        [-0.2629],
        [ 0.0401],
        [ 0.0720],
        [-0.1333],
        [ 0.3835],
        [ 0.3177],
        [-0.2549],
        [-0.1581],
        [ 0.1449],
        [-0.0129]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  40%|████      | 21/52 [00:15<00:23,  1.33it/s]

domain predictions tensor([[-0.0381],
        [ 0.0189],
        [-0.0540],
        [-0.1905],
        [-0.0513],
        [ 0.1089],
        [ 0.0770],
        [-0.0947],
        [ 0.0296],
        [ 0.0536],
        [-0.1913],
        [-0.1153],
        [ 0.2814],
        [-0.2379],
        [ 0.3405],
        [-0.2129]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  42%|████▏     | 22/52 [00:16<00:22,  1.33it/s]

domain predictions tensor([[-0.3295],
        [ 0.4537],
        [-0.7915],
        [-0.2695],
        [-0.0319],
        [-0.0956],
        [-0.2650],
        [ 0.2524],
        [-0.2404],
        [-0.1075],
        [-0.0422],
        [-0.3652],
        [ 0.0642],
        [ 0.1111],
        [-0.3112],
        [ 0.1236]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  44%|████▍     | 23/52 [00:16<00:21,  1.35it/s]

domain predictions tensor([[ 0.0389],
        [-0.0530],
        [ 0.1820],
        [-0.5159],
        [-0.0735],
        [-0.2273],
        [ 0.2548],
        [ 0.0753],
        [ 0.0270],
        [ 0.3885],
        [-0.4079],
        [-0.3667],
        [ 0.4055],
        [-0.0520],
        [-0.4346],
        [-0.3889]], device='cuda:0', grad_fn=<AddmmBackward0>)


Training:  46%|████▌     | 24/52 [00:17<00:20,  1.34it/s]


KeyboardInterrupt: 