In [129]:
import numpy as np
import pandas as pd

In [130]:
pad_ufes_20_df = pd.read_csv(
  "/Users/robp/spotplotter-data/PAD-UFES-20/metadata.csv",
)

allowed_classes = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'SCC']

pad_ufes_20_df = pad_ufes_20_df[pad_ufes_20_df['diagnostic'].isin(allowed_classes)]

pad_ufes_20_df['diagnostic'] = pad_ufes_20_df['diagnostic'].replace({
    'MEL': 'melanoma',
    'NV': 'melanocytic nevus',
    'BCC': 'basal cell carcinoma',
    'AK': 'actinic keratosis',
    'BKL': 'benign keratosis',
    'SCC': 'squamous cell carcinoma',
})

pad_ufes_20_df['diagnostic'] = pad_ufes_20_df['diagnostic'].astype('category')
pad_ufes_20_df['label'] = pad_ufes_20_df['diagnostic'].cat.codes

pad_ufes_20_df['image'] = pad_ufes_20_df['img_id']
pad_ufes_20_df.drop(columns=['img_id'], inplace=True)

pad_ufes_20_df['age'] = pad_ufes_20_df['age'].fillna(0) / 100.0
pad_ufes_20_df['gender'] = pad_ufes_20_df['gender'].map({'male': 1.0, 'female': 0.0}).fillna(0.5)

pad_ufes_20_df['anatom_site_general'] = pad_ufes_20_df['region']

display(pad_ufes_20_df)


Unnamed: 0,patient_id,lesion_id,smoke,drink,background_father,background_mother,age,pesticide,gender,skin_cancer_history,...,itch,grew,hurt,changed,bleed,elevation,biopsed,label,image,anatom_site_general
1,PAT_46,881,False,False,POMERANIA,POMERANIA,0.55,False,0.5,True,...,TRUE,TRUE,FALSE,TRUE,TRUE,TRUE,True,0,PAT_46_881_939.png,NECK
4,PAT_684,1302,False,True,POMERANIA,POMERANIA,0.79,False,0.5,True,...,TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,True,0,PAT_684_1302_588.png,FOREARM
6,PAT_778,1471,False,True,GERMANY,ITALY,0.52,False,0.5,False,...,FALSE,TRUE,FALSE,TRUE,TRUE,TRUE,True,0,PAT_778_1471_835.png,FACE
7,PAT_117,179,False,False,POMERANIA,POMERANIA,0.74,True,0.5,False,...,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE,True,0,PAT_117_179_983.png,FACE
11,PAT_967,1827,False,False,POMERANIA,POMERANIA,0.34,True,0.5,True,...,TRUE,UNK,FALSE,UNK,TRUE,TRUE,True,0,PAT_967_1827_247.png,NOSE
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2287,PAT_754,1429,False,False,ITALY,GERMANY,0.75,False,0.5,True,...,FALSE,TRUE,FALSE,FALSE,FALSE,FALSE,True,1,PAT_754_1429_380.png,FOREARM
2291,PAT_273,421,False,False,POMERANIA,POMERANIA,0.41,True,0.5,False,...,TRUE,UNK,TRUE,UNK,TRUE,TRUE,True,0,PAT_273_421_905.png,FACE
2292,PAT_491,934,False,False,POMERANIA,POMERANIA,0.43,True,0.5,True,...,TRUE,UNK,FALSE,UNK,TRUE,TRUE,True,2,PAT_491_934_46.png,FACE
2294,PAT_46,880,False,False,POMERANIA,POMERANIA,0.55,False,0.5,True,...,TRUE,TRUE,FALSE,TRUE,FALSE,FALSE,True,0,PAT_46_880_140.png,CHEST


In [131]:
isic_2019_df = pd.read_csv(
  "/Users/robp/spotplotter-data/ISIC_2019/ISIC_2019_Training_GroundTruth.csv",
)

display(isic_2019_df)

class_columns = ['MEL', 'NV', 'BCC', 'AK', 'BKL', 'DF', 'VASC', 'SCC']

def get_label(row):
    return np.argmax(row[class_columns].values)

# TODO: can we use the unknown column?
isic_2019_df = isic_2019_df[isic_2019_df['UNK'] == 0]
isic_2019_df['label'] = isic_2019_df.apply(get_label, axis=1)

isic_2019_df['label'] = isic_2019_df['label'].replace(
    {
        0: "melanoma",
        1: "melanocytic nevus",
        2: "basal cell carcinoma",
        3: "actinic keratosis",
        4: "benign keratosis",
        5: "squamous cell carcinoma",
    }
)

isic_2019_df['label'] = isic_2019_df['label'].astype('category')

isic_2019_df = isic_2019_df.drop(columns=class_columns)
isic_2019_df = isic_2019_df.drop(columns=['UNK'])

isic_2019_metadata_df = pd.read_csv(
  "/Users/robp/spotplotter-data/ISIC_2019/ISIC_2019_Training_Metadata.csv",
)
isic_2019_metadata_df = isic_2019_metadata_df.set_index('image')
isic_2019_df = isic_2019_df.set_index('image')
isic_2019_df = isic_2019_df.join(isic_2019_metadata_df, how='inner')
isic_2019_df = isic_2019_df.reset_index()

isic_2019_df['image'] = isic_2019_df['image'].apply(lambda x: f"{x}.jpg")

isic_2019_df['gender'] = isic_2019_df['sex']
isic_2019_df['age'] = isic_2019_df['age_approx']
isic_2019_df.drop(columns=['age_approx', 'sex'], inplace=True)

isic_2019_df['gender'] = isic_2019_df['gender'].map({'male': 1.0, 'female': 0.0}).fillna(0.5)
isic_2019_df['age'] = isic_2019_df['age'].fillna(0) / 100.0

isic_2019_df['diagnostic'] = isic_2019_df['label'].astype('category')
isic_2019_df['label'] = isic_2019_df['diagnostic'].cat.codes
allowed_labels = [0, 1, 2, 3, 4, 5]  # e.g., up to 6 classes
isic_2019_df = isic_2019_df[isic_2019_df['label'].isin(allowed_labels)]



display(isic_2019_df)

Unnamed: 0,image,MEL,NV,BCC,AK,BKL,DF,VASC,SCC,UNK
0,ISIC_0000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,ISIC_0000001,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,ISIC_0000002,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,ISIC_0000003,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,ISIC_0000004,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...
25326,ISIC_0073247,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
25327,ISIC_0073248,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25328,ISIC_0073249,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25329,ISIC_0073251,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,image,label,anatom_site_general,lesion_id,gender,age,diagnostic
0,ISIC_0000000.jpg,5,anterior torso,,0.0,0.55,melanocytic nevus
1,ISIC_0000001.jpg,5,anterior torso,,0.0,0.30,melanocytic nevus
3,ISIC_0000003.jpg,5,upper extremity,,1.0,0.30,melanocytic nevus
5,ISIC_0000006.jpg,5,posterior torso,,0.0,0.25,melanocytic nevus
6,ISIC_0000007.jpg,5,posterior torso,,0.0,0.25,melanocytic nevus
...,...,...,...,...,...,...,...
25325,ISIC_0073246.jpg,3,anterior torso,BCN_0005265,1.0,0.80,basal cell carcinoma
25326,ISIC_0073247.jpg,3,head/neck,BCN_0003925,0.0,0.85,basal cell carcinoma
25327,ISIC_0073248.jpg,4,anterior torso,BCN_0001819,1.0,0.65,benign keratosis
25329,ISIC_0073251.jpg,5,palms/soles,BCN_0002083,0.0,0.55,melanocytic nevus


In [132]:
import numpy as np

def build_metadata_vector(df, site_encoder, site_column, site_prefix="site_"):
    # One-hot encode anatomical site using pre-fitted encoder
    site_ohe = site_encoder.transform(df[[site_column]].fillna('unknown'))
    site_feature_names = [f"{site_prefix}{c}" for c in site_encoder.categories_[0]]
    site_df = pd.DataFrame(site_ohe, columns=site_feature_names, index=df.index)

    # Concatenate age, gender, and one-hot site
    meta_vector = np.concatenate([
        df[['age', 'gender']].to_numpy(),
        site_df.to_numpy()
    ], axis=1)

    return meta_vector.astype(np.float32)

In [133]:
from PIL import Image
from torch.utils.data import Dataset
import torch

class SkinLesionDataset(Dataset):
    def __init__(self, dataframe, img_root, meta_vectors, transform, is_source=True):
        self.df = dataframe
        self.img_root = img_root
        self.meta_vectors = meta_vectors
        self.transform = transform
        self.is_source = is_source

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        img_path = f"{self.img_root}/{row['image']}"
        image = Image.open(img_path).convert("RGB")
        image = self.transform(image)

        metadata = torch.tensor(self.meta_vectors[idx], dtype=torch.float32)

        label = torch.tensor(row['label'], dtype=torch.long)

        return image, label, metadata

In [134]:
from torchvision import transforms

image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

In [135]:
from sklearn.preprocessing import OneHotEncoder

# Fit shared site encoder
all_sites = pd.concat([
    isic_2019_df[['anatom_site_general']],
    pad_ufes_20_df[['anatom_site_general']]
])
site_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
site_encoder.fit(all_sites.fillna('unknown'))

# Build metadata
isic_meta = build_metadata_vector(isic_2019_df, site_encoder, 'anatom_site_general')
pad_meta = build_metadata_vector(pad_ufes_20_df, site_encoder, 'anatom_site_general')

# Create datasets
isic_dataset = SkinLesionDataset(isic_2019_df, "/Users/robp/spotplotter-data/ISIC_2019/ISIC_2019_Training_Input/", isic_meta, image_transform, is_source=True)
pad_dataset = SkinLesionDataset(pad_ufes_20_df, "/Users/robp/spotplotter-data/PAD-UFES-20/images/", pad_meta, image_transform, is_source=False)



In [136]:
# Dataloaders
from torch.utils.data import DataLoader

source_loader = DataLoader(isic_dataset, batch_size=32, shuffle=True, num_workers=0)
target_loader = DataLoader(pad_dataset, batch_size=32, shuffle=True, num_workers=0)

In [137]:
import torch.nn as nn
from efficientnet_pytorch import EfficientNet


class GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output.neg() * ctx.alpha, None

def grad_reverse(x, alpha):
    return GradReverse.apply(x, alpha)

class MetadataEncoder(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, output_dim)
        )

    def forward(self, x):
        return self.encoder(x)

class DANN(nn.Module):
    def __init__(self, num_classes, metadata_dim):
        super().__init__()
        self.feature_extractor = EfficientNet.from_pretrained('efficientnet-b0')
        self.feature_extractor._fc = nn.Identity()
        self.feature_dim = 1280  # for efficientnet-b0

        self.metadata_encoder = MetadataEncoder(metadata_dim, 32)

        self.class_classifier = nn.Sequential(
            nn.Linear(self.feature_dim + 32, 100),
            nn.ReLU(),
            nn.Linear(100, num_classes)
        )

        self.domain_classifier = nn.Sequential(
            nn.Linear(self.feature_dim, 100),
            nn.ReLU(),
            nn.Linear(100, 2)
        )

    def forward(self, image, metadata, alpha):
        features = self.feature_extractor(image)  # shape: [B, 1280]
        metadata_encoded = self.metadata_encoder(metadata)  # shape: [B, 32]
        combined = torch.cat([features, metadata_encoded], dim=1)  # shape: [B, 1312]

        class_output = self.class_classifier(combined)

        reverse_features = grad_reverse(features, alpha)
        domain_output = self.domain_classifier(reverse_features)
        return class_output, domain_output



In [138]:
import torch
import torch.nn as nn

def train_dann(model, source_loader, target_loader, optimizer, epoch, alpha, device):
    model.train()
    criterion = nn.CrossEntropyLoss()

    total_loss = 0.0
    correct = 0
    total = 0
    tgt_correct = 0
    tgt_total = 0

    for (src_img, src_lbl, src_meta), (tgt_img, tgt_lbl, tgt_meta) in zip(source_loader, target_loader):
        src_img, src_lbl, src_meta = src_img.to(device), src_lbl.to(device), src_meta.to(device)
        tgt_img, tgt_lbl, tgt_meta = tgt_img.to(device), tgt_lbl.to(device), tgt_meta.to(device)

        # Forward pass on source
        class_output, domain_output_s = model(src_img, src_meta, alpha)
        class_loss = criterion(class_output, src_lbl)
        domain_loss_s = criterion(domain_output_s, torch.zeros(src_img.size(0), dtype=torch.long).to(device))

        # Forward pass on target (domain only)
        _, domain_output_t = model(tgt_img, tgt_meta, alpha)
        domain_loss_t = criterion(domain_output_t, torch.ones(tgt_img.size(0), dtype=torch.long).to(device))

        # Total loss
        loss = class_loss + domain_loss_s + domain_loss_t

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Track source accuracy
        preds = class_output.argmax(dim=1)
        correct += (preds == src_lbl).sum().item()
        total += src_lbl.size(0)

        # Forward pass on target (for classification accuracy evaluation only)
        class_output_tgt, domain_output_t = model(tgt_img, tgt_meta, alpha)

        # Domain loss (same as before)
        domain_loss_t = criterion(domain_output_t, torch.ones(tgt_img.size(0), dtype=torch.long).to(device))

        # Track target accuracy
        preds_tgt = class_output_tgt.argmax(dim=1)
        tgt_correct += (preds_tgt == tgt_lbl).sum().item()
        tgt_total += tgt_lbl.size(0)

    avg_loss = total_loss / len(source_loader)
    accuracy = correct / total
    tgt_accuracy = tgt_correct / tgt_total if tgt_total > 0 else 0.0

    print(f"Epoch {epoch:02d} | Loss: {avg_loss:.4f} | Source Acc: {accuracy:.4f} | Target Acc: {tgt_accuracy:.4f}")


In [140]:
num_epochs = 200

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
meta_dimm = isic_meta.shape[1]  # 25
model = DANN(num_classes=6, metadata_dim=meta_dimm).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

for epoch in range(num_epochs):
    p = epoch / num_epochs
    alpha = 2. / (1. + np.exp(-10 * p)) - 1

    train_dann(model, source_loader, target_loader, optimizer, epoch, alpha, device)

    # Save model checkpoint
    torch.save(model.state_dict(), f"dann_epoch_{epoch}.pth")


Loaded pretrained weights for efficientnet-b0
Epoch 00 | Loss: 0.1574 | Source Acc: 0.5000 | Target Acc: 0.0321
Epoch 01 | Loss: 0.1256 | Source Acc: 0.6839 | Target Acc: 0.0009
Epoch 02 | Loss: 0.1157 | Source Acc: 0.7134 | Target Acc: 0.0000
Epoch 03 | Loss: 0.1107 | Source Acc: 0.7277 | Target Acc: 0.0000
Epoch 04 | Loss: 0.1074 | Source Acc: 0.7723 | Target Acc: 0.0000
Epoch 05 | Loss: 0.1133 | Source Acc: 0.7527 | Target Acc: 0.0000
Epoch 06 | Loss: 0.1090 | Source Acc: 0.8000 | Target Acc: 0.0000
Epoch 07 | Loss: 0.1108 | Source Acc: 0.7580 | Target Acc: 0.0000
Epoch 08 | Loss: 0.1087 | Source Acc: 0.7696 | Target Acc: 0.0000
Epoch 09 | Loss: 0.1078 | Source Acc: 0.7821 | Target Acc: 0.0000
Epoch 10 | Loss: 0.1058 | Source Acc: 0.8045 | Target Acc: 0.0000
Epoch 11 | Loss: 0.1050 | Source Acc: 0.7911 | Target Acc: 0.0018
Epoch 12 | Loss: 0.1036 | Source Acc: 0.8205 | Target Acc: 0.0028
Epoch 13 | Loss: 0.1040 | Source Acc: 0.8125 | Target Acc: 0.0009
Epoch 14 | Loss: 0.1017 | Sour