In [1]:
import torch  # imported here so the available devices can be probed before one is chosen

# Device every model/tensor in this notebook is moved to.
# "cuda:1" stays the preferred target; when the machine does not expose a second
# CUDA device we fall back to the first GPU, then to the CPU, so that a fresh
# "Restart & Run All" does not fail on the first `.to(DEVICE)` call.
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    DEVICE: str = "cuda:1"
elif torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"

# Dataset identifier: passed to `load_dataset` and embedded in the name of the
# hyper-parameter log file written by `pyhopper_exp`.
DATASET: str = "MouseProtein"

In [2]:
import sys
sys.path.append("../")

In [3]:
import pyhopper

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
import numpy as np
import torch
from torch import nn
import tqdm
import torch.optim
from modules import hypertab_network, contrastive_loss
from torch.utils import data
from data_loader.data_loader import load_dataset
from evaluation import evaluation

In [5]:
import pandas as pd

In [6]:
# Load the train/test splits for the dataset selected in the config cell.
train_dataset, test_dataset = load_dataset(DATASET)

# Both splits are merged: `run_experiment` trains on, and `cluster` evaluates on,
# a loader built from this combined dataset.
dataset = data.ConcatDataset([train_dataset, test_dataset])

try:
    # For datasets exposing `.tensors`: the class count is the number of distinct
    # values in tensors[1] of the *training* split only, and the input width is
    # the second dimension of tensors[0].
    class_num = len(train_dataset.tensors[1].unique())
    X_shape = train_dataset.tensors[0].shape[1]
except AttributeError:
    # Fallback for datasets without `.tensors` (MNIST, per the original note):
    # hard-coded 10 classes and 784 input features.
    # NOTE(review): any AttributeError raised above lands here, so both values
    # are silently replaced by the MNIST constants.
    class_num = 10
    X_shape = 784

In [7]:
# Sanity check: show the inferred number of classes and input feature width.
class_num, X_shape

(8, 77)

## Initialize network

## Implement Barlow Twins loss

In [11]:
class BarlowTwinsLoss(nn.Module):
    """Barlow-Twins-style loss on the cross-correlation of two embeddings.

    Both inputs are L2-normalised along dim 0 and multiplied into a matrix
    ``C = z_a^T z_b``. The loss is ``sum_i (C_ii - 1)^2`` plus ``lbd`` times the
    sum of the squared off-diagonal entries of ``C``.

    Args:
        lbd: weight applied to the off-diagonal (redundancy) term.
    """

    def __init__(self, lbd) -> None:
        super().__init__()
        self.lbd = lbd

    def forward(self, z_a, z_b) -> torch.Tensor:
        """Return the scalar loss for one pair of embedding batches.

        Args:
            z_a: first embedding; assumed to be (batch, features) — TODO confirm.
            z_b: second embedding with the same shape as ``z_a``.

        Returns:
            A zero-dimensional tensor holding the combined loss.
        """
        unit_a = nn.functional.normalize(z_a, dim=0)
        unit_b = nn.functional.normalize(z_b, dim=0)
        cross_corr = unit_a.T @ unit_b

        identity = torch.eye(cross_corr.shape[0], device=cross_corr.device)

        # Invariance term: diagonal entries are pushed towards 1.
        on_diag_error = (cross_corr - identity).diagonal()
        invariance_term = torch.sum(on_diag_error ** 2)

        # Redundancy term: squared entries with the diagonal zeroed out.
        squared = cross_corr ** 2
        redundancy_term = torch.sum(squared.masked_fill(identity.bool(), 0))

        return invariance_term + self.lbd * redundancy_term

## Prepare clustering evaluation

In [16]:
def cluster(model, data_loader):
    """Score the model's cluster assignments batch by batch.

    The model is switched to eval mode (and left there). For every batch the
    predictions from ``model.forward_cluster`` are compared with the labels via
    ``evaluation.evaluate``; batches for which that call raises ``IndexError``
    are skipped.

    Args:
        model: network exposing ``eval()`` and ``forward_cluster(x)``.
        data_loader: iterable yielding ``(x, y)`` batches.

    Returns:
        ``(mean accuracy, mean NMI)`` over the batches that were scored. Both
        are NaN when no batch was scored (mean of an empty list).
    """
    model.eval()
    batch_accuracies, batch_nmis = [], []

    for features, labels in data_loader:
        features = features.to(DEVICE)
        true_labels = labels.tolist()

        with torch.no_grad():
            predicted_labels = model.forward_cluster(features).cpu().detach().tolist()

        try:
            # evaluate() returns (nmi, ari, f, acc); only nmi and acc are kept.
            batch_nmi, _ari, _f, batch_acc = evaluation.evaluate(
                true_labels, predicted_labels, class_num
            )
        except IndexError:
            # Best-effort: a batch that cannot be scored is ignored.
            continue

        batch_accuracies.append(batch_acc)
        batch_nmis.append(batch_nmi)

    mean_accuracy = np.mean(batch_accuracies)
    mean_nmi = np.mean(batch_nmis)
    return mean_accuracy, mean_nmi

## Train the model

In [17]:
def run_experiment(BATCH_SIZE, MASKING_RATIO, BT_LAMBDA, PROJECTION_SIZE, EPOCHS, TEST_NODES, LEARNING_RATE, HT_LOSS_WEIGHT):
    """Train a fresh network on ``dataset`` and return its clustering scores.

    Args:
        BATCH_SIZE: batch size of the data loader; also passed to ``InstanceLoss``.
        MASKING_RATIO: forwarded to the network as ``fraction``.
        BT_LAMBDA: currently unused; kept so existing callers (and the search
            grid, which samples it) keep working.
        PROJECTION_SIZE: forwarded to the network as ``projection_size``.
        EPOCHS: number of passes over the data loader.
        TEST_NODES: forwarded to the network as ``test_nodes``.
        LEARNING_RATE: AdamW learning rate.
        HT_LOSS_WEIGHT: multiplier of the input-vs-noisy-view loss term.

    Returns:
        ``(acc, nmi)`` as computed by ``cluster`` on the same (shuffled,
        ``drop_last``) loader that was used for training.
    """
    print("Start training on device: {}".format(DEVICE))
    # The 0.5 / 1.0 arguments are presumably temperatures — confirm in contrastive_loss.
    criterion_instance = contrastive_loss.InstanceLoss(BATCH_SIZE, 0.5, DEVICE).to(DEVICE)
    criterion_cluster = contrastive_loss.ClusterLoss(class_num, 1.0, DEVICE).to(DEVICE)

    # drop_last=True keeps every batch at exactly BATCH_SIZE samples, the size
    # InstanceLoss was constructed with.
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=True,
        drop_last=True,
        num_workers=0,
    )

    params = {
        'projection_size': PROJECTION_SIZE,
        'n_layers': 3,
        '0_layer_size': 512,
        '1_layer_size': 256,
        '2_layer_size': 128,
    }

    model = hypertab_network.Network(X_shape, params, class_num,
                                    fraction=MASKING_RATIO,
                                    test_nodes=TEST_NODES).to(DEVICE)
    # Kept explicit in case `hypernet` is not a registered submodule of the network.
    model.hypernet.to(DEVICE)

    optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-3, betas=(0.9,0.999), eps=1e-7)

    for _epoch in range(EPOCHS):
        for x, _labels in data_loader:
            x = x.to(DEVICE)
            optimizer.zero_grad()

            # Two noisy views of the same batch.
            x_i, x_j = model.add_noise(x)
            z_i, z_j, c_i, c_j = model(x_i, x_j)

            loss_instance = criterion_instance(z_i, z_j)
            loss_cluster = criterion_cluster(c_i, c_j)
            # Instance loss between the clean input and each noisy view.
            hypertab_loss = criterion_instance(x, x_i) + criterion_instance(x, x_j)

            loss = loss_instance + loss_cluster + hypertab_loss*HT_LOSS_WEIGHT
            loss.backward()
            optimizer.step()

    acc, nmi = cluster(model, data_loader)
    return acc, nmi

In [18]:
def pyhopper_exp(params):
    """Objective for the hyper-parameter search: train once and return the NMI.

    Every evaluated configuration and its score is appended to
    ``params/{DATASET}_nmi.txt`` as one tab-separated line.

    Args:
        params: dict of keyword arguments forwarded to ``run_experiment``.

    Returns:
        The NMI score (second element of ``run_experiment``'s result), or 0
        when that score is NaN.
    """
    import os  # local import: only needed to prepare the log directory

    res = run_experiment(**params)[1]

    log_path = f"params/{DATASET}_nmi.txt"
    # Create the directory first: otherwise a missing `params/` folder raises
    # FileNotFoundError after the whole training run and the result is lost.
    os.makedirs(os.path.dirname(log_path), exist_ok=True)
    with open(log_path, "a") as f:
        f.write(str(params) + "\t" + str(res) + "\n")

    # The raw value (including NaN) is logged above; the search gets 0 instead.
    if np.isnan(res):
        return 0
    return res

# search_grid = {
#     "fraction": pyhopper.float(0.1, 0.9, "0.1f"),
#     "lr": pyhopper.float(1e-5, 1e-3, "0.1g"),
#     "epochs": pyhopper.int(20, 150, multiple_of=20),
#     "neurons": pyhopper.int(128, 1024, multiple_of=128),
#     "out_dim": pyhopper.int(64, 512, multiple_of=64)
# }

# Search space for the hyper-parameter search. The keys must match the parameter
# names of run_experiment (BATCH_SIZE, MASKING_RATIO, BT_LAMBDA, PROJECTION_SIZE,
# EPOCHS, TEST_NODES, LEARNING_RATE, HT_LOSS_WEIGHT) because pyhopper_exp forwards
# the sampled dict as keyword arguments.
# NOTE(review): BT_LAMBDA is sampled here but run_experiment never reads it, so
# this dimension cannot influence the score; it must stay in the grid as long as
# run_experiment requires the argument.
search_grid = {
    "BATCH_SIZE": pyhopper.choice([32, 64, 128]),
    "MASKING_RATIO": pyhopper.float(0.5, 0.9, "0.1f"),
    "BT_LAMBDA": pyhopper.float(1e-5, 1e-3, "0.1g"),
    "PROJECTION_SIZE": pyhopper.choice([64, 128, 256, 512]),
    "EPOCHS": pyhopper.choice([50, 100, 150]),
    "TEST_NODES": pyhopper.choice([2, 5, 25, 50]),
    "LEARNING_RATE": pyhopper.float(1e-4, 1e-2, "0.1g"),
    "HT_LOSS_WEIGHT": pyhopper.choice([0, 0.5, 1, 1.5]),
}

# Search object consumed by the `search.run(...)` cell.
search = pyhopper.Search(search_grid)

In [22]:
# Maximise the NMI returned by pyhopper_exp. wrap_n_times(..., 3) presumably
# evaluates each candidate three times and aggregates the results — confirm in
# the pyhopper docs.
# NOTE(review): the saved output of this cell reports a search scheduled for 20
# steps and a CTRL+C interruption, so it was not produced by this exact call
# (steps=50). Re-run on a fresh kernel before relying on `best`.
best = search.run(pyhopper.wrap_n_times(pyhopper_exp, 3), "max", steps=50, pruner=pyhopper.pruners.QuantilePruner(0.8))

                           
  0%|          |  [00:52<?]

Search is scheduled for 20 steps
Start training on device: cuda:1



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

CTRL+C received. Will terminate once the currently running candidates finished



[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
100%|██████████| 140/140 [16:54<00:00,  7.25s/it]


In [None]:
# Re-train three times with the best configuration found by the search and
# collect the clustering scores of each run.
# NOTE(review): no random seed is set anywhere in the visible notebook, so these
# three runs (and the search itself) are not reproducible run-to-run.
accuracies = []
nmi_scores = []
for i in range(3):
    acc, nmi = run_experiment(**best)
    accuracies.append(acc)
    nmi_scores.append(nmi)
    print(acc, nmi)

# Optional verbose output (disabled): raw per-run lists with mean and std.
# print(accuracies)
# print(np.mean(accuracies), np.std(accuracies))

# print(nmi_scores)
# print(np.mean(nmi_scores), np.std(nmi_scores))

# Summary lines in percent, formatted as "mean~std": accuracy first, then NMI.
print(f"{np.mean(accuracies)*100:.2f}~{np.std(accuracies)*100:.2f}")
print(f"{np.mean(nmi_scores)*100:.2f}~{np.std(nmi_scores)*100:.2f}")