In [1]:
% load_ext autoreload
% autoreload 2

In [2]:
import pytorch_lightning as pl
import torch
from utils.detectron import DetectronLoader, DetectronModule

from tests.detectron.detectron import infer_labels
from data import sample_data
from data.core import split_dataset
from models import pretrained

In [4]:
def load_model():
    return pretrained.resnet18_trained_on_cifar10()

In [5]:
p_train, p_val, p_test_all = sample_data.cifar10(split='all')
q_all = sample_data.cifar10_1()

test_sets = {'p': p_test_all, 'q': q_all}
base_model = load_model()

In [6]:
max_epochs_per_model = 2
ensemble_size = 2
runs = 2
gpus = [0]
batch_size = 512
num_workers = 12
N = 20

In [7]:
(pseudo_labels_train, _), (pseudo_labels_val, val_acc) = infer_labels(
    model=base_model,
    dataset=(p_train, p_val),
    gpus=gpus,
    batch_size=batch_size,
    num_workers=num_workers,
    verbose=True,
    return_accuracy=True,
)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Validation: 0it [00:00, ?it/s]

Inferred labels for 50000 samples. Accuracy: 0.991


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Validation: 0it [00:00, ?it/s]

Inferred labels for 9000 samples. Accuracy: 0.878


In [8]:
val_results = []
test_results = []

for dataset_name in ['p', 'q']:
    for seed in range(runs):
        log = {'N': N, 'seed': seed, 'dataset': dataset_name, 'ensemble_idx': 0}
        count = N
        q, _ = split_dataset(test_sets[dataset_name], N, seed)
        pseudo_labels_test = infer_labels(
            model=base_model,
            dataset=q,
            gpus=gpus,
            batch_size=N,
            num_workers=num_workers,
            verbose=True
        )

        pq_loader = DetectronLoader(p_train=p_train,
                                    p_val=p_val,
                                    q=q,
                                    p_train_pseudo_labels=pseudo_labels_train,
                                    p_val_pseudo_labels=pseudo_labels_val,
                                    q_pseudo_labels=pseudo_labels_test,
                                    batch_size=batch_size,
                                    num_workers=num_workers,
                                    )

        base = DetectronModule(base_model)
        pl.Trainer(gpus=gpus, logger=False, max_epochs=1).test(base, pq_loader.test_dataloader(), verbose=False)
        test_results.append(base.test_struct.to_dict() | {'count': count} | log)
        val_results.append({'accuracy': val_acc, 'rejection_rate': 0, 'accepted_accuracy': val_acc} | log)

        for i in range(1, ensemble_size + 1):
            log.update({'ensemble_idx': i})

            trainer = pl.Trainer(
                gpus=gpus,
                max_epochs=max_epochs_per_model,
                logger=False,
                num_sanity_val_steps=0,
                limit_val_batches=0,
                enable_model_summary=False
            )

            detector = DetectronModule(model=load_model(),
                                       alpha=(alpha := 1 / (len(pq_loader.train_dataloader()) * count + 1)))
            print(f'α = {1000 * alpha:.3f} × 10⁻³')
            trainer.fit(detector, pq_loader)
            trainer.test(detector, pq_loader.val_dataloader(), verbose=False)
            val_results.append(detector.test_struct.to_dict(minimal=True) | log)

            trainer.test(detector, pq_loader.test_dataloader(), verbose=False)
            count = pq_loader.refine(~detector.test_struct.rejection_mask, verbose=True)
            test_results.append(detector.test_struct.to_dict() | {'count': count} | log)

            if count == 0:
                print(f'Converged to rejection rate of 100% after {i} iterations')
                break

torch.save(val_results, 'val_results.pt')
torch.save(test_results, 'test_results.pt')

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Validation: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Inferred labels for 20 samples. Accuracy: 0.850


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


|Q| (20 → 18)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


|Q| (18 → 15)


Validation: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Inferred labels for 20 samples. Accuracy: 0.900


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


|Q| (20 → 19)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


|Q| (19 → 17)


Validation: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Inferred labels for 20 samples. Accuracy: 0.800


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


|Q| (20 → 17)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


|Q| (17 → 14)


Validation: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Inferred labels for 20 samples. Accuracy: 0.600


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.495 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


|Q| (20 → 11)


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


α = 0.505 × 10⁻³


Training: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3]


Testing: 0it [00:00, ?it/s]

|Q| (11 → 10)


In [10]:
import pandas as pd

In [18]:
df = pd.DataFrame(val_results)

In [19]:
df.query('dataset == "q"').groupby(['N', 'ensemble_idx']).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,rejection_rate,seed
N,ensemble_idx,Unnamed: 2_level_1,Unnamed: 3_level_1
20,0,0.0,0.5
20,1,0.1125,0.5
20,2,0.113722,0.5


In [20]:
df

Unnamed: 0,accuracy,rejection_rate,accepted_accuracy,N,seed,dataset,ensemble_idx
0,tensor(0.8778),0.0,tensor(0.8778),20,0,p,0
1,0.877778,0.117111,0.923609,20,0,p,1
2,0.877778,0.109778,0.920994,20,0,p,2
3,tensor(0.8778),0.0,tensor(0.8778),20,1,p,0
4,0.877778,0.107556,0.919198,20,1,p,1
5,0.877778,0.108222,0.91839,20,1,p,2
6,tensor(0.8778),0.0,tensor(0.8778),20,0,q,0
7,0.877778,0.116,0.923077,20,0,q,1
8,0.877778,0.115,0.923415,20,0,q,2
9,tensor(0.8778),0.0,tensor(0.8778),20,1,q,0
