### Set GPU

In [1]:
import os
# Expose only GPU index 3 to this process. This is set in the first cell,
# ahead of the cell that imports `torch`.
# NOTE(review): the index is hard-coded; change it to match the machine in use.
os.environ['CUDA_VISIBLE_DEVICES'] = "3"

### Set Dataset Name

In [2]:
# Select exactly one dataset by leaving a single assignment uncommented.
# The value is compared against 'CIFAR10', 'CIFAR100', 'MNIST' and
# 'TINYIMAGENET' when the result/config paths are chosen in a later cell.
# dataset_name = 'CIFAR10'
# dataset_name = 'CIFAR100'
dataset_name = 'MNIST'
# dataset_name = 'TINYIMAGENET'

### Run All Now

In [6]:
# from models.resnet_stl import resnet18
import torch
import numpy as np
from tqdm import tqdm

import sys
import os

def add_path(path):
    """Put ``path`` at the front of ``sys.path`` unless it is already listed."""
    if path in sys.path:
        # Already importable from this location; keep sys.path unchanged.
        return
    sys.path.insert(0, path)

# Put the Unsupervised-Classification checkout at the front of sys.path,
# presumably so the `models.*` / `utils.*` imports that follow resolve to it.
# NOTE(review): absolute, machine-specific path — adjust it for your checkout.
add_path(os.path.abspath('/home/chominhi/work/init-pools-dal-main/Unsupervised-Classification'))



from models.resnet_cifar import resnet18
from utils.memory import MemoryBank
from utils.train_utils import simclr_train
from utils.utils import fill_memory_bank
from utils.config import create_config
from utils.common_config import get_model, get_train_dataset, get_val_transformations, get_train_dataloader
from utils.evaluate_utils import hungarian_evaluate2

ImportError: attempted relative import with no known parent package

In [4]:
# Per-dataset settings, keyed by `dataset_name`:
#   (results sub-folder, self-label experiment config, pycls config).
# CIFAR100 intentionally points at the "cifar-20" folder/config, as in the
# original branch (presumably the 20-superclass setup — confirm).
_DATASET_SETTINGS = {
    'CIFAR10': ('cifar-10/',
                './configs/selflabel/selflabel_cifar10.yml',
                'configs/CIFAR10_RESNET18.yaml'),
    'CIFAR100': ('cifar-20/',
                 './configs/selflabel/selflabel_cifar20.yml',
                 'configs/CIFAR100_RESNET18.yaml'),
    'MNIST': ('mnist/',
              './configs/selflabel/selflabel_mnist.yml',
              'configs/MNIST_RESNET18.yaml'),
    'TINYIMAGENET': ('tinyimagenet/',
                     './configs/selflabel/selflabel_tinyimagenet.yml',
                     'configs/TINYIMAGENET_RESNET18.yaml'),
}

# Fail immediately on an unsupported name instead of leaving
# `config_exp_path` / `cfg_path` undefined for the cells that follow.
if dataset_name not in _DATASET_SETTINGS:
    raise ValueError(
        f"Unsupported dataset_name {dataset_name!r}; "
        f"expected one of {sorted(_DATASET_SETTINGS)}"
    )

_results_subdir, config_exp_path, cfg_path = _DATASET_SETTINGS[dataset_name]
output_folder = '../results/' + _results_subdir

path_to_model = output_folder + 'selflabel/model.pth.tar'

# Deserialize onto the CPU so the checkpoint loads regardless of which GPU
# index it was saved from (only one device is visible through
# CUDA_VISIBLE_DEVICES); the model is moved to the GPU after the weights
# are loaded into it.
temp = torch.load(path_to_model, map_location='cpu')

FileNotFoundError: [Errno 2] No such file or directory: '../results/mnist/selflabel/model.pth.tar'

In [None]:
# NOTE(review): `argparse` is not used by any cell visible in this notebook.
import argparse

# Build the config `p` from the environment config file plus the
# dataset-specific experiment config path (`config_exp_path`).
config_env_path = './configs/env.yml'
p = create_config(config_env_path, config_exp_path)

In [None]:
# Build the model described by `p`, load the checkpoint held in `temp`,
# switch to eval mode and move the model to the GPU.
model = get_model(p)
model.load_state_dict(temp)
model.eval()
model.cuda();  # trailing ';' keeps the cell from echoing the model repr

# NOTE(review): `train_data` and `train_dataloader` are assigned again by the
# pycls data cell further down, so when the notebook runs top to bottom the
# objects created here are replaced before predictions are extracted.
train_data = get_train_dataset(p, get_val_transformations(p),
                                        split='train', to_augmented_dataset=False) 
train_dataloader = get_train_dataloader(p, train_data)

### Change the batch size if you run into an out-of-memory error

In [None]:
from pycls.datasets.data import Data
from pycls.config import cfg
# Load the dataset-specific pycls config and pin the dataset name to the one
# selected at the top of the notebook.
cfg.merge_from_file(cfg_path)
cfg.DATASET.NAME = dataset_name
data_obj = Data(cfg)

# Training split, served sequentially over every index 0..train_size-1.
# This rebinds `train_data` / `train_dataloader`.
# NOTE(review): the recorded output of this cell lists RandomResizedCrop among
# the preprocessing operations, so this split appears to be randomly
# augmented — confirm that is intended when extracting cluster predictions.
train_data, train_size = data_obj.getDataset(save_dir='../data', isTrain=True, isDownload=True)
trainSet = [i for i in range(train_size)]
# dtype=np.ndarray makes NumPy build an object-dtype index array.
trainSet = np.array(trainSet, dtype=np.ndarray)
# batch_size=256 is the value to lower on out-of-memory errors.
train_dataloader = data_obj.getSequentialDataLoader(indexes=trainSet, batch_size=256, data=train_data)

# Test split and loader. NOTE(review): `test_dataloader` is not used by any
# cell visible in this notebook.
test_data, test_size = data_obj.getDataset(save_dir='../data', isTrain=False, isDownload=True)
test_dataloader = data_obj.getTestLoader(data=test_data, test_batch_size=cfg.TRAIN.BATCH_SIZE, seed_id=cfg.RNG_SEED)

Preprocess Operations Selected ==>  [RandomResizedCrop(size=(32, 32), scale=(0.08, 1.0), ratio=(0.75, 1.3333), interpolation=PIL.Image.BILINEAR), ToTensor(), Normalize(mean=[0.1307], std=[0.3081])]


In [None]:
import torch.nn.functional as F

@torch.no_grad()
def get_predictions(p, dataloader, model, return_features=False):
    """Run ``model`` over ``dataloader`` and collect per-head cluster predictions.

    Args:
        p: Config mapping; only ``p['num_heads']`` is read here.
        dataloader: Iterable yielding ``(images, labels)`` batches.
        model: Module called as ``model(images, forward_pass='default')``,
            returning one logits tensor per head.
        return_features: When True, the model is instead called with
            ``forward_pass='return_all'`` and must return a dict with the keys
            ``'features'`` and ``'output'``.
            NOTE(review): this mode is assumed from the SCAN clustering model
            API — confirm that the loaded model supports it.

    Returns:
        A list with one dict per head, each holding ``'predictions'`` (argmax
        over dim 1, concatenated across batches, on the model's device) and
        ``'targets'`` (labels concatenated across batches, as yielded by the
        dataloader). With ``return_features=True`` a tuple
        ``(that_list, features)`` is returned, where ``features`` is a CPU
        tensor concatenated across batches.
    """
    model.eval()

    # Send inputs to wherever the model's weights live instead of assuming
    # CUDA; for a model that was moved with .cuda() this is the same device.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    predictions = [[] for _ in range(p['num_heads'])]
    targets = []
    feature_chunks = []

    for images, lbl in tqdm(dataloader, desc="Extracting Self Label Predictions"):
        images = images.to(device)
        if return_features:
            res = model(images, forward_pass='return_all')
            output = res['output']
            # Offload each chunk right away so features do not pile up on the GPU.
            feature_chunks.append(res['features'].cpu())
        else:
            output = model(images, forward_pass='default')
        for i, output_i in enumerate(output):
            predictions[i].append(torch.argmax(output_i, dim=1))
        targets.append(lbl)

    predictions = [torch.cat(pred_, dim=0) for pred_ in predictions]
    targets = torch.cat(targets, dim=0)

    out = [{'predictions': pred_, 'targets': targets} for pred_ in predictions]

    if return_features:
        return out, torch.cat(feature_chunks, dim=0)
    return out

In [None]:
# from utils.evaluate_utils import get_predictions

In [None]:
# Per-head predictions and targets for every batch served by `train_dataloader`.
predictions = get_predictions(p, train_dataloader, model)

Extracting Self Label Predictions: 100%|██████████| 235/235 [00:19<00:00, 12.31it/s]


#### Note: Stats are irrelevant for CIFAR100

In [None]:
# Score the predictions against the ground-truth targets.
# NOTE(review): the leading 0 presumably selects which head of `predictions`
# is evaluated — confirm against hungarian_evaluate2's signature.
# With compute_confusion_matrix=False the confusion-matrix file is presumably
# never written; os.path.join with a single argument returns it unchanged.
clustering_stats = hungarian_evaluate2(0, predictions, 
                                class_names=train_data.classes,
                                compute_confusion_matrix=False,
                                confusion_matrix_file=os.path.join('confusion_matrix.png'))

In [None]:
# Display the metrics returned by hungarian_evaluate2.
clustering_stats

{'ACC': 0.5389666666666667,
 'ARI': 0.4456088112671098,
 'NMI': 0.5757462882079892,
 'hungarian_match': [(0, 5),
  (1, 0),
  (2, 2),
  (3, 3),
  (4, 6),
  (5, 8),
  (6, 1),
  (7, 4),
  (8, 7),
  (9, 9)]}

In [None]:
# Display the first head's predicted cluster ids, copied to the CPU.
predictions[0]['predictions'].cpu()

tensor([6, 1, 7,  ..., 6, 4, 5])

In [None]:
# Persist the first head's cluster ids next to the other results for this
# dataset. `output_folder` already ends with '/', so join the parts with
# os.path.join rather than inserting a second separator, and hand np.save an
# explicit NumPy array instead of relying on implicit tensor conversion.
np.save(
    os.path.join(output_folder, f'{dataset_name}_selflabel_cluster_ids.npy'),
    predictions[0]['predictions'].cpu().numpy(),
)