In [16]:
import torch
import os.path as osp
import sys
from omegaconf import OmegaConf
from tqdm.notebook import tqdm
import torch
from torch.utils.data.dataloader import DataLoader
from glob import glob
import numpy as np

sys.path.append("../src")
from dataset_utils import get_datasets
from lit_utils import LitModel

# Use the 'file_system' strategy for sharing tensors between processes.
# NOTE(review): presumably chosen to avoid running out of file descriptors
# when DataLoader workers are used -- confirm the original motivation.
torch.multiprocessing.set_sharing_strategy('file_system')
# Disable gradient tracking globally for the rest of the session; the
# evaluation loop further down only performs forward passes.
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7f016cee8370>

In [17]:
def load_cfg_file(base_dir: str):
    """Read the Hydra configuration saved inside a run directory.

    Args:
        base_dir: Run output directory that contains ``.hydra/config.yaml``.

    Returns:
        Whatever ``OmegaConf.load`` returns for that YAML file.
    """
    return OmegaConf.load(osp.join(base_dir, ".hydra", "config.yaml"))


def load_train_model(base_dir: str):
    """Restore the trained ``LitModel`` stored in a run directory, in eval mode.

    Args:
        base_dir: Run output directory. It must contain at least one
            ``*.ckpt`` file and ``default/version_0/hparams.yaml``.

    Returns:
        The model returned by ``LitModel.load_from_checkpoint`` after
        ``eval()`` has been called on it.

    Raises:
        FileNotFoundError: If no checkpoint or no hparams file is found
            under ``base_dir``.
    """
    # glob() returns matches in arbitrary order, so sort to make the choice
    # deterministic when a run directory holds more than one checkpoint.
    ckpt_paths = sorted(glob(osp.join(base_dir, "*.ckpt")))
    if not ckpt_paths:
        raise FileNotFoundError(
            "No '*.ckpt' checkpoint found in {!r}".format(base_dir)
        )
    model_path = ckpt_paths[0]

    # The hparams location has no wildcard, so a plain existence check is enough.
    hparam_path = osp.join(base_dir, "default", "version_0", "hparams.yaml")
    if not osp.exists(hparam_path):
        raise FileNotFoundError(
            "Missing hparams file: {!r}".format(hparam_path)
        )

    model = LitModel.load_from_checkpoint(model_path, hparams_file=hparam_path)
    model.eval()
    return model


In [18]:
# Each dataset has a results.yaml index that maps a run name to that run's
# output directory, relative to the index's own `base_path` entry.
# Exactly one dataset is evaluated per notebook run; choose it here.
# Available: "Beauty", "Toys_and_Games", "Clothing_Shoes_and_Jewelry",
#            "movielens", "pinterest"
dataset_name = "pinterest"
dir_path = f"../outputs/{dataset_name}/results.yaml"

# Resolve every run entry to a full path; `base_path` itself is not a run.
results_index = OmegaConf.load(dir_path)
base_path = results_index["base_path"]
resource_dict = {
    run_name: osp.join(base_path, run_dir)
    for run_name, run_dir in results_index.items()
    if run_name != "base_path"
}


In [19]:
# Directory handed to get_datasets as its output location.
out_dir = "."

# Reuse the configuration of the fully-labelled, no-CF run for data loading,
# then override the loader settings: single-sample batches, no worker processes.
cfg = load_cfg_file(resource_dict["label_ratio_1.0_no_cf"])
cfg.num_workers = 0
cfg.batch_size = 1

In [20]:
# Build the evaluation data. Only the test split and the dataset metadata are
# kept; the first and last values returned by get_datasets are discarded.
_, test_dataset, dataset_meta, _ = get_datasets(
    cfg.train_df_path,
    cfg.test_df_path,
    cfg.cf_vector_df_path,
    out_dir,
    cfg.labeled_ratio,
    cfg.is_use_bias,
    cf_based_train_loss_path=cfg.cf_based_train_loss_path,
    cf_based_test_loss_path=cfg.cf_based_test_loss_path,
    is_use_cf_embeddings=cfg.is_use_cf_embeddings,
    cf_embeddings_train_path=cfg.cf_embeddings_train_path,
    cf_embeddings_test_path=cfg.cf_embeddings_test_path,
    confidence_type=cfg.confidence_type,
    is_plot_conf_hist=False,
)

# Report the sizes recorded in the metadata, in the order named in the message.
print(
    "Sizes [trainset testset num_classes cf_vector_dim]=[{} {} {} {}]".format(
        *(
            dataset_meta[field]
            for field in (
                "train_set_size",
                "test_set_size",
                "num_classes",
                "cf_vector_dim",
            )
        )
    )
)

# Loader over the test split, using the batch size / worker count set on cfg.
testloader = DataLoader(test_dataset, batch_size=cfg.batch_size, num_workers=cfg.num_workers)


Sizes [trainset testset num_classes cf_vector_dim]=[18944 8119 32 65]


In [21]:
# Run every trained model over the test set and cache its sigmoid outputs and
# the matching labels as preds.npy / labels.npy inside the run directory.
# Runs whose cached files already exist are skipped.

# Prefer the GPU, but stay runnable on a CPU-only machine.
device = "cuda" if torch.cuda.is_available() else "cpu"

for key, base_dir in resource_dict.items():
    print(key, base_dir)
    preds_path = osp.join(base_dir, "preds.npy")
    labels_path = osp.join(base_dir, "labels.npy")

    # Require BOTH files before skipping: preds.npy is written first, so an
    # interrupt between the two saves would otherwise leave this run without
    # labels.npy forever.
    if osp.exists(preds_path) and osp.exists(labels_path):
        continue

    model = load_train_model(base_dir).to(device)

    preds_list, labels_list = [], []
    # Each batch is a 6-tuple; only the images (1st) and labels (4th) are used.
    for imgs, _, _, labels_i, _, _ in tqdm(testloader):
        # The model returns a pair; only its first element is turned into
        # predictions via a sigmoid.
        outputs, _ = model(imgs.to(device))
        preds_list.append(torch.sigmoid(outputs).cpu().numpy())
        labels_list.append(labels_i.numpy())

    # Stack the per-batch arrays along the first axis.
    preds = np.vstack(preds_list)
    labels = np.vstack(labels_list)

    np.save(preds_path, preds)
    np.save(labels_path, labels)


label_ratio_0.1_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/0_cf_weight=0.0_labeled_ratio=0.1
label_ratio_0.2_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/1_cf_weight=0.0_labeled_ratio=0.2
label_ratio_0.3_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/2_cf_weight=0.0_labeled_ratio=0.3
label_ratio_0.4_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/3_cf_weight=0.0_labeled_ratio=0.4
label_ratio_0.5_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/4_cf_weight=0.0_labeled_ratio=0.5
label_ratio_0.6_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/5_cf_weight=0.0_labeled_ratio=0.6
label_ratio_0.7_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_093359/6_cf_weight=0.0_labeled_ratio=0.7
label_ratio_0.8_no_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20

  0%|          | 0/8119 [00:00<?, ?it/s]

label_ratio_0.2_with_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_090451/1_cf_weight=5.0_labeled_ratio=0.2


  0%|          | 0/8119 [00:00<?, ?it/s]

label_ratio_0.3_with_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_090451/2_cf_weight=5.0_labeled_ratio=0.3


  0%|          | 0/8119 [00:00<?, ?it/s]

label_ratio_0.4_with_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_090451/3_cf_weight=5.0_labeled_ratio=0.4


  0%|          | 0/8119 [00:00<?, ?it/s]

label_ratio_0.5_with_cf /home/ubuntu/cactus/outputs/pinterest/train_model_multirun_20220108_090451/4_cf_weight=5.0_labeled_ratio=0.5


  0%|          | 0/8119 [00:00<?, ?it/s]

KeyboardInterrupt: 