In [14]:
from train import run_epoch
from torch.utils.data import DataLoader
import torch
import wandb
import os
from utils import set_seed, Logger, CSVBatchLogger, log_args, get_model, hinge_loss, split_data, check_args, get_subsampled_indices
import numpy as np

# Experiment configuration: selects which sweep run and checkpoints to inspect.
# NOTE(review): the root directory below is an absolute, machine-specific path.
p = '0.7'
dataset = 'CUB'
seed = 0
model_name = 'best_wg_acc_model'
main_dir = f"/home/thien/research/pseudogroups/{dataset}/splitpgl_sweep_logs/" \
           f"p{p}_wd0.0001_lr0.0001"

# The part-2 run directory name is built from `p` and `seed` rather than repeating
# the literals "0.7" and "0", so changing either setting above cannot silently pair
# a part-2 checkpoint with data from a different run.
# NOTE(review): presumably the `p...`/`s...` fields of the part-2 directory name are
# the same split proportion and seed used for part 1 -- confirm against the sweep's
# naming scheme. For the current values the resulting path is unchanged.
best_model_path = f"{main_dir}/part2_oll-1rw_rgl_group_dro_p{p}_wd2e-05_lr0.0001_s{seed}/" \
                  f"{model_name}.pth"
data_path = f"{main_dir}/part1_s{seed}/part1and2_data_p{p}"

best_model_path1 = f"{main_dir}/part1_s{seed}/best_model.pth"
device = 'cuda:0'

# Load the saved object and pull out its 'part1' and 'part2' entries.
data = torch.load(data_path)
part1_data, part2_data = data['part1'], data['part2']
batch_size = 32

# shuffle=False keeps iteration order fixed, so results collected from several
# passes over the same loader are row-aligned.
part1_loader = DataLoader(part1_data, shuffle=False, batch_size=batch_size, pin_memory=True)
part2_loader = DataLoader(part2_data, shuffle=False, batch_size=batch_size, pin_memory=True)


In [19]:
# Load the two checkpoints to evaluate and put them in inference mode.
# NOTE: torch.load unpickles whole objects here, which can execute arbitrary code;
# only load checkpoint files from a trusted source.
# map_location=device remaps the stored tensors onto the evaluation device, so a
# checkpoint saved on a different GPU index still loads on this machine.
model = torch.load(best_model_path, map_location=device)
model.to(device)
model.eval()

model1 = torch.load(best_model_path1, map_location=device)
model1.to(device)
model1.eval()

# Loader consumed by the evaluation loop for `model`.
loader = part1_loader

# Passed to torch.set_grad_enabled by the evaluation loops; False disables autograd.
is_training = False

In [20]:
# Run `model` over `loader` and collect, for every example: the predicted class,
# the true label, the group id, the dataset index and the raw model output.
from tqdm import tqdm

# Per-batch arrays are appended to lists and joined once after the loop; calling
# np.concatenate on the growing result every batch re-copies everything collected
# so far and is quadratic in the number of batches.
pred_chunks, label_chunks, group_chunks, index_chunks, output_chunks = [], [], [], [], []

with torch.set_grad_enabled(is_training):  # to make sure we don't save grad when val
    # Wrapping the loader itself (not enumerate(...)) lets tqdm show the total.
    for batch in tqdm(loader):
        batch = tuple(t.to(device) for t in batch)
        x, y, g, data_idx = batch
        outputs = model(x)

        # Convert once per batch and reuse for both the argmax and the raw copy.
        outputs_np = outputs.detach().cpu().numpy()
        idx_np = data_idx.detach().cpu().numpy()
        assert outputs_np.shape[0] == idx_np.shape[0]

        pred_chunks.append(np.argmax(outputs_np, axis=1))
        label_chunks.append(y.detach().cpu().numpy())
        group_chunks.append(g.detach().cpu().numpy())
        index_chunks.append(idx_np)
        output_chunks.append(outputs_np)

# np.concatenate raises ValueError on an empty list, i.e. if the loader yielded nothing.
acc_y_pred = np.concatenate(pred_chunks)
acc_y_true = np.concatenate(label_chunks)
acc_g_true = np.concatenate(group_chunks)
indices = np.concatenate(index_chunks)
# `probs` holds the model outputs exactly as returned; no softmax is applied here.
probs = np.concatenate(output_chunks, axis=0)

assert probs.shape[0] == indices.shape[0]


105it [00:16,  6.51it/s]


In [21]:
from tqdm import tqdm

# part1
# Run `model1` over `part1_loader` and collect, for every example: the predicted
# class, the true label, the group id, the dataset index and the raw model output.
# Per-batch arrays are appended to lists and joined once after the loop; calling
# np.concatenate on the growing result every batch re-copies everything collected
# so far and is quadratic in the number of batches.
pred_chunks1, label_chunks1, group_chunks1, index_chunks1, output_chunks1 = [], [], [], [], []

with torch.set_grad_enabled(is_training):  # to make sure we don't save grad when val
    # Wrapping the loader itself (not enumerate(...)) lets tqdm show the total.
    for batch in tqdm(part1_loader):
        batch = tuple(t.to(device) for t in batch)
        x, y, g, data_idx = batch
        outputs = model1(x)

        # Convert once per batch and reuse for both the argmax and the raw copy.
        outputs_np = outputs.detach().cpu().numpy()
        idx_np = data_idx.detach().cpu().numpy()
        assert outputs_np.shape[0] == idx_np.shape[0]

        pred_chunks1.append(np.argmax(outputs_np, axis=1))
        label_chunks1.append(y.detach().cpu().numpy())
        group_chunks1.append(g.detach().cpu().numpy())
        index_chunks1.append(idx_np)
        output_chunks1.append(outputs_np)

# np.concatenate raises ValueError on an empty list, i.e. if the loader yielded nothing.
acc_y_pred1 = np.concatenate(pred_chunks1)
acc_y_true1 = np.concatenate(label_chunks1)
acc_g_true1 = np.concatenate(group_chunks1)
indices1 = np.concatenate(index_chunks1)
# `probs1` holds the model outputs exactly as returned; no softmax is applied here.
probs1 = np.concatenate(output_chunks1, axis=0)

assert probs1.shape[0] == indices1.shape[0]

105it [00:16,  6.45it/s]


In [22]:
# Accuracy summary for `model`: overall accuracy first, then one line per group id.
pred_acc = np.equal(acc_y_pred, acc_y_true)  # True where the prediction matches the label
avg_acc = np.sum(pred_acc) / len(pred_acc)
print(f"average acc [n={len(pred_acc)}]: {avg_acc}")
for g in range(4):  # group ids 0..3 are reported
    in_group = np.equal(acc_g_true, g)
    g_count = np.sum(in_group)
    # Number of examples that are both correct and in this group, over the group size.
    n_correct_in_group = np.sum(np.logical_and(pred_acc, in_group))
    group_acc = n_correct_in_group / g_count
    print(f"Group {g} [n={g_count}]: group_acc = {group_acc}")

average acc [n=3356]: 0.9451728247914184
Group 0 [n=2430]: group_acc = 0.9259259259259259
Group 1 [n=141]: group_acc = 0.9929078014184397
Group 2 [n=38]: group_acc = 1.0
Group 3 [n=747]: group_acc = 0.9959839357429718


In [23]:
# Accuracy summary for `model1`: overall accuracy first, then one line per group id.
pred_acc1 = np.equal(acc_y_pred1, acc_y_true1)  # True where the prediction matches the label
avg_acc1 = np.sum(pred_acc1) / len(pred_acc1)
print(f"average acc [n={len(pred_acc1)}]: {avg_acc1}")
for g in range(4):  # group ids 0..3 are reported
    in_group1 = np.equal(acc_g_true1, g)
    g_count1 = np.sum(in_group1)
    # Number of examples that are both correct and in this group, over the group size.
    n_correct_in_group1 = np.sum(np.logical_and(pred_acc1, in_group1))
    group_acc1 = n_correct_in_group1 / g_count1
    print(f"Group {g} [n={g_count1}]: group_acc = {group_acc1}")

average acc [n=3356]: 1.0
Group 0 [n=2430]: group_acc = 1.0
Group 1 [n=141]: group_acc = 1.0
Group 2 [n=38]: group_acc = 1.0
Group 3 [n=747]: group_acc = 1.0
