In [43]:
%load_ext autoreload
%autoreload 2
%env CUDA_VISIBLE_DEVICES=8
import numpy as np

from sklearn.datasets import fetch_openml

import os
import sys
sys.path.append("../scripts")
sys.path.append("..")
import scripts.utils as utils
from scripts.utils import get_path, compute_knn_acc, compute_lin_acc, check_knn_acc, check_lin_acc

import pickle

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
env: CUDA_VISIBLE_DEVICES=8


In [44]:
epochs = 1000
model_name = "resnet18"
root_dir = get_path("data")


In [45]:
## def get_dataset():
cifar = fetch_openml("CIFAR_10",
                     data_home=os.path.join(root_dir, "cifar10")
                     )
# Separate the color channels and move them to the back.
data = np.moveaxis(cifar.data.reshape(60000, 3, 32, 32), 1, -1)
labels = np.vectorize(np.int8)(cifar.target)
mean = (0.4914, 0.4822, 0.4465)
std = (0.2023, 0.1994, 0.2010)
dim = (32, 32, 3)


# inspect single run

In [47]:
run = 0
negative_samples = 2

In [48]:
file_name = os.path.join(root_dir, "cifar10", "results",
                         f"{model_name}_epochs_{epochs}_m_{negative_samples}_run_{run}.pkl")
with open(file_name, "rb") as file:
    embedder = pickle.load(file)


In [49]:
check_knn_acc(data,
              labels,
              embedder,
              file_name=file_name,
              run=0,
              test_size=10000,
              batch_size=1024,
              num_workers=8,
              device="cuda:0")

0.8715

In [50]:
check_lin_acc(data,
              labels,
              embedder,
              file_name=file_name,
              run=0,
              test_size=10000,
              batch_size=1024,
              num_workers=8,
              device="cuda:0")

0.9081

# inspect metrics for all models

In [58]:
runs = [0, 1, 2]
nbs_neg_samples = [2, 16, 128, 512, 2048]

In [59]:
embedders = []
knn_accs = []
lin_accs = []
times = []
for n_neg_samples in nbs_neg_samples:
    embedders_by_runs = []
    for run in runs:
        file_name = os.path.join(root_dir, "cifar10", "results",
                         f"{model_name}_epochs_{epochs}_m_{n_neg_samples}_run_{run}.pkl")

        with open(file_name, "rb") as file:
            embedder = pickle.load(file)
        check_knn_acc(data, labels, embedder, file_name, run)
        check_lin_acc(data, labels, embedder, file_name, run)

        embedders_by_runs.append(embedder)
        print(f"Done with n_neg_samples {n_neg_samples} and run {run}." )
    embedders.append(embedders_by_runs)

Done with n_neg_samples 2 and run 0.
Done with n_neg_samples 2 and run 1.
Done with n_neg_samples 2 and run 2.
Done with n_neg_samples 16 and run 0.
Done with n_neg_samples 16 and run 1.
Done with n_neg_samples 16 and run 2.
Done with n_neg_samples 128 and run 0.
Done with n_neg_samples 128 and run 1.
Done with n_neg_samples 128 and run 2.
Done with n_neg_samples 512 and run 0.
Done with n_neg_samples 512 and run 1.
Done with n_neg_samples 512 and run 2.
Done with n_neg_samples 2048 and run 0.
Done with n_neg_samples 2048 and run 1.
Done with n_neg_samples 2048 and run 2.


In [60]:
knn_accs = np.array([[embedder.knn_acc for embedder in embedders_by_runs] for embedders_by_runs in embedders])
lin_accs = np.array([[embedder.lin_acc for embedder in embedders_by_runs] for embedders_by_runs in embedders])

In [61]:
for i, n_neg_samples in enumerate(nbs_neg_samples):
    print(n_neg_samples)
    print(f"knn acc: {np.round(knn_accs[i].mean(), 3)} +/- {np.round(knn_accs[i].std(), 3)}")
    print(f"lin acc: {np.round(lin_accs[i].mean(), 3)} +/- {np.round(lin_accs[i].std(), 3)}")
    print("\n")

2
knn acc: 0.869 +/- 0.002
lin acc: 0.907 +/- 0.002


16
knn acc: 0.917 +/- 0.001
lin acc: 0.931 +/- 0.002


128
knn acc: 0.92 +/- 0.003
lin acc: 0.933 +/- 0.003


512
knn acc: 0.922 +/- 0.002
lin acc: 0.932 +/- 0.003


2048
knn acc: 0.919 +/- 0.001
lin acc: 0.933 +/- 0.001




In [62]:
times = np.array([embedder.time for embedder in embedders_by_runs for embedders_by_runs in embedders]) / 60  # in minutes

In [63]:
time_mean = times.mean()
time_std = times.std()

print(f"Time per run [min]: {time_mean} +/- {time_std}")

Time per run [min]: 721.1296853370137 +/- 16.707088954114635
