In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import torch
import torchmetrics
from tqdm.auto import tqdm

sns.set()
import numpy as np
import os
import pandas as pd
import sys

sys.path.append("..")
import hydra
from src.lib.config import register_configs
from src.utils.EvalWrapper import EvalWrapper

register_configs()
try:
    hydra.initialize(config_path="../conf", job_name="plankton")
except ValueError:
    print(ValueError)

import pytorch_lightning as pl
import pickle
from eval_single_helper_funcs import instantiate_trainer, run_and_save, get_confidence_and_acc_single, get_temperature_file, get_distribution_file

The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  hydra.initialize(config_path="../conf", job_name="plankton")


In [2]:
# set global values:

experiments_singlelabel = {
    "supervised_singlelabel": "/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50",
    "linear_singlelabel": "/gpfs/work/machnitz/plankton_logs/linear_eval/singlelabel/multirun/2022-04-19/19-15-20",
    "finetune_singlelabel": "/gpfs/work/machnitz/plankton_logs/finetune/singlelabel/multirun/2022-04-19/08-15-24",
    "finetune_sgd_singlelabel": "/gpfs/work/machnitz/plankton_logs/finetune_sgd/singlelabel/multirun/2022-04-19/08-15-10"
}

data_splits_per_experiment = [np.round(x, 2) for x in np.arange(0.01, 0.1, 0.01)] + [np.round(x, 2) for x in
                                                                                     np.arange(0.1, 1.1, 0.1)]
print("Data splits: ", data_splits_per_experiment)

Data splits:  [0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]


In [3]:
from matplotlib.colors import ListedColormap
import seaborn as sns

sns.set()

hereon_color_array = np.array([
    [230, 0, 70],
    [0, 145, 160],
    [0, 170, 230],
    [250, 180, 35],
    [0, 70, 125],
    [175, 25, 60],
    [170, 200, 70],
    [250, 115, 80],
    [140, 90, 180],

])
hereon_color_array = hereon_color_array / 255
hereon_cmap = ListedColormap(hereon_color_array)

sns.set_palette(hereon_color_array)

# Single Label

In [4]:
pl.seed_everything(7)
np.random.seed(7)
cfg = hydra.compose(config_name="config", overrides=["+experiment=plankton/publication/supervised_singlelabel",
                                                     "random_seed=7",
                                                     "strategy=SingleDevice",
                                                     'strategy.device="cuda:0"',
                                                     "trainer.enable_progress_bar=false",
                                                     "datamodule.batch_size=200",
                                                     "lightning_module.log_confusion_matrices=false",
                                                     "lightning_module.temperature_scale=false",
                                                     ])

Global seed set to 7


In [5]:

# train_transforms = hydra.utils.instantiate(cfg.datamodule.train_transforms)
# valid_transforms = hydra.utils.instantiate(cfg.datamodule.valid_transforms)
#
# datamodule = hydra.utils.instantiate(
#     cfg.datamodule,
#     train_transforms=train_transforms,
#     valid_transforms=valid_transforms,
#     dataset=cfg.datamodule.dataset,
#     is_ddp=False,
# )
# datamodule.setup(stage="test")

In [None]:
# trainer = instantiate_trainer(cfg)
#
# test_dataloader = datamodule.test_dataloader()
# for example_input, _ in test_dataloader:
#     break

# acc_func = torchmetrics.Accuracy(average="none", num_classes=len(datamodule.unique_labels))
best_checkpoints = []
return_metrics = dict()
for key, setup_path in experiments_singlelabel.items():
    experiment_folders = [os.path.join(setup_path, str(i)) for i in np.arange(0, 19)]
    return_metrics[key] = dict()
    # fig, axes = plt.subplots(nrows=4, ncols=5, figsize=(20, 15), sharex=True, sharey=True)
    for experiment_number, experiment_path in enumerate(tqdm(experiment_folders)):
        # print(experiment_number)
        with open(os.path.join(experiment_path, "main.log"), "r") as f:
            complete_log = f.readlines()
            found_best_checkpoint = False
            for line in complete_log:
                # print(line)
                if found_best_checkpoint:
                    best_checkpoint_part = os.path.normpath(line.strip().split("multirun/")[-1]).strip()
                    base_path = os.path.normpath(experiment_path.split("multirun")[0]).strip()
                    best_checkpoint_result = os.path.join(base_path, "multirun", best_checkpoint_part).strip()
                    # print(f"set best checkpoint to {best_checkpoint}")
                    break

                if "[main.main][INFO] - Best checkpoint path:" in line:
                    found_best_checkpoint = True
                    # print(f"found best checkpoint: {line}")
        best_checkpoints.append(best_checkpoint_result)
    #     if os.path.isfile(f"test_results/labels_{key}_{experiment_number}.pt"):
    #         print(f"loading {key}_{experiment_number} from file")
    #         logits = torch.load(f"test_results/logits_{key}_{experiment_number}.pt")
    #         labels = torch.load(f"test_results/labels_{key}_{experiment_number}.pt")
    #         with open(f"test_results/dict_{key}_{experiment_number}.pkl", 'rb') as f:
    #             return_metrics = pickle.load(f)
    #     else:
    #         logits, labels, return_metrics = run_and_save(best_checkpoint, test_dataloader, return_metrics, key,
    #                                                       experiment_number, data_splits_per_experiment, trainer,
    #                                                       datamodule, example_input)
    #
    #     accuracies, confidences, ece = get_confidence_and_acc_single(logits, labels)
    #     return_metrics[key][experiment_number]["ECE"] = ece
    #
    #     prob_scaling = EvalWrapper(temperature_file=get_temperature_file(best_checkpoint),
    #                                training_distribution_file=get_distribution_file(best_checkpoint),
    #                                device="cpu")
    #
    #     corrected_probabilities = prob_scaling(logits=logits, correct_probabilities_with_training_prior=True)
    #     accuracies_corrected, confidences_corrected, ece_corrected = get_confidence_and_acc_single(
    #         corrected_probabilities,
    #         labels,
    #         logits_are_probs=True)
    #     temp_scaled_logits = prob_scaling(logits=logits, correct_probabilities_with_temperature=True)
    #     accuracies_temp, confidences_temp, ece_temp = get_confidence_and_acc_single(temp_scaled_logits,
    #                                                                                 labels,
    #                                                                                 logits_are_probs=False)
    #     temp_and_prior_corrected_probabilities = prob_scaling(logits=temp_scaled_logits,
    #                                                           correct_probabilities_with_training_prior=True)
    #     accuracies_temp_and_corrected, confidences_temp_and_corrected, ece_temp_and_corrected = get_confidence_and_acc_single(
    #         temp_and_prior_corrected_probabilities, labels, logits_are_probs=False)
    #
    #     ax = axes.flatten()[experiment_number]
    #     sns.lineplot(x=confidences.cpu().numpy(), y=accuracies.cpu().numpy(), ax=ax, label="pure NN outputs")
    #     sns.lineplot(x=confidences_corrected.cpu().numpy(), y=accuracies_corrected.cpu().numpy(), ax=ax,
    #                  label="prior corrected outputs")
    #     sns.lineplot(x=confidences_temp.cpu().numpy(), y=accuracies_temp.cpu().numpy(), ax=ax,
    #                  label="temp scaled outputs")
    #     sns.lineplot(x=confidences_temp_and_corrected.cpu().numpy(), y=accuracies_temp_and_corrected.cpu().numpy(),
    #                  ax=ax, label="temp scaled and corrected")
    #     ax.plot(np.linspace(0, 1, 100), np.linspace(0, 1, 100), c="grey", ls="--")
    #     ax.set_xlabel("Confidence")
    #     ax.set_ylabel("Accuracy")
    #     ax.set_title(f"{key} | {data_splits_per_experiment[experiment_number]}")
    #     ax.legend(loc="best")
    # plt.savefig(f"ConfidenceAccuracy{key}.png", dpi=300)
    # plt.show()
    # plt.close("all")


In [None]:
os.path.join(base_path, best_checkpoint_part[1:])

In [36]:
best_checkpoints

['/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/0/logs/checkpoints/epoch=99.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/1/logs/checkpoints/epoch=47.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/2/logs/checkpoints/epoch=37.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/3/logs/checkpoints/epoch=26.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/4/logs/checkpoints/epoch=19.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/5/logs/checkpoints/epoch=15.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/6/logs/checkpoints/epoch=14.ckpt',
 '/gpfs/work/machnitz/plankton_logs/supervised/singlelabel/multirun/2022-04-19/08-14-50/7/logs/checkpoints/epoch=16.ckpt',
 '/gpfs/work/mac

In [8]:

datamodule.setup(stage="fit")
val_dataloader = datamodule.val_dataloader()

for batch in val_dataloader:
    example_input, _ = batch
    break

val_logits, val_labels, val_return_metrics = run_and_save(best_checkpoint, val_dataloader, return_metrics,
                                                          key + "valid", experiment_number)

NameError: name 'datamodule' is not defined

In [None]:

datamodule.setup(stage="test")
test_dataloader = datamodule.test_dataloader()

for batch in test_dataloader:
    example_input, _ = batch
    break

test_logits, test_labels, test_return_metrics = run_and_save(best_checkpoint, test_dataloader, return_metrics,
                                                             key + "test_new", experiment_number)

In [None]:
reform = {(outerKey, innerKey): values for outerKey, innerDict in return_metrics.items() for innerKey, values in
          innerDict.items()}
single_label_df = pd.DataFrame.from_dict(reform).T
single_label_df.index.names = ("Model", "Experiment")
single_label_df

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))
sns.lineplot(x="Data Fraction", y="Accuracy/Testing", hue="Model", style="Model", data=single_label_df, ax=ax,
             palette=hereon_color_array, markers=True, markersize=10, )

# for item in data_splits_per_experiment:
#     ax.text(item,0.3,f'{item * 100:.0f}%',color="grey", horizontalalignment="center", rotation=-45)

ax.set_xscale("log")
plt.savefig("Accuracies_Testing_Singlelabel.png", dpi=300)

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))
sns.lineplot(x="Data Fraction", y="ECE", hue="Model", style="Model", data=single_label_df, ax=ax,
             palette=hereon_color_array, markers=True, markersize=10, )

for item in data_splits_per_experiment:
    ax.text(item, 0.2, f'{item * 100:.0f}%', color="grey", horizontalalignment="center", rotation=-45)

ax.set_xscale("log")
ax.legend(loc="best")
plt.savefig("ECE_Testing_Singlelabel.png", dpi=300)

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))
sns.lineplot(x="Data Fraction", y="loss/Testing", hue="Model", style="Model", data=single_label_df, ax=ax,
             palette=hereon_color_array, markers=True, markersize=10, )

for item in data_splits_per_experiment:
    ax.text(item, 2, f'{item * 100:.0f}%', color="grey", horizontalalignment="center", rotation=-45)

ax.set_xscale("log")
ax.legend(loc="best")
plt.savefig("NLL_Testing_Singlelabel.png", dpi=300)