In [1]:
# Reload edited modules automatically before each cell executes.
%load_ext autoreload
%autoreload 2
# Use the interactive widget (ipympl) backend for matplotlib figures.
%matplotlib widget

In [4]:
import sys
import os

sys.path.append('../tools')
import h5py
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE

from torch.utils.data import DataLoader
from torch.utils.data import Dataset
import torch


In [5]:
# Locations of the input clips, training logs and model checkpoints,
# all relative to the notebook's working directory.
data_dir = "../../../user_data/competition_data/clips"
log_folder_root = '../../../user_data/logs/'
ckpt_folder_root = '../../../user_data/checkpoints/'

# Subject identifiers used by this notebook: Dog_1 .. Dog_4.
targets = ['Dog_%d' % subject_no for subject_no in range(1, 5)]

# Alternative subject set (disabled): Patient_1 .. Patient_8.
# targets = ['Patient_%d' % subject_no for subject_no in range(1, 9)]

In [6]:
# Seed every random number generator used here so reruns are repeatable.
random_seed = 42
np.random.seed(random_seed)
torch.manual_seed(random_seed)

# Stand-alone torch generator seeded with the same value
# (manual_seed returns the generator itself).
determine_generator = torch.Generator().manual_seed(random_seed)

if torch.cuda.is_available():
    torch.cuda.manual_seed(random_seed)
    # Make cuDNN pick deterministic algorithms only.
    # (torch.set_deterministic(True) was considered but is left disabled.)
    torch.backends.cudnn.deterministic = True
    # Turn off the cuDNN auto-tuner so the same algorithm is chosen on every run.
    torch.backends.cudnn.benchmark = False

In [34]:
# os.listdir returns entries in arbitrary order, but later cells address this
# list (and query_obj_list, built from it) by position, e.g. index 10.
# Sorting case-insensitively makes those positions stable across machines.
path = sorted(os.listdir(log_folder_root+'kaggle_dog_active'), key=str.lower)

In [35]:
# Show the discovered log folders; query_obj_list is built in this same order,
# so a position in this list identifies the matching metrics entry.
path

['active_logs_BadgeSampling',
 'active_logs_BALDDropout',
 'active_logs_EntropySampling',
 'active_logs_EntropySamplingDropout',
 'active_logs_KCenterGreedy',
 'active_logs_KCenterGreedyPCA',
 'active_logs_LeastConfidence',
 'active_logs_LeastConfidenceDropout',
 'active_logs_MarginSampling',
 'active_logs_MarginSamplingDropout',
 'active_logs_RandomSampling',
 'active_logs_VarRatio']

In [32]:

def process_active_loss(path, n_rounds=12):
    """Collect the best-epoch validation metrics of each active-learning round.

    For every round ``i`` in ``range(n_rounds)`` the file
    ``<path>/logger_round_<i>/version_0/metrics.csv`` is read, the row with
    the highest ``val_acc`` is selected, and that row's validation metrics
    are recorded.

    Args:
        path: Directory holding the ``logger_round_<i>`` folders of one run
            (``str`` or path-like).
        n_rounds: Number of rounds to read, starting at round 0. Defaults to
            12, the value that used to be hard-coded.

    Returns:
        A dict mapping ``'val_acc'``, ``'val_precision'``, ``'val_recall'``
        and ``'val_loss'`` to lists holding one value per round.

    Raises:
        FileNotFoundError: If a round's ``metrics.csv`` does not exist
            (raised by ``pandas.read_csv``).
        KeyError: If a metrics file lacks one of the expected columns.
    """
    metric_names = ('val_acc', 'val_precision', 'val_recall', 'val_loss')
    # Named `best_metrics` rather than `dict` so the builtin is not shadowed.
    best_metrics = {name: [] for name in metric_names}

    for round_idx in range(n_rounds):
        csv_path = os.path.join(
            path, 'logger_round_' + str(round_idx), 'version_0', 'metrics.csv'
        )
        metrics = pd.read_csv(csv_path)
        # argmax is positional, so pair it with iloc to fetch the best row.
        best_row = metrics.iloc[metrics['val_acc'].argmax()]
        for name in metric_names:
            best_metrics[name].append(best_row[name])

    return best_metrics

# One metrics dict per log folder, kept in the same order as `path`.
query_obj_list = [
    process_active_loss(log_folder_root+'kaggle_dog_active/' + folder_name)
    for folder_name in path
]

In [47]:
# The import is kept in case other cells reference the `matplotlib` name.
# The global backend is deliberately NOT switched to cairo here: cairo is a
# non-GUI backend, so plt.show() would only emit a warning instead of
# displaying the figure. cairo is requested for the SVG export only (below).
import matplotlib

# x-axis values: one point per round, starting at 264 and growing by 71 each round.
n_samples = 264 + np.arange(0, 12) * 71

crit = 'val_acc'
baseline_name = 'active_logs_RandomSampling'
output_dir = "dog_neuro_vista_example"

fig, ax = plt.subplots(figsize=(8, 6))
ax.hlines(0.9558, xmin=n_samples[0], xmax=n_samples[-1], colors='gold',
          label='Fully Supervised Accuracy Threshold')

# Rank the strategies by their mean `crit` value and draw the seven best,
# best first. Legend labels are the folder-name suffix after the last '_'.
mean_scores = [np.mean(entry[crit]) for entry in query_obj_list]
top_indices = [int(idx) for idx in np.argsort(mean_scores)[::-1][:7]]
for idx in top_indices:
    ax.plot(n_samples, query_obj_list[idx][crit], label=path[idx].split('_')[-1])

# Always include the random-sampling baseline. It is located by folder name
# rather than by a fixed position, and is not drawn twice if it already
# ranks among the top seven.
baseline_idx = path.index(baseline_name)
if baseline_idx not in top_indices:
    ax.plot(n_samples, query_obj_list[baseline_idx][crit],
            label=path[baseline_idx].split('_')[-1])

ax.set_xlabel('# of 1-second Samples')
ax.set_ylabel('Validation Accuracy')
ax.legend(loc='lower right')
ax.grid()

# Make sure the target folder exists, then render the SVG with cairo for this
# export only, leaving the interactive backend in place for plt.show().
os.makedirs(output_dir, exist_ok=True)
fig.savefig(output_dir + "/AL_performance.svg", format="svg", backend="cairo")
plt.show()

  plt.show()


In [37]:
# Inspect the metrics stored at position 10, the entry the plot cell draws
# separately from the top-ranked strategies.
# NOTE(review): in the folder listing shown earlier, index 10 is
# 'active_logs_RandomSampling'; confirm this index still matches after any
# change to the log directory.
query_obj_list[10]

{'val_acc': [0.8838778409090909,
  0.9410511363636364,
  0.9350142045454546,
  0.9350142045454546,
  0.9499289772727272,
  0.9485085227272728,
  0.9499289772727272,
  0.8838778409090909,
  0.9470880681818182,
  0.94921875,
  0.9509943181818182,
  0.9509943181818182],
 'val_precision': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'val_recall': [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 'val_loss': [0.2256363034248352,
  0.1454024165868759,
  0.1460864096879959,
  0.1500774025917053,
  0.1406725794076919,
  0.1396132111549377,
  0.1391232162714004,
  0.2029504626989364,
  0.1374065428972244,
  0.1374123245477676,
  0.1365651041269302,
  0.1350945234298706]}

In [17]:
# Positions of the strategies ordered by mean `crit` value, lowest mean first.
np.argsort([np.mean(entry[crit]) for entry in query_obj_list])

array([10,  5,  4,  0,  1,  8,  2,  6, 11,  3,  7,  9], dtype=int64)

In [13]:
# Display the collected per-round metrics of every strategy (one dict per entry).
# NOTE(review): this prints the full nested structure; a summary would be easier to read.
query_obj_list

[{'val_acc': [0.8838778409090909,
   0.9495738636363636,
   0.9488636363636364,
   0.94140625,
   0.9481534090909092,
   0.9502840909090908,
   0.9524147727272728,
   0.9517045454545454,
   0.9517045454545454,
   0.953125,
   0.9524147727272728,
   0.953125],
  'val_precision': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0075757575757575,
   0.0183654729109274,
   0.0,
   0.0,
   0.0525711662075298,
   0.0082644628099173,
   0.0],
  'val_recall': [0.0,
   0.0,
   0.0,
   0.0,
   0.0,
   0.0909090909090909,
   0.1818181818181818,
   0.0,
   0.0,
   0.2727272727272727,
   0.0909090909090909,
   0.0],
  'val_loss': [0.2256363034248352,
   0.1459742188453674,
   0.149370327591896,
   0.1446298658847808,
   0.1402529627084732,
   0.1390686184167862,
   0.1378500163555145,
   0.1374741345643997,
   0.1367921680212021,
   0.1369179934263229,
   0.1368170827627182,
   0.1346723884344101]},
 {'val_acc': [0.8838778409090909,
   0.9371448863636364,
   0.94921875,
   0.9499289772727272,
   0.950639

In [11]:
# Highest `crit` value reached by each strategy, in query_obj_list order.
max_list = [max(entry[crit]) for entry in query_obj_list]

In [12]:
# Show the per-strategy maxima of `crit`, ordered like query_obj_list.
max_list

[0.953125,
 0.9598721590909092,
 0.9630681818181818,
 0.9673295454545454,
 0.9655539772727272,
 0.9509943181818182,
 0.9655539772727272]