In [1]:
import os

# Silence TensorFlow's C++ logging ('3' filters INFO, WARNING and ERROR).
# This must be set BEFORE `import tensorflow`: messages emitted while the
# library is being imported are not suppressed if the variable is set later.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import sys
import git
import pathlib

import random

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers

# Locate the root of the enclosing git working tree (searching parent
# directories) and add it to sys.path so the project-local `libs` package
# can be imported regardless of where the notebook is run from.
PROJ_ROOT_PATH = pathlib.Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
PROJ_ROOT = str(PROJ_ROOT_PATH)
if PROJ_ROOT not in sys.path:
    sys.path.append(PROJ_ROOT)

# Project-local imports; these rely on PROJ_ROOT being on sys.path (above).
import libs.model_archs
import libs.utils
from libs.constants import MODELS_FOLDER

In [2]:
# Turn on memory growth for every visible GPU so TensorFlow allocates GPU
# memory as it is needed rather than claiming it all at start-up.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # With no GPU present the list is empty and this loop is a no-op.
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
except RuntimeError as err:
    # Report the problem and carry on instead of aborting the notebook.
    print(err)

In [3]:
# Seeds identifying the trained model instances to evaluate. Each seed is used
# further down to build the "<dataset>--<arch>-<seed>.h5" filename to load.
# NOTE(review): load_model_seeds() is assumed to return an iterable of seeds —
# confirm against libs/seeds.
from libs.seeds import load_model_seeds
model_seeds = load_model_seeds()
# Debug shortcut: uncomment to evaluate only the first seed.
# model_seeds = [model_seeds[0]]

In [4]:
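# Disabled command-line interface: select the dataset and model architecture
# from script arguments. Kept for reference only; re-enabling it also requires
# `import argparse`, which this notebook does not import.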
# parser = argparse.ArgumentParser()
# parser.add_argument('dataset')
# parser.add_argument('model_arch')
# args= parser.parse_args()

# dataset = args.dataset
# model_arch = args.model_arch

In [5]:
# Model types to evaluate, each written as "<dataset>--<model_arch>"
# (the evaluation loop splits the entries on "--").
# Alternative selection:
# models_to_evaluate = ['mnist--fcA','mnist--lenetA','fashion--lenetA']
models_to_evaluate = [
    'cifar10--vgg16F',
]

In [6]:
# Evaluate every (model type, seed) combination on its test set.
# `result` maps: model type -> seed -> {"acc": value at index 1 of evaluate()}.
result = {}
for model_type in models_to_evaluate:
    # Entries are named "<dataset>--<model_arch>".
    dataset, model_arch = model_type.split('--')

    # Look up the dataset-specific loader, libs.utils.prepare_<dataset>, and
    # load the data once per model type (only the test split is used below).
    dataset_loader = getattr(libs.utils, f'prepare_{dataset}')
    (x_train, y_train), (x_test, y_test) = dataset_loader()

    for seed in model_seeds:
        # Saved models are stored as
        # MODELS_FOLDER/<model_arch>/<dataset>--<model_arch>-<seed>.h5
        model_filename = f"{model_type}-{seed}.h5"
        model_file = str(pathlib.Path(MODELS_FOLDER / model_arch, model_filename))
        model = models.load_model(model_file)

        # The result is indexed as [loss, accuracy]; this assumes the model
        # was compiled with accuracy as its first metric — TODO confirm.
        score = model.evaluate(x_test, y_test, batch_size=32, verbose=False)

        # setdefault leaves an already-recorded value for this
        # model type / seed untouched.
        seed_results = result.setdefault(model_type, {})
        seed_results.setdefault(seed, {}).setdefault("acc", score[1])

In [7]:
# Flatten result[model_type][seed]["acc"] into {model_type: {seed: acc}} so the
# frame can be built directly with one row per model type and one column per
# seed (no transpose / element-wise extraction step needed).
acc_by_model = {
    model_type: {seed: metrics['acc'] for seed, metrics in per_seed.items()}
    for model_type, per_seed in result.items()
}
per_seed_acc = pd.DataFrame.from_dict(acc_by_model, orient='index')

# Compute max / mean / min from the per-seed columns ONLY. Deriving them from
# `df` after it has gained a 'max' column would fold that column into the
# mean, and both 'max' and 'mean' into the min.
df = per_seed_acc.copy()
df['max'] = per_seed_acc.max(axis=1)
df['mean'] = per_seed_acc.mean(axis=1)
df['min'] = per_seed_acc.min(axis=1)

In [8]:
# Show the per-seed accuracy table together with its max / mean / min columns.
df

Unnamed: 0,1437,max,mean,min
cifar10--vgg16F,0.8451,0.8451,0.8451,0.8451
