In [1]:
import os
import sys
import git
import pathlib

import random

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

PROJ_ROOT_PATH = pathlib.Path(git.Repo('.', search_parent_directories=True).working_tree_dir)
PROJ_ROOT =  str(PROJ_ROOT_PATH)
if PROJ_ROOT not in sys.path:
    sys.path.append(PROJ_ROOT)

from libs.utils import prepare_fashion, prepare_mnist
from libs.constants import MODELS_FOLDER

In [2]:
# Limit GPU growth
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)

In [3]:
from libs.seeds import load_model_seeds
model_seeds = load_model_seeds()

In [4]:
# define dataset and model architecture
# parser = argparse.ArgumentParser()
# parser.add_argument('dataset')
# parser.add_argument('model_arch')
# args= parser.parse_args()

# dataset = args.dataset
# model_arch = args.model_arch

In [5]:
# define columns
columns = ['model_type'] + model_seeds
# define dataframe
data = pd.DataFrame(columns = columns)

In [24]:
models_to_evaluate = ['mnist--fcA','mnist--lenetA','fashion--lenetA']

In [68]:
result = {}
for model_type in models_to_evaluate:
    [dataset, model_arch] = model_type.split('--')
    # prepare data
    if dataset == "fashion":
        (x_train, y_train), (x_test, y_test) = prepare_fashion()
    elif dataset == "mnist":
        (x_train, y_train), (x_test, y_test) = prepare_mnist()
    else:
        print("Invalid Dataset or Dataset not found")
        
    # evaluate for all seeds    
    for seed in model_seeds:
        # load model
        model_type = dataset + "--" + model_arch
        model_instance = model_type + "-" + str(seed)
        model_filename = model_instance + ".h5"
        model_subdir = pathlib.Path(MODELS_FOLDER / model_arch)
        model_file = str(pathlib.Path(model_subdir/ model_filename))
        model = models.load_model(model_file)

        score = model.evaluate(x_test, 
                               y_test, 
                               batch_size=32,
                              verbose=False)  # returns ['loss', 'accuracy']
        result.setdefault(model_type,{}).setdefault(seed, {}).setdefault("acc", score[1])


In [73]:
# Create a dataframe from the dictionary
df = pd.DataFrame.from_dict(result)

# Use seed as column headers
df = df.transpose()

# apply the lambda function to every element of the dataframe and extract the 'acc' value
df = df.applymap(lambda x: x['acc'])

# calculate max, avg, min
df['max'] = df.max(axis=1)
df['mean'] = df.mean(axis=1)
df['min'] = df.min(axis=1)

In [74]:
df

Unnamed: 0,1437,2101,2331,4283,6199,max,mean,min
mnist--fcA,0.9612,0.965,0.9626,0.9692,0.9682,0.9692,0.9659,0.9612
mnist--lenetA,0.9902,0.9909,0.991,0.9898,0.9882,0.991,0.990183,0.9882
fashion--lenetA,0.8973,0.8925,0.9019,0.9004,0.9008,0.9019,0.899133,0.8925
