In [1]:
import glob
import logging
import random

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from scipy import linalg
from scipy.sparse import load_npz
from tensorflow.keras.models import model_from_json

In [None]:
# Path to the JSON architecture file that load_model() opens and passes to
# model_from_json (despite the "_dir" suffix this is a file, not a directory).
# The one_view model is described as ResNet34.
# NOTE(review): the model.summary() output further down is titled "resnext" -
# confirm which architecture this JSON actually describes.
model_dir = 'saved_model/one_view_model.json'

# Pretrained weights file to use with the model above
weights_dir = 'saved_model/induction_0.h5'

# Data locations
# fake_data_dir is a single array file read with np.load; far_data_dir and
# near_data_dir are glob patterns that select_random_events() expands.
fake_data_dir = 'fake_events_induction_0.npz.npy'
far_data_dir = '/eos/user/r/rradev/nd2fd_data/far_data/view_0/*'
near_data_dir = '/eos/user/r/rradev/nd2fd_data/larnd-sim-ztime/*'

In [2]:
# Might have to change this depending on how you load the data
def load_event(filename):
    """Read one sparse event file and return it as a tensor with a leading axis.

    The file is read with ``load_npz``, converted with ``todense()``, reshaped
    with ``reshape(1, 500, 500)`` and finally passed through
    ``tf.expand_dims(..., axis=0)``.

    NOTE(review): if ``todense()`` returns an ``np.matrix``, the 3-D reshape
    may collapse back to 2-D, so the returned tensor could be (1, 500, 500)
    rather than (1, 1, 500, 500) - confirm against the model's input shape.

    Params:
    -- filename: Path of the ``.npz`` file holding one sparse event.
    Returns:
    --   : The event as returned by ``tf.expand_dims``.
    """
    sparse_event = load_npz(filename)
    dense_event = sparse_event.todense()
    shaped_event = dense_event.reshape(1, 500, 500)
    return tf.expand_dims(shaped_event, axis=0)

def load_model(model_path=None, weights_path=None):
    """Rebuild the Keras model from its JSON description and load its weights.

    Params:
    -- model_path  : JSON architecture file. Defaults to the module-level
                     ``model_dir``.
    -- weights_path: Weights file handed to ``model.load_weights``. Defaults
                     to the module-level ``weights_dir``.
    Returns:
    --   : The model built by ``model_from_json`` with the weights loaded.
    """
    if model_path is None:
        model_path = model_dir
    if weights_path is None:
        weights_path = weights_dir

    print('Loading model from disk...')
    with open(model_path, 'r') as json_file:
        loaded_model_json = json_file.read()
    model = model_from_json(loaded_model_json)
    # Load the configured weights file; an empty path here would leave the
    # pretrained weights unused.
    model.load_weights(weights_path)
    return model

# Module-level model instance; test_model() and intermidate_layer_model()
# below read this global rather than taking the model as an argument.
model = load_model()

def test_model(events):
    preds = []

    for idx, event in enumerate(events):
        preds.append(
            model.predict(event) 
        )
    return preds

# from https://github.com/mseitzer/pytorch-fid
def _calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the Frechet Distance.
    The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.
    Params:
    -- mu1   : Mean vector of the activations of the first sample.
    -- sigma1: Covariance matrix of the activations of the first sample.
    -- mu2   : Mean vector of the activations of the second sample.
    -- sigma2: Covariance matrix of the activations of the second sample.
    -- eps   : Value added to the diagonal of both covariance matrices when
               the matrix square root of their product is not finite.
    Returns:
    --   : The Frechet Distance.
    Raises:
    -- AssertionError: the two means or the two covariances differ in shape.
    -- ValueError    : the matrix square root has a diagonal imaginary part
                       larger than 1e-3.
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, "Training and test mean vectors have different lengths"
    assert sigma1.shape == sigma2.shape, "Training and test covariances have different dimensions"

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = (
            "fid calculation produces singular product; " "adding %s to diagonal of cov estimates"
        ) % eps
        # `logging` comes from the notebook's import cell; without it this
        # recovery branch raised NameError instead of retrying.
        logging.debug(msg)
        # Retry with a slightly regularised pair of covariance matrices.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError("Imaginary component {}".format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return diff.dot(diff) + np.trace(sigma1) + np.trace(sigma2) - 2 * tr_covmean

Loading model from disk...


In [3]:
# Print the layer-by-layer description of the loaded model.
model.summary()

Model: "resnext"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 500, 500, 1) 0                                            
__________________________________________________________________________________________________
conv2d (Conv2D)                 (None, 250, 250, 64) 3136        input_1[0][0]                    
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 125, 125, 64) 0           conv2d[0][0]                     
__________________________________________________________________________________________________
batch_normalization (BatchNorma (None, 125, 125, 64) 256         max_pooling2d[0][0]              
____________________________________________________________________________________________

In [4]:
def intermidate_layer_model(layer_name = 'global_average_pooling2d_16'):
    """Build a model that maps the global ``model``'s input to one inner layer.

    Params:
    -- layer_name: Name of the layer (looked up with ``model.get_layer``)
                   whose output becomes the output of the returned model.
    Returns:
    --   : A ``tf.keras.Model`` sharing the input of the module-level ``model``.
    """
    feature_layer = model.get_layer(layer_name)
    return tf.keras.Model(inputs=model.input, outputs=feature_layer.output)
    
def _cvn_activations(event, model):
    """Call ``model`` on ``event`` and return whatever it produces.

    Params:
    -- event: Input handed to the model unchanged.
    -- model: Callable (e.g. the model from ``intermidate_layer_model``).
    Returns:
    --   : The result of ``model(event)``.
    """
    return model(event)
    
def select_random_events(folder, size, replace=False, seed=1):
    """Load a seeded random selection of the event files matching a pattern.

    Params:
    -- folder : Glob pattern (passed to ``glob.glob``) selecting event files.
    -- size   : Number of events to draw.
    -- replace: Whether the same file may be drawn more than once. With the
                default ``False`` every selected file is distinct.
    -- seed   : Seed for ``np.random.default_rng``.
    Returns:
    --   : List with one ``load_event`` result per selected path.
    Raises:
    -- ValueError: no file matches ``folder``, or (raised by ``choice``) more
                   events are requested than exist while ``replace`` is False.
    """
    rng = np.random.default_rng(seed)
    # glob returns matches in arbitrary, filesystem-dependent order; sorting
    # makes a given seed select the same files on every run and machine.
    paths = sorted(glob.glob(folder))
    if not paths:
        raise ValueError("No files match pattern {!r}".format(folder))
    # Forward `replace`; otherwise choice() falls back to sampling WITH
    # replacement and the argument is silently ignored.
    selected_paths = rng.choice(paths, size=size, replace=replace)

    return [load_event(path) for path in selected_paths]

def preprocess_fake(array):
    """Clamp non-positive entries to zero, round, and cast to ``uint8``.

    Params:
    -- array: Numeric numpy array.
    Returns:
    --   : Array of the same shape with dtype ``uint8``; entries <= 0 become
           0 and the rest are rounded with ``np.rint``.
    """
    non_positive = array <= 0.0
    clamped = np.where(non_positive, 0.0, array)
    rounded = np.rint(clamped)
    return rounded.astype('uint8')

def calculate_mu_sigma(data):
    """Return mean and covariance of the intermediate activations of ``data``.

    Every event is pushed through the model from ``intermidate_layer_model()``
    via ``_cvn_activations``; the per-event results are concatenated along
    axis 0 before the statistics are taken.

    Params:
    -- data: Iterable of events accepted by the intermediate-layer model.
    Returns:
    --   : Tuple ``(mu, sigma)`` - the mean over axis 0 and the covariance
           computed with ``rowvar=False`` (columns are the variables).
    """
    feature_model = intermidate_layer_model()
    per_event = [_cvn_activations(event, feature_model) for event in data]
    stacked = np.concatenate(per_event, axis=0)
    return np.mean(stacked, axis=0), np.cov(stacked, rowvar=False)

# Draw 1000 events from each of the far and near samples. Both calls use
# select_random_events' default seed (1).
far_data = select_random_events(far_data_dir, size=1000)
near_data = select_random_events(near_data_dir, size=1000)

In [5]:
# Generated ("fake") far data: load the saved array, insert a singleton axis
# at position 1, then clamp negatives to zero and round to uint8 with
# preprocess_fake.
generated = preprocess_fake(np.load(fake_data_dir)[:, np.newaxis, :, :])

In [7]:
 def calculate_fnd(far_data, near_data):
    # Load the model for the activations
    model = intermidate_layer_model()
    # Compute mean and covariance for far data
    activations_1 = []
    
    for event in far_data:
        activations_1.append(_cvn_activations(event, model))
    activations_1 = np.concatenate(activations_1, axis=0)
    
    mu1 = np.mean(activations_1, axis=0)
    sigma1 = np.cov(activations_1, rowvar=False)
    
    # Compute mean and covariance for near data
    activations_2 = []
    for event in near_data:
        activations_2.append(_cvn_activations(event, model))
    activations_2 = np.concatenate(activations_2, axis=0)

    mu2 = np.mean(activations_2, axis=0)
    sigma2 = np.cov(activations_2, rowvar=False)

    fnd = _calculate_frechet_distance(mu1, sigma1, mu2, sigma2)

    return fnd

# Distance of the far sample to the generated events, and to the near sample
# for comparison.
fnd_far_fake = calculate_fnd(far_data, generated)
fnd_far_near = calculate_fnd(far_data, near_data)