# Calculate and Compare FID Scores of Model Output

In [None]:
import tensorflow as tf
from keras import backend as K
import pathlib
import numpy as np
import pickle

from numpy import cov
#from numpy import trace
#from numpy import iscomplexobj
from scipy.linalg import sqrtm

### Define FID function

Based on https://machinelearningmastery.com/how-to-implement-the-frechet-inception-distance-fid-from-scratch/

In [None]:
# calculate frechet inception distance
def calculate_fid(model, images, reference='fid_reference_values'):
    """Return the Frechet Inception Distance of `images` against stored reference statistics.

    Args:
        model: Object exposing ``predict(images)``. Its output is treated as a
            2-D array with one row of activations per image.
        images: Input forwarded unchanged to ``model.predict``.
        reference: Path of a pickle file containing two consecutively pickled
            objects: first the reference mean vector, then the reference
            covariance matrix.

    Returns:
        The FID score: the squared distance between the two means plus
        ``trace(sigma1 + sigma2 - 2 * sqrt(sigma1 . sigma2))``.

    Raises:
        OSError: If `reference` cannot be opened (e.g. ``FileNotFoundError``).
    """
    # Load the baseline first so that a missing or unreadable reference file
    # fails immediately instead of after the (expensive) model inference.
    # NOTE(security): pickle can execute arbitrary code while loading; only
    # point `reference` at files from a trusted source.
    with open(reference, 'rb') as f:
        mu1 = pickle.load(f)
        sigma1 = pickle.load(f)

    # Activations of the images that are compared to the established baseline.
    act = model.predict(images)
    # Mean and covariance over the sample axis (rows are observations).
    mu2, sigma2 = act.mean(axis=0), np.cov(act, rowvar=False)

    # Sum of squared differences between the means.
    ssdiff = np.sum((mu1 - mu2) ** 2.0)

    # Matrix square root of the product of both covariances.
    covmean = sqrtm(sigma1.dot(sigma2))
    # sqrtm may return a complex matrix; only the real part is used.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    fid = ssdiff + np.trace(sigma1 + sigma2 - 2.0 * covmean)
    return fid

### Compare random generated images (DCGAN) against baseline

We load a random subset of generated images from a previous run into a dataset and run it through the `calculate_fid()` function.

In [None]:
# Load the DCGAN-generated images.
# IMAGE_SIZE is the expected input size of Inception V3; passing it to
# image_dataset_from_directory() makes it resize the images automatically.
IMAGE_SIZE = (299, 299)
BATCH_SIZE = 64

data_dir = pathlib.Path('/data/output/images/dwarfgan001')
imgs = list(data_dir.glob('*.png'))

# No `labels` / `label_mode` arguments are passed: labels are not relevant here.
# Only 20 images are available, but validation_split has to be < 1, so 0.99
# is used together with the 'validation' subset.
check = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=42,
    validation_split=0.99,
    subset='validation',
)

result_dcgan = calculate_fid(model, check)
print(
    f'We see that the result is quite large with a FID score of: {round(result_dcgan, 2)}. '
    'A perfect imitation would score a FID score close to 0.'
)

### Calculate FID for Real Images

In [None]:
# Load the real images.
# IMAGE_SIZE is the expected input size of Inception V3; passing it to
# image_dataset_from_directory() makes it resize the images automatically.
IMAGE_SIZE = (299, 299)
BATCH_SIZE = 32

data_dir = pathlib.Path('/data/input/crops_small/')
imgs = list(data_dir.glob('*.png'))

# No `labels` / `label_mode` arguments are passed: labels are not relevant here.
# Only 2.5% of the 700'000 images are used as reference (the 'validation' subset).
check = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=42,
    validation_split=0.025,
    subset='validation',
)

result_real = calculate_fid(model, check)


In [None]:
# Report the FID score obtained for the real-image subset.
print(
    f'Here we see a much lower FID score of: {round(result_real, 2)}. '
    'Due to the variety of pictures, a score of 0 is unlikely.'
)

### Compare WGAN-GP RUN02 Images to Baseline

In [None]:
# Load the images found under the WGAN-GP RUN02 output directory.
# IMAGE_SIZE is the expected input size of Inception V3; passing it to
# image_dataset_from_directory() makes it resize the images automatically.
IMAGE_SIZE = (299, 299)
BATCH_SIZE = 32

data_dir = '/data/output/images/WGANGPR02FID/'

# No `labels` / `label_mode` arguments are passed: labels are not relevant here.
# No validation_split / subset is given, so the directory is not split.
check = tf.keras.preprocessing.image_dataset_from_directory(
    data_dir,
    image_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    seed=42,
)

result_wgangp = calculate_fid(model, check)

In [None]:
# Put the WGAN-GP score next to the DCGAN score and the real-image score.
print(
    f'Here we see a higher FID score of: {round(result_wgangp, 2)} '
    f'compared to the initial FID score of {round(result_dcgan, 2)} from the DCGAN model '
    f'and {round(result_real, 2)} of the real image reference score.'
)