In [2]:
import os, math, scipy, pickle, pathlib, sys
import numpy as np
from scipy import linalg
from matplotlib.pyplot import imread
from skimage.transform import resize

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import AveragePooling2D, MaxPooling2D, UpSampling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import inception_v3 as iv3

try:
    from tqdm import tqdm
except ImportError:
    # If not tqdm is not available, provide a mock version of it
    def tqdm(x): return x

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
##### The following two blocks of code are needed to print the names of all the layers #####
##### of the Inception Network that are needed to get the SIFID score                  #####

#inception = iv3.InceptionV3(input_shape=(299,299,3))

#i = 0
#for layer in inception.layers:
#  sp = '                                 '[len(layer.name)-9:]
#  print(i, layer.name, sp, layer.trainable)
#  i += 1

In [5]:
#for i in range(11, 17, 3):
#  layer = inception.layers[i]
#  sp = '                                 '[len(layer.name)-9:]
#  print(layer.name, sp, layer.trainable)

#print(inception.layers[10].name)

In [6]:
# Pretrained InceptionV3 network returning feature maps
class InceptionV3(keras.Model):

    # Index of default block of inception to return, corresponds to output of final average pooling
    DEFAULT_BLOCK_INDEX = 3

    # Maps feature dimensionality to their output blocks indices
    BLOCK_INDEX_BY_DIM = {
        64: 0,   # First max pooling features
        192: 1,  # Second max pooling features
        768: 2,  # Pre-aux classifier features
        2048: 3  # Final average pooling features
    }

    def __init__(self,
                 output_blocks=[DEFAULT_BLOCK_INDEX],
                 resize_input=False,
                 normalize_input=True,
                 requires_grad=False):
        """Build pretrained InceptionV3
        Parameters
        ----------
        output_blocks : list of int
            Indices of blocks to return features of. Possible values are:
                - 0: corresponds to output of first max pooling
                - 1: corresponds to output of second max pooling
                - 2: corresponds to output which is fed to aux classifier
                - 3: corresponds to output of final average pooling
        resize_input : bool
            If true, bilinearly resizes input to width and height 299 before
            feeding input to model. As the network without fully connected
            layers is fully convolutional, it should be able to handle inputs
            of arbitrary size, so resizing might not be strictly needed
        normalize_input : bool
            If true, scales the input from range (0, 1) to the range the
            pretrained Inception network expects, namely (-1, 1)
        requires_grad : bool
            If true, parameters of the model require gradient. Possibly useful
            for finetuning the network
        """
        super(InceptionV3, self).__init__()

        self.resize_input = resize_input
        self.normalize_input = normalize_input
        self.output_blocks = sorted(output_blocks)
        self.last_needed_block = max(output_blocks)

        assert self.last_needed_block <= 3, \
            'Last possible output block index is 3'

        self.blocks = []

        self.inception = iv3.InceptionV3(weights='imagenet')  # load pretrained InceptionV3 with weights trained on Imagenet

        # Block 0: input to maxpool1 (3 conv2d layers)
        block0 = []
        for i in range(1, 10, 2):
          block0.append(self.inception.layers[i])
        self.blocks.append(Sequential(block0))

        # Block 1: maxpool1 to maxpool2 (maxpool1 + 2 conv2d layers)
        if self.last_needed_block >= 1:
            block1 = [MaxPooling2D(pool_size=(3,3), strides=(2,2))]
            for i in range(11, 17, 3):
              block1.append(self.inception.layers[i])
            self.blocks.append(Sequential(block1))

        # Block 2: maxpool2 to aux classifier
        if self.last_needed_block >= 2:
            block2 = [MaxPooling2d(pool_size=(3,3), stride=2),
                self.inception.layers[63],   #inception.Mixed_5b,
                self.inception.layers[86],   #inception.Mixed_5c,
                self.inception.layers[100],  #inception.Mixed_5d,
                self.inception.layers[132],  #inception.Mixed_6a,
                self.inception.layers[164],  #inception.Mixed_6b,
                self.inception.layers[196],  #inception.Mixed_6c,
                self.inception.layers[228],  #inception.Mixed_6d,
                self.inception.layers[248],  #inception.Mixed_6e,
            ]
            self.blocks.append(Sequential(block2))

        # Block 3: aux classifier to final avgpool
        if self.last_needed_block >= 3:
            block3 = [
                self.inception.layers[276], #inception.Mixed_7a,
                self.inception.layers[279], #inception.Mixed_7b,
                self.inception.layers[307], #inception.Mixed_7c,
            ]
            self.blocks.append(Sequential(block3))

        # Adding last average pooling layer
        if self.last_needed_block >= 4:
            block4 = [ AveragePooling2D() ]
            self.blocks.append(Sequential(block4))

    def call(self, inp): # inp : torch.autograd.Variable . Input tensor of shape Bx3xHxW. Values are expected to be in range (0, 1)
        outp = []
        x = inp

        if self.resize_input:
            x = UpSampling2D(size=(299, 299), mode='bilinear')(x)

        if self.normalize_input:
            x = 2 * x - 1  # Scale from range (0, 1) to range (-1, 1)

        for idx, block in enumerate(self.blocks):
            x = block(x)
            if idx in self.output_blocks:
                outp.append(x)
            if idx == self.last_needed_block:
                break

        return outp

In [7]:
# Scale an array of images to a new size
def scale_images(images, new_shape):
    images_list = list()
    for image in images:
      # resize with nearest neighbor interpolation
      new_image = resize(image, new_shape, 0)
      # store
      images_list.append(new_image)
    return np.asarray(images_list)

In [8]:
def get_activations(files, model, batch_size=1, dims=64, verbose=False):
    if len(files) % batch_size != 0:
        print(('Warning: number of images is not a multiple of the '
               'batch size. Some samples are going to be ignored.'))
    if batch_size > len(files):
        print(('Warning: batch size is bigger than the data size. '
               'Setting batch size to data size'))
        batch_size = len(files)

    n_batches = len(files) // batch_size
    n_used_imgs = n_batches * batch_size

    pred_arr = np.empty((n_used_imgs, dims))

    for i in tqdm(range(n_batches)):
        if verbose:
            print('\rPropagating batch %d/%d' % (i + 1, n_batches), end='', flush=True)
        start = i * batch_size
        end = start + batch_size

        img = np.array([imread(str(f)).astype(np.float32) for f in files[start:end]])
        
        # Resizing the input in order to have the same dimension of the output
        img = scale_images(img, (250,166,3))

        # Reshape to (n_images, 3, height, width)
        img = img[:,:,:,0:3]
        img /= 255

        batch = tf.cast(tf.convert_to_tensor(img), dtype=tf.float32)

        pred = model.predict(batch)[0]

        pred_arr = pred.transpose(0, 2, 3, 1).reshape(batch_size*pred.shape[2]*pred.shape[3],-1)

    if verbose:
        print(' done')

    return pred_arr # numpy array of dimension (num images, dims) that contains the activations of the given tensor when feeding inception with the query tensor.

In [9]:
# Numpy implementation of the Frechet Distance.
def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """ The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1)
    and X_2 ~ N(mu_2, C_2) is
            d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)).
    Stable version by Dougal J. Sutherland.
    Params:
    -- mu1   : Numpy array containing the activations of a layer of the
               inception net (like returned by the function 'get_predictions')
               for generated samples.
    -- mu2   : The sample mean over activations, precalculated on an
               representative data set.
    -- sigma1: The covariance matrix over activations for generated samples.
    -- sigma2: The covariance matrix over activations, precalculated on an
               representative data set.
    Returns:
    --   : The Frechet Distance.
    """

    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)

    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    assert mu1.shape == mu2.shape, \
        'Training and test mean vectors have different lengths'
    assert sigma1.shape == sigma2.shape, \
        'Training and test covariances have different dimensions'

    diff = mu1 - mu2

    # Product might be almost singular
    covmean, _ = linalg.sqrtm(sigma1.dot(sigma2), disp=False)
    if not np.isfinite(covmean).all():
        msg = ('fid calculation produces singular product; '
               'adding %s to diagonal of cov estimates') % eps
        print(msg)
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))

    # Numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        if not np.allclose(np.diagonal(covmean).imag, 0, atol=1e-3):
            m = np.max(np.abs(covmean.imag))
            raise ValueError('Imaginary component {}'.format(m))
        covmean = covmean.real

    tr_covmean = np.trace(covmean)

    return abs(diff.dot(diff) + np.trace(sigma1) +
            np.trace(sigma2) - 2 * tr_covmean)

In [10]:
# Calculation of the statistics used by the FID.
def calculate_activation_statistics(files, model, batch_size=1, dims=64, verbose=False):
    act = get_activations(files, model, batch_size, dims, verbose) # this function will give the output of Inception
    mu = np.mean(act, axis=0) # mean of the activations
    sigma = np.cov(act, rowvar=False) # covariance of the activations
    return mu, sigma


# Calculates the SIFID of two paths
def calculate_sifid_given_paths(path1, path2, batch_size, dims):
    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[dims]

    model = InceptionV3([block_idx])

    path1 = pathlib.Path(path1)
    path2 = pathlib.Path(path2)

    fid_values = []
    Im_ind = []
    for i in range(1):
        m1, s1 = calculate_activation_statistics([path1], model, batch_size, dims)
        m2, s2 = calculate_activation_statistics([path2], model, batch_size, dims)
        fid_values.append(calculate_frechet_distance(m1, s1, m2, s2))
    return fid_values

In [20]:
# Path definition based on the student:
persona = "m"   # 'l' Luca, 'm' Michela, 's' Sofia
if persona=="l":
    orig = "/content/drive/MyDrive/Sapienza Magistrale/Corsi Attuali/Vision and Perception/Progetto V&P Condiviso/Finale/"
elif persona=="m":
    orig = "/content/drive/MyDrive/VP/Project/"
elif persona=="s":
    orig = "/content/drive/MyDrive/ColabNotebooks/VISIONS & PERSPECTIVE/Finale/"
else:
    sys.exit("Wrong User and it is impossible to define the directory. Try Again.")

path1 = orig+'Images/frattura_editing.png'
path2 = orig+'Testing/leg/editing/inject_at_7.png'

sifid_values = calculate_sifid_given_paths(path1, path2, 1, 192)

sifid_values = np.asarray(sifid_values,dtype=np.float32)
np.save('SIFID', sifid_values)
print()
print('SIFID: ', sifid_values.mean())

100%|██████████| 1/1 [00:00<00:00,  3.79it/s]
100%|██████████| 1/1 [00:00<00:00,  3.43it/s]


SIFID:  0.00028983405



