# GAN Evaluation - Inception Score (IS)

## Basic [Concept](https://machinelearningmastery.com/how-to-implement-the-inception-score-from-scratch-for-evaluating-generated-images/)

In [1]:
import numpy as np
from numpy import asarray
from numpy import expand_dims
from numpy import log
from numpy import mean
from numpy import exp

from scipy.stats import entropy

In [2]:
def calculate_inception_score(p_yx, eps=1E-16):
    """Compute the Inception Score from a matrix of conditional class probabilities.

    Args:
        p_yx: 2-D array with one row per image and one column per class,
            holding p(y|x) for each image.
        eps: small constant added inside every logarithm so that a zero
            probability does not produce log(0).

    Returns:
        exp of the image-averaged KL divergence between p(y|x) and the
        marginal p(y).
    """
    # marginal p(y): class-wise average over all images, kept as a 1 x C row
    marginal = p_yx.mean(axis=0, keepdims=True)

    # log(p(y|x)) - log(p(y)), with the marginal broadcast across the image rows
    log_ratio = log(p_yx + eps) - log(marginal + eps)

    # KL divergence of each image: weight the log-ratio by p(y|x), sum over classes
    per_image_kl = (p_yx * log_ratio).sum(axis=1)

    # average over the images, then exponentiate to undo the logs
    return exp(mean(per_image_kl))

In [3]:
# conditional probabilities for high quality images of 3 classes:
# each image is assigned to exactly one class with full confidence
p_yx = asarray([
    [1.0, 0.0, 0.0],
    [0.0, 1.0, 0.0],
    [0.0, 0.0, 1.0],
])
IS_score = calculate_inception_score(p_yx)
print('IS Score:', IS_score)

IS Score: 2.999999999999999


In [4]:
# conditional probabilities for low quality images: every image gets the same
# uniform distribution over the 3 classes. Each row is a probability
# distribution and must sum to 1, hence 1/3 per class (0.5 per class sums to 1.5).
p_yx = asarray([
    [1 / 3, 1 / 3, 1 / 3],
    [1 / 3, 1 / 3, 1 / 3],
    [1 / 3, 1 / 3, 1 / 3],
])
IS_score = calculate_inception_score(p_yx)
print('IS Score:',IS_score)

IS Score: 1.0


## IS in PyTorch

In [6]:
import torch
from torch import nn
from torch.autograd import Variable
from torch.nn import functional as F
import torch.utils.data

from torchvision.models.inception import inception_v3

In [10]:
def calculate_inception_score(images, n_split=1, eps=1E-16, batch_size=32, resize=False, cuda=False):
    """Estimate the Inception Score of a set of images with a pretrained Inception v3.

    NOTE: this function has the same name as the NumPy-only
    ``calculate_inception_score(p_yx, eps)`` defined earlier in this notebook
    and shadows it once this cell has been executed.

    Args:
        images: dataset/sequence that supports ``len()`` and can be fed to
            ``torch.utils.data.DataLoader``; every item must be an image tensor
            (presumably 3 x H x W and already normalised the way the model
            expects -- confirm against the caller).
        n_split: number of consecutive, equally sized groups the predictions
            are divided into; one score is computed per group. When
            ``len(images)`` is not a multiple of ``n_split`` the trailing
            ``len(images) % n_split`` images are ignored.
        eps: small constant added inside every logarithm to avoid log(0).
        batch_size: number of images per forward pass.
        resize: if True, bilinearly upsample each batch to 299 x 299 before
            it is passed to the model.
        cuda: if True, run the model on the GPU; otherwise on the CPU.

    Returns:
        Tuple ``(mean, std)`` of the per-split Inception Scores.

    Raises:
        AssertionError: if ``batch_size <= 0``, ``len(images) <= batch_size``
            or ``n_split`` is not in ``[1, len(images)]``.
    """
    N = len(images)

    assert batch_size > 0
    assert N > batch_size
    # every split needs at least one image; otherwise its score would be the
    # mean of an empty list (NaN) and silently poison the final result
    assert 0 < n_split <= N, "n_split must be between 1 and len(images)"

    # choose the device that the model and every batch are moved to
    if cuda:
        device = torch.device("cuda")
    else:
        if torch.cuda.is_available():
            print("WARNING: You have a CUDA device, so you should probably set cuda=True")
        device = torch.device("cpu")

    # set up dataloader
    data_loader = torch.utils.data.DataLoader(images, batch_size=batch_size)

    # load inception model
    print("Loading Inception V3 model...")
    inception_model = inception_v3(pretrained=True, transform_input=False)
    inception_model = inception_model.to(device=device, dtype=torch.float32)
    # eval mode: the model is used purely as a fixed classifier
    inception_model.eval()

    # nn.Upsample holds no parameters, so it needs no device/dtype conversion
    up = nn.Upsample(size=(299, 299), mode='bilinear', align_corners=False)

    def get_yhats(x):
        """Return the softmax class probabilities of one batch as a NumPy array."""
        if resize:
            x = up(x)
        x = inception_model(x)
        return F.softmax(x, dim=1).detach().cpu().numpy()

    # get predictions
    print("Predicting class probabilities for images per batch...")
    # one row of class probabilities per image; the classifier is expected to
    # emit 1000 class scores per image
    yhats = np.zeros((N, 1000))

    # inference only: without no_grad() every forward pass would record an
    # autograd graph that is never used and only costs memory
    with torch.no_grad():
        for i, batch in enumerate(data_loader, 0):
            batch = batch.to(device=device, dtype=torch.float32)
            batch_size_i = batch.size()[0]  # the last batch may be smaller

            yhats[i*batch_size:i*batch_size + batch_size_i] = get_yhats(batch)

    # compute the mean kl-div
    print("Computing the mean kl-div...")
    split_size = N // n_split
    split_scores = []

    for k in range(n_split):

        # retrieve p(y|x) for the k-th group of images
        yhats_ = yhats[k * split_size: (k + 1) * split_size, :]

        # calculate p(y); its log is the same for every image of the split,
        # so it is computed once instead of once per image
        p_y = np.mean(yhats_, axis=0)
        log_p_y = log(p_y + eps)

        # KL divergence per image = sum over classes of
        # p(y|x) * (log(p(y|x)) - log(p(y)))
        # (scipy.stats.entropy(p_yx, p_y) computes the same KL divergence,
        # only without the eps smoothing)
        scores = [
            (p_yx * (log(p_yx + eps) - log_p_y)).sum(axis=0)
            for p_yx in yhats_
        ]

        # collect INCEPTION SCOREs for each group of images
        split_scores.append(np.exp(np.mean(scores)))

    print("\nInception Score: ")
    return np.mean(split_scores), np.std(split_scores)

In [11]:
import torchvision.datasets as dset
import torchvision.transforms as transforms

# CIFAR-10 images stored under data/ (downloaded on first use). Each image is
# resized to 32 px, converted to a tensor and normalised per channel with
# mean 0.5 and std 0.5.
cifar = dset.CIFAR10(
    root='data/',
    download=True,
    transform=transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]),
)

class IgnoreLabelDataset(torch.utils.data.Dataset):
    """Dataset view that exposes only the first element of each wrapped item.

    Wrapping a dataset whose items are ``(image, label)`` pairs therefore
    yields the images alone; the length is that of the wrapped dataset.
    """

    def __init__(self, orig):
        # keep a reference to the wrapped dataset; nothing is copied
        self.orig = orig

    def __len__(self):
        """Number of items in the wrapped dataset."""
        return len(self.orig)

    def __getitem__(self, index):
        """Return element 0 of the wrapped item at ``index``."""
        item = self.orig[index]
        return item[0]

Files already downloaded and verified


In [12]:
# Score the CIFAR-10 images (labels stripped) in 10 splits. The GPU is used
# only when one is actually available, so the cell also runs on CPU-only
# machines instead of failing.
print("Calculating Inception Score...")
mean_score, std_score = calculate_inception_score(
    IgnoreLabelDataset(cifar),
    n_split=10,
    batch_size=32,
    resize=True,
    cuda=torch.cuda.is_available(),
)
print("IS Mean score:",mean_score)
print("IS Std score:",std_score)

Calculating Inception Score...
Loading Inception V3 model...
Predicting class probabilities for images per batch...
Computing the mean kl-div...

Inception Score: 
IS Mean score: 9.672780714173985
IS Std score: 0.14991599423569926


---