An example of measuring domain similarity for transfer learning via Earth Mover's Distance (EMD). The results correspond to part of Figure 5 in the original paper.

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

%matplotlib inline
from matplotlib import pyplot as plt
import numpy as np
import os
import sys
import time
from sklearn.metrics.pairwise import euclidean_distances
import pyemd

* Feature extraction on all datasets from a ResNet-101 pre-trained on JFT.
* All features are pre-extracted in this example.
* Note that all features are extracted from the training set of each dataset.

In [2]:
# In this example on CUB-200, we demonstrate how to calculate feature and weight for each class.
feature_dir = './feature/resnet_101_JFT_299/'
dataset = 'cub_200'

# Load the pre-extracted per-image features and their class labels for CUB-200.
feature = np.load(feature_dir + dataset + '_feature.npy')
label = np.load(feature_dir + dataset + '_label.npy')

# CUB-200 training set contains 5994 images from 200 classes, each image is
# represented by a 2048-dimensional feature from the pre-trained ResNet-101.
print('Original feature shape: (%d, %d)' % (feature.shape[0], feature.shape[1]))
print('Number of classes: %d' % (len(np.unique(label))))

# Class feature: the mean feature over all images of the class.
# Class weight: the number of images of the class.
# np.unique returns the distinct labels in sorted order, so row k of
# feature_per_class / entry k of weight belongs to the k-th smallest label.
sorted_label = np.unique(label)
# Take the feature dimension from the data instead of hard-coding 2048, so the
# same cell works for features extracted from a different backbone.
feature_per_class = np.zeros((len(sorted_label), feature.shape[1]), dtype=np.float32)
weight = np.zeros((len(sorted_label), ), dtype=np.float32)
for counter, i in enumerate(sorted_label):
    # Vectorized boolean mask selecting the images that belong to class i.
    idx = (label == i)
    feature_per_class[counter, :] = np.mean(feature[idx, :], axis=0)
    weight[counter] = np.sum(idx)

print('Feature per class shape: (%d, %d)' % (feature_per_class.shape[0],
                                             feature_per_class.shape[1]))

# Persist the class-level features and weights next to the raw features; the
# domain-similarity cell reads them back as '<dataset>.npy' / '<dataset>_weight.npy'.
np.save(feature_dir + dataset + '.npy', feature_per_class)
np.save(feature_dir + dataset + '_weight.npy', weight)

Original feature shape: (5994, 2048)
Number of classes: 200
Feature per class shape: (200, 2048)


*   Calculate feature per class and weight for all datasets.

In [3]:
# Calculate domain similarity by Earth Mover's Distance (EMD).

# Set minimum number of images per class for computational efficiency.
# Classes in source domain with less than min_num_imgs images will be ignored.
min_num_imgs = 200

# Gamma for domain similarity: exp(-gamma x EMD)
gamma = 0.01

# Three source domain datasets:
# ImageNet (ILSVRC 2012) training set,
# iNaturalist 2017 training set (original training + 90% validation),
# ImageNet + iNaturalist training set.
source_domain = ['ImageNet_train', 'inat_train', 'ImageNet+inat']

# Seven target domain datasets (all of them are from the training set):
# CUB-200-2011 Bird, Oxford Flower 102, Stanford Car, Stanford Dog,
# FGVC-Aircraft, NABirds, Food 101
target_domain = ['cub_200', 'flower_102', 'stanford_cars', 'stanford_dogs',
                 'aircraft', 'nabirds', 'food_101']

# Create ImageNet + iNaturalist feature and weight by concatenation.
f_1 = np.load(feature_dir + 'ImageNet_train' + '.npy')
w_1 = np.load(feature_dir + 'ImageNet_train' + '_weight.npy')
f_2 = np.load(feature_dir + 'inat_train' + '.npy')
w_2 = np.load(feature_dir + 'inat_train' + '_weight.npy')
f = np.concatenate([f_1, f_2], axis=0)
w = np.concatenate([w_1, w_2], axis=0)
np.save(feature_dir + 'ImageNet+inat.npy', f)
np.save(feature_dir + 'ImageNet+inat_weight.npy', w)

tic = time.time()
for sd in source_domain:
    # The source features/weights do not depend on the target domain, so load
    # and filter them once per source instead of once per (source, target) pair.
    f_s = np.load(feature_dir + sd + '.npy')
    w_s = np.load(feature_dir + sd + '_weight.npy')

    # Remove source domain classes with number of images < 'min_num_imgs'.
    keep = w_s >= min_num_imgs
    f_s = f_s[keep, :]
    w_s = w_s[keep]
    if len(w_s) == 0:
        # Without any source class the source histogram would be all zeros and
        # the resulting EMD would be meaningless; fail loudly instead.
        raise ValueError('No class in source domain %s has at least %d images.'
                         % (sd, min_num_imgs))

    for td in target_domain:
        print('%s --> %s' % (sd, td))
        f_t = np.load(feature_dir + td + '.npy')
        w_t = np.load(feature_dir + td + '_weight.npy')

        # Make sure two histograms have the same length and distance matrix is square:
        # both histograms live on the union of source and target classes, with the
        # source mass on the first len(w_s) bins and the target mass on the rest.
        num_bins = len(w_s) + len(w_t)
        data = np.concatenate([f_s, f_t], axis=0).astype(np.float64)
        hist_s = np.zeros((num_bins,), np.float64)
        hist_t = np.zeros((num_bins,), np.float64)
        hist_s[:len(w_s)] = w_s / np.sum(w_s)
        hist_t[len(w_s):] = w_t / np.sum(w_t)

        # Ground distance between every pair of class features. pyemd expects a
        # C-contiguous float64 matrix; ascontiguousarray avoids a copy when the
        # matrix already satisfies that.
        D = np.ascontiguousarray(euclidean_distances(data, data), dtype=np.float64)

        emd = pyemd.emd(hist_s, hist_t, D)
        print('EMD: %.3f    Domain Similarity: %.3f\n' % (emd, np.exp(-gamma*emd)))
print('Elapsed time: %.3fs' % (time.time() - tic))

ImageNet_train --> cub_200
EMD: 57.532    Domain Similarity: 0.563

ImageNet_train --> flower_102
EMD: 64.492    Domain Similarity: 0.525

ImageNet_train --> stanford_cars
EMD: 57.993    Domain Similarity: 0.560

ImageNet_train --> stanford_dogs
EMD: 48.044    Domain Similarity: 0.619

ImageNet_train --> aircraft
EMD: 58.717    Domain Similarity: 0.556

ImageNet_train --> nabirds
EMD: 55.595    Domain Similarity: 0.574

ImageNet_train --> food_101
EMD: 57.349    Domain Similarity: 0.564

inat_train --> cub_200
EMD: 42.961    Domain Similarity: 0.651

inat_train --> flower_102
EMD: 61.340    Domain Similarity: 0.542

inat_train --> stanford_cars
EMD: 62.488    Domain Similarity: 0.535

inat_train --> stanford_dogs
EMD: 55.880    Domain Similarity: 0.572

inat_train --> aircraft
EMD: 61.148    Domain Similarity: 0.543

inat_train --> nabirds
EMD: 37.703    Domain Similarity: 0.686

inat_train --> food_101
EMD: 62.587    Domain Similarity: 0.535

ImageNet+inat --> cub_200
EMD: 53.868    D