This notebook allows you to play with Theano and Lasagne.

It uses a pre-trained VGG16 network.

In [None]:
import collections
import cPickle
import glob
import sys
sys.path.append("/home/nouiz/repos/Lasagne")

from lasagne.utils import floatX
#import matplotlib.pyplot as plt
import numpy as np
import lasagne, theano

# vgg16 includes the model definition and the functions to read and preprocess images for VGG16
from vgg16 import build_model, prep_image

# Populating the interactive namespace from numpy and matplotlib
%pylab inline

In [None]:
def minkowski_mat(x, m, p=2.0):
    """Minkowski distance of order `p` between a vector and the rows of a matrix.

    `x` is a single feature vector and `m` is a matrix holding one feature
    vector per row; `x` is broadcast against the rows of `m`.

    Returns one distance per row of `m`.
    """
    abs_delta = np.abs(x - m)
    # Sum the p-th powers along each row, then take the p-th root.
    row_totals = (abs_delta ** p).sum(axis=1)
    return row_totals ** (1.0 / p)

#TODO: keep?
def knn_idx(x, features, p=2):
    """Find the nearest neighbour of `x` among the rows of `features`.

    Distances are Minkowski distances of order `p`; the value returned is the
    row index of `features` with the smallest distance to `x`.
    """
    distances = minkowski_mat(x, features, p=p)
    nearest_row = np.argmin(distances)
    return nearest_row

In [None]:
# vgg16.pkl contains the trained weights and the mean values needed for the
# preprocessing.
# Pickles are binary data, so the file is opened in binary mode ('rb'); text
# mode can corrupt the stream on platforms that translate line endings.
# NOTE(review): unpickling can execute arbitrary code -- only load a vgg16.pkl
# obtained from a trusted source.
with open('vgg16.pkl', 'rb') as f:
    d = cPickle.load(f)

MEAN_IMAGE = d['mean value']
# Get the Lasagne model
net = build_model()
# Set the pre-trained weights
lasagne.layers.set_all_param_values(net['prob'], d['param values'])

In [None]:
# Build the symbolic expression for the output of the 'fc8' layer, which is
# used as the feature representation of an image. deterministic=True requests
# the deterministic (inference-time) output from Lasagne.
output = lasagne.layers.get_output(
    net['fc8'],
    deterministic=True,
)
# Compile that expression into a callable Theano function taking the network
# input as its only argument.
compute_last = theano.function(
    inputs=[net['input'].input_var],
    outputs=output,
)

def compute_feats(inputs):
    """Return the network features for a collection of images.

    Every element of `inputs` is handed to `prep_image` together with
    `MEAN_IMAGE`; the preprocessed images are then evaluated by `compute_last`
    as a single batch.
    """
    # The second element returned by prep_image is a 4d tensor holding a
    # single image; take entry 0 to drop that leading axis so that we can
    # assemble the batch ourselves.
    batch = [prep_image(image, MEAN_IMAGE)[1][0] for image in inputs]
    return compute_last(batch)

In [None]:
# Create a new dataset with 2 new classes
class1_dir = './dog/'
class1_name = 'chien'
class2_dir = './donut/'
class2_name = 'beigne'
test_dir = './test/'

# glob returns paths in arbitrary order; sort them so that example indices
# (e.g. "example 0" below) are the same from one run to the next.
class1_files = sorted(glob.glob(class1_dir + '*'))  # files under the "dog/" directory
class1_feats = compute_feats(class1_files)
class1_targets = [class1_name] * len(class1_files)  # build the target classes

# Same thing for class 2
class2_files = sorted(glob.glob(class2_dir + '*'))
class2_feats = compute_feats(class2_files)
class2_targets = [class2_name] * len(class2_files)

# Build the training set by putting all the classes together; files, features
# and targets are concatenated in the same order so that they stay aligned.
train_files = class1_files + class2_files
train_feats = np.concatenate([class1_feats, class2_feats])
train_targets = class1_targets + class2_targets

# Also compute the representations of the test images
test_files = sorted(glob.glob(test_dir + '*'))
test_feats = compute_feats(test_files)

# Show the name of the file corresponding to example 0
print(test_files[0])

# Call knn_idx to get the nearest neighbour of that example
idx0 = knn_idx(test_feats[0], train_feats)

# Show the name of that training file
print(train_files[idx0])

# Show the predicted class
print(train_targets[idx0])


In [None]:
def plot_knn(test_file, test_feat, train_files, train_feats, train_classes):
    """Show a test image next to its nearest training image.

    The nearest neighbour of `test_feat` is looked up in `train_feats` with
    `knn_idx`; `train_files` and `train_classes` are indexed with the same
    row index to get the neighbour's file and class. The neighbour's class
    is used as the prediction for the test image.

    Side effect: the finished figure is also written to 'foo2.png'.
    """
    idx = knn_idx(test_feat, train_feats)
    knn_file = train_files[idx]
    knn_class = train_classes[idx]

    figure(figsize=(12, 2))

    # Left panel: the test image, titled with the predicted class.
    # The first element returned by prep_image is what gets displayed
    # (presumably the image in displayable form -- confirm in vgg16).
    subplot(1, 2, 1)
    imshow(prep_image(test_file, MEAN_IMAGE)[0])
    axis('off')
    title('prediction : ' + knn_class)

    # Right panel: the nearest training image, titled with its class.
    subplot(1, 2, 2)
    imshow(prep_image(knn_file, MEAN_IMAGE)[0])
    axis('off')
    title(knn_class)

    # Save only once both panels are drawn; saving before the second imshow
    # would write a figure whose right panel is still empty.
    savefig('foo2.png', bbox_inches='tight')

# Plot the first test image together with its nearest training image.
plot_knn(
    test_file=test_files[0],
    test_feat=test_feats[0],
    train_files=train_files,
    train_feats=train_feats,
    train_classes=train_targets,
)


def most_frequent(label_list):
    """Return the label that occurs most often in `label_list`.

    Raises IndexError when `label_list` is empty.
    """
    tally = collections.Counter(label_list)
    ranked = tally.most_common()
    winner, _count = ranked[0]
    return winner

def kppv_idx(x, features, p=2, k=1):
    """Row indices of the `k` rows of `features` closest to `x`.

    Distances are Minkowski distances of order `p`. The indices are ordered
    from the nearest row to the farthest; fewer than `k` indices are returned
    when `features` has fewer than `k` rows.
    """
    distances = minkowski_mat(x, features, p=p)
    ranking = np.argsort(distances)
    return ranking[:k]


def plot_kppv(test_file, test_feat, train_files, train_feats, train_classes, k=1):
    """Show a test image followed by its `k` nearest training images.

    The neighbours of `test_feat` are looked up in `train_feats` with
    `kppv_idx`; `train_files` and `train_classes` are indexed with the
    returned row indices. The predicted class is the most frequent class
    among those neighbours, and it is used as the title of the test image.
    Each neighbour is titled with its own class.
    """
    kppv_i = kppv_idx(test_feat, train_feats, k=k)
    kppv_files = [train_files[i] for i in kppv_i]
    kppv_classes = [train_classes[i] for i in kppv_i]
    pred_class = most_frequent(kppv_classes)

    figure(figsize=(12, 4))
    # The grid has k+2 slots on one row: slot 1 holds the test image, slot 2
    # is left empty, and the neighbours start at slot 3.
    subplot(1, k+2, 1)
    # Keep only the first 3 channels of the image before displaying it.
    imshow(prep_image(test_file, MEAN_IMAGE)[0][:, :, :3])
    axis('off')
    title('prediction : ' + pred_class)
    # Iterate over the neighbours actually found rather than over range(k):
    # kppv_idx returns fewer than k indices when the training set is smaller
    # than k, and indexing by position would then raise IndexError.
    for i, (kppv_file, kppv_class) in enumerate(zip(kppv_files, kppv_classes)):
        kppv_preproc = prep_image(kppv_file, MEAN_IMAGE)[0][:, :, :3]
        subplot(1, k+2, i+3)
        imshow(kppv_preproc)
        axis('off')
        title(kppv_class)


In [None]:
# Plot every test image together with its 7 nearest training images.
for i, test_file in enumerate(test_files):
    plot_kppv(
        test_file,
        test_feats[i],
        train_files,
        train_feats,
        train_targets,
        k=7,
    )