This notebook will take you through the process of generating a t-SNE embedding of a set of images, using a feature vector for each image derived from the activations of the last 4096-unit fully-connected layer of a convolutional neural network (VGG-16).

To start, you need a folder of images that you would like to apply it to. It will work with any number of images, but typically a few hundred is aimed for, or a few thousand if you are ambitious. 

The code also has a number of dependencies. You need to install:

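 * [numpy](https://numpy.org)
 * [scikit-learn](https://scikit-learn.org)
 * [keras](https://keras.io)
 * [h5py](https://www.h5py.org)
 * [Pillow](https://pillow.readthedocs.io)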

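Lastly, you should also download the weights of the VGG-16 network and save them to the location given by `vgg_path` below (`../data/vgg16_weights.h5`). You can do so by running the following command in a terminal, substituting the download URL of the weights file:

    wget -O ../data/vgg16_weights.h5 <URL of vgg16_weights.h5>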
    
Run the following import commands and make sure all the libraries are correctly installed and import without errors.

In [11]:
import numpy as np
from os import listdir
from os.path import isfile, join
import h5py
from PIL import Image

from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers import Convolution2D, ZeroPadding2D, MaxPooling2D
from keras.optimizers import SGD

from sklearn.manifold import TSNE


In [19]:
# HDF5 file holding the pretrained VGG-16 weights; passed to VGG_16().
vgg_path = '../data/vgg16_weights.h5'
# Directory whose files are used as the input images.
images_path = '../data/animals'
# Output text file; one "path, x, y" line is written per image.
tsne_path = 'tsne_points.txt'
skip = 15 # stride used to subsample the directory listing (1 = no subsampling)

In [12]:
def get_image(path):
    """Load an image file and turn it into an input tensor for the network.

    The image is converted to RGB, resized to 224x224, mean-centred per
    channel and rearranged into channels-first layout with a leading batch
    axis.

    Args:
        path: path of the image file to load.

    Returns:
        A float32 numpy array of shape (1, 3, 224, 224).
    """
    # The context manager closes the underlying file handle, which matters
    # when this is called in a loop over many images.
    with Image.open(path) as src:
        rgb = src if src.mode == 'RGB' else src.convert('RGB')
        # Image.LANCZOS is the filter formerly exposed as Image.ANTIALIAS;
        # the old alias was removed in Pillow 10.
        resized = rgb.resize((224, 224), Image.LANCZOS)
        # np.array copies the pixels, so the data stays valid (and writable)
        # after the file is closed. Shape here is (224, 224, 3).
        pixels = np.array(resized, dtype=np.float32)

    # Per-channel means, subtracted in R, G, B order.
    # NOTE(review): Caffe-derived VGG weights are often paired with BGR
    # channel order; this keeps the notebook's existing RGB order -- confirm
    # against the weights file in use.
    pixels -= np.array([123.68, 116.779, 103.939], dtype=np.float32)

    # (H, W, C) -> (C, H, W), then add the batch axis.
    return np.expand_dims(pixels.transpose((2, 0, 1)), axis=0)


In [13]:
def VGG_16(weights_path):
    """Build a truncated VGG-16 and load pretrained weights into it.

    The network consists of the five VGG-16 convolutional blocks followed by
    Flatten, Dense(4096), Dropout(0.5) and Dense(4096); the output of the
    final Dense layer is what the model predicts.

    Args:
        weights_path: path to an HDF5 file with an 'nb_layers' attribute and
            one 'layer_<k>' group per layer, each carrying an 'nb_params'
            attribute and 'param_<p>' datasets.

    Returns:
        The Sequential model with the stored weights applied to every layer
        it has in common with the weights file.
    """
    # (number of filters, number of 3x3 conv layers) for each block; every
    # block ends with a 2x2 max-pool.
    conv_blocks = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

    model = Sequential()
    is_first_layer = True
    for nb_filter, nb_conv in conv_blocks:
        for _ in range(nb_conv):
            if is_first_layer:
                # Only the very first layer declares the input shape.
                model.add(ZeroPadding2D((1,1), input_shape=(3,224,224)))
                is_first_layer = False
            else:
                model.add(ZeroPadding2D((1,1)))
            model.add(Convolution2D(nb_filter, 3, 3, activation='relu'))
        model.add(MaxPooling2D((2,2), strides=(2,2)))

    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))

    # Open read-only and close the file deterministically once the weights
    # have been handed to the model.
    with h5py.File(weights_path, 'r') as f:
        # The file may describe more layers than this truncated model has;
        # only the layers present in the model are loaded.
        nb_layers = min(f.attrs['nb_layers'], len(model.layers))
        for k in range(nb_layers):
            g = f['layer_{}'.format(k)]
            weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
            model.layers[k].set_weights(weights)
    print("finished loading VGGNet")
    return model

In [14]:


# Build the truncated VGG-16 network and load its pretrained weights from
# vgg_path; VGG_16 prints a confirmation message when loading is done.
model = VGG_16(vgg_path)


finished loading VGGNet


In [15]:
# No training happens in this notebook; the model is only used for predict().
# NOTE(review): compile() is presumably required by this Keras version before
# predict() can be called -- confirm before removing.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')

# get images
# listdir() returns entries in arbitrary order, so sort to make the selection
# reproducible, and drop hidden files (e.g. .DS_Store) which are not images.
images = sorted(f for f in listdir(images_path)
                if isfile(join(images_path, f)) and not f.startswith('.'))
# Keep every skip-th file starting from the first one, so skip = 1 really
# does use all the images.
images = images[::skip]
print(len(images))


134


In [16]:

# analyze images and grab activations
# Each image is run through the network on its own; model.predict returns a
# batch of size one, so [0] picks out that image's feature vector.
activations = []
for idx, image_path in enumerate(images):
    # print() with a single pre-formatted string works on Python 2 and 3.
    print("getting activations for %s %d/%d" % (image_path, idx, len(images)))
    file_path = join(images_path, image_path)
    image = get_image(file_path)
    acts = model.predict(image)[0]
    activations.append(acts)


getting activations for bat-0001.jpg 0/134
getting activations for bat-0016.jpg 1/134
getting activations for bat-0031.jpg 2/134
getting activations for bear-0006.jpg 3/134
getting activations for bear-0021.jpg 4/134
getting activations for bear-0036.jpg 5/134
getting activations for bonsai-101-0011.jpg 6/134
getting activations for bonsai-101-0026.jpg 7/134
getting activations for butterfly-0001.jpg 8/134
getting activations for butterfly-0016.jpg 9/134
getting activations for butterfly-0031.jpg 10/134
getting activations for cactus-0006.jpg 11/134
getting activations for cactus-0021.jpg 12/134
getting activations for cactus-0036.jpg 13/134
getting activations for camel-0011.jpg 14/134
getting activations for camel-0026.jpg 15/134
getting activations for centipede-0001.jpg 16/134
getting activations for centipede-0016.jpg 17/134
getting activations for centipede-0031.jpg 18/134
getting activations for chimp-0006.jpg 19/134
getting activations for chimp-0021.jpg 20/134
getting activati

In [17]:
# Stack the per-image feature vectors into one (n_images, n_features) matrix.
X = np.array(activations)
# t-SNE is stochastic; a fixed random_state makes the 2-D embedding
# reproducible across runs.
tsne = TSNE(n_components=2, perplexity=30, verbose=2, random_state=0).fit_transform(X)

[t-SNE] Computing pairwise distances...
[t-SNE] Computing 91 nearest neighbors...
[t-SNE] Computed conditional probabilities for sample 134 / 134
[t-SNE] Mean sigma: 24.345238
[t-SNE] Iteration 25: error = 2.1820540, gradient norm = 0.0706403
[t-SNE] Iteration 50: error = 1.9238983, gradient norm = 0.0020988
[t-SNE] Iteration 75: error = 1.7429490, gradient norm = 0.0199837
[t-SNE] Iteration 100: error = 1.7353102, gradient norm = 0.0018349
[t-SNE] Error after 100 iterations with early exaggeration: 1.735310
[t-SNE] Iteration 125: error = 1.8405241, gradient norm = 0.0018886
[t-SNE] Iteration 150: error = 1.7340525, gradient norm = 0.0012949
[t-SNE] Iteration 175: error = 1.7097749, gradient norm = 0.0011852
[t-SNE] Iteration 200: error = 1.7019470, gradient norm = 0.0012243
[t-SNE] Iteration 225: error = 1.6995424, gradient norm = 0.0012388
[t-SNE] Iteration 250: error = 1.6988744, gradient norm = 0.0012432
[t-SNE] Iteration 275: error = 1.6986834, gradient norm = 0.0012444
[t-SNE] It

In [18]:
# Min-max rescale each t-SNE axis independently onto the interval [0, 1]:
# tsne.T yields the x column first and the y column second.
x, y = ((axis - axis.min()) / (axis.max() - axis.min()) for axis in tsne.T)

In [20]:
# Write one "path, x, y" line per image. The with-block closes the file even
# if an error occurs part-way through the loop.
with open(tsne_path, 'w') as f:
    for idx, image_path in enumerate(images):
        file_path = join(images_path, image_path)
        f.write("%s, %f, %f\n" % (file_path, x[idx], y[idx]))

# print() with a single pre-formatted string works on Python 2 and 3.
print("done! results saved to %s" % tsne_path)

done! results saved to tsne_points.txt


In [None]:
# Draw a thumbnail of every image onto a blank canvas at its normalized
# t-SNE position.
w, h = 500, 500   # canvas size in pixels
tile = 50         # maximum thumbnail edge length in pixels

# Image.new(mode, size) creates the blank canvas (there is no Image.net).
main_img = Image.new('RGB', (w, h))
for idx, image_path in enumerate(images):
    file_path = join(images_path, image_path)
    # x and y lie in [0, 1]; scaling by (size - tile) keeps every thumbnail
    # fully inside the canvas, and paste() needs integer pixel offsets.
    x_ = int((w - tile) * x[idx])
    y_ = int((h - tile) * y[idx])
    with Image.open(file_path) as img:
        # convert() returns an independent, fully loaded copy, so the
        # thumbnail remains usable after the source file is closed.
        thumb = img.convert('RGB')
        thumb.thumbnail((tile, tile), Image.LANCZOS)
    main_img.paste(thumb, (x_, y_))

main_img
    
    