# t-SNE with CUDA acceleration

Run t-SNE over sets of images using the CannyLab tsne-cuda implementation: https://github.com/CannyLab/tsne-cuda

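Requires that feature vectors have already been extracted from the images and saved as pickle files (`features*.pickle`), each holding the list of image paths together with the matching feature vectors.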

In [None]:
import os
import random
import pickle
import time
import datetime

In [None]:
from tsnecuda import TSNE

In [None]:
from PIL import Image
# from pillow import Image

In [None]:
from matplotlib.pyplot import imshow

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
# from sklearn.manifold import TSNE

from matplotlib.pyplot import imshow

In [None]:
# Run parameters shared by the cells below.
# t-SNE perplexity; passed as TSNE(perplexity=...) in the processing loop.
perp = 50
# Presumably toggles the PCA reduction that precedes t-SNE -- confirm against the processing loop.
bPCA = True
# Number of t-SNE optimisation iterations; passed as TSNE(n_iter=...) and embedded in output file names.
num_iterations = 2000

In [None]:
# Folder that holds the pickled feature files produced by the extraction step.
pickle_folder = '/home/rte/re-imaging/visualisation/'

# Keep only "features*.pickle" entries and store their full paths in sorted order.
paths = sorted(
    os.path.join(pickle_folder, entry)
    for entry in os.listdir(pickle_folder)
    if entry.startswith("features") and entry.endswith(".pickle")
)

# Echo the selection so the run log shows exactly which files will be processed.
print("----- list of all paths: ")
for feature_path in paths:
    print(feature_path)
print("----------")

In [None]:
# For every feature pickle: optionally PCA-reduce the features, embed them in
# 2-D with tsne-cuda, save the embedding as a pickle, and render a tiled image
# in which each source image is pasted at its embedded position.

# Canvas size and maximum tile edge (pixels); identical for every dataset.
width = 4000
height = 3000
max_dim = 100

for p in paths:
    print(p)

    # Parse "<prefix>_<category>_<year>..." from the file name alone, so that
    # underscores in the directory part of the path cannot shift the fields
    # and the ".pickle" extension cannot end up inside one of them.
    name_parts = os.path.splitext(os.path.basename(p))[0].split('_')
    category = name_parts[1]
    year = name_parts[2]
    print("category: " + category)
    print("year: " + str(year))

    # NOTE(security): pickle.load executes arbitrary code from the file; only
    # open pickles that were produced by a trusted feature-extraction run.
    with open(p, "rb") as read_file:
        images, features = pickle.load(read_file)

    # check that we still have the features and list of images
    print("----- checking images and features -----")
    print("length of images: " + str(len(images)))
    print("length of features: " + str(len(features)))
    for img, f in zip(images[:5], features[:5]):
        print("image: %s, features: %0.2f,%0.2f,%0.2f,%0.2f... "%(img, f[0], f[1], f[2], f[3]))

    # Nothing to embed or draw: skip instead of failing inside PCA / t-SNE.
    if len(images) == 0 or len(features) == 0:
        print("no images or features in " + p + ", skipping")
        continue

    features = np.array(features)
    if bPCA:
        print("----- running pca across features -----")
        print(features.shape)
        print("number of samples: ", np.size(features, 0))
        # PCA cannot keep more components than there are samples or features.
        n_components = min(features.shape[0], features.shape[1], 300)
        pca = PCA(n_components=n_components)
        pca.fit(features)
        pca_features = pca.transform(features)
        print("----- pca done -----")
    else:
        pca_features = features

    print("----- running tSNE -----")

    X = np.asarray(pca_features)
    print(X.shape)
    tsne = TSNE(n_components=2, learning_rate=150, perplexity=perp, verbose=2, n_iter=num_iterations).fit_transform(X)
    print(tsne.shape)

    print("----- tSNE done -----")

    # write pickle
    print("writing tsne pickle")

    # Timestamp keeps repeated runs from overwriting each other's output.
    st = datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')

    filename = "tSNE_cuda_" + category + "_" + year + "_n" + str(num_iterations) + "_p" + str(perp) + "_" + st
    print(filename + ".pickle")

    with open(filename + ".pickle", "wb") as write_file:
        pickle.dump([images, tsne], write_file)

    # normalise points to [0, 1]; a zero span (all points share a coordinate)
    # is replaced by 1 so the division cannot produce NaN positions
    tx, ty = tsne[:,0], tsne[:,1]
    x_span = np.max(tx) - np.min(tx)
    y_span = np.max(ty) - np.min(ty)
    tx = (tx - np.min(tx)) / (x_span if x_span > 0 else 1)
    ty = (ty - np.min(ty)) / (y_span if y_span > 0 else 1)

    print("----- creating image from tiles -----")

    full_image = Image.new('RGBA', (width, height))
    for img, x, y in zip(images, tx, ty):
        # Context manager closes the source file once the tile has been
        # resized, so file handles do not pile up over large datasets.
        with Image.open(img) as source:
            tw, th = source.size
            # Oversized source images are left out of the mosaic.
            if tw >= 10000 or th >= 10000:
                continue
            # Shrink so the longest edge is at most max_dim; never upscale.
            rs = max(1, tw/max_dim, th/max_dim)
            # Clamp to 1 px: extreme aspect ratios would otherwise round to 0.
            tile_size = (max(1, int(tw/rs)), max(1, int(th/rs)))
            # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS.
            tile = source.resize(tile_size, Image.LANCZOS)
        full_image.paste(tile, (int((width-max_dim)*x), int((height-max_dim)*y)), mask=tile.convert('RGBA'))

    plt.figure(figsize = (16,12))
    imshow(full_image)

    full_image.save(filename + ".png")
    print("saved file: " + filename + ".png")
    print("----- finished! file saved -----")
            
