# Dimensionality reduction with t-SNE


In [None]:
import os
import sys
import numpy as np
import sklearn
import keras as K
from keras.applications.nasnet import NASNetLarge
from keras.applications.resnet50 import ResNet50
import tensorflow as tf
import kaggle
from numba import cuda
import t_sne_bhcuda.t_sne_bhcuda.bhtsne_cuda as tsne_bhcuda
from utils import (plot_tsne, get_gpu_name, get_cuda_version, get_cudnn_version,
                   find_files_with_pattern, featurize_images, clear_memory_all_gpus)

# Report the interpreter, library and GPU stack versions for reproducibility.
# Each entry pairs an output template with a zero-argument getter, so every
# value is still looked up one at a time, immediately before it is printed.
environment_report = [
    ("System version: {}", lambda: sys.version),
    ("Sklearn version: {}", lambda: sklearn.__version__),
    ("Numpy version: {}", lambda: np.__version__),
    ("Kaggle version: {}", lambda: kaggle.KaggleApi.__version__),
    ("Keras version: {}", lambda: K.__version__),
    ("Keras backend: {}", lambda: K.backend.backend()),
    ("Keras image data format: {}", lambda: K.backend.image_data_format()),
    ("Tensorflow version: {}", lambda: tf.__version__),
    ("GPU: {}", get_gpu_name),
    ("CUDA version: {}", get_cuda_version),
    ("CuDNN version: {}", get_cudnn_version),
]
for template, getter in environment_report:
    print(template.format(getter()))


# Autoreload changes in imported files: mode 2 reloads every imported module
# before each cell is executed, so edits to local helpers (e.g. utils) are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Allow multiple displays per cell: with "all", every expression statement in a
# cell is echoed as output, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Dataset

We use the [Dogs vs Cats](https://www.kaggle.com/c/dogs-vs-cats/data) dataset, which contains 25,000 labelled images in 2 classes (dogs and cats).

Before downloading the data, follow the [instructions](https://github.com/Kaggle/kaggle-api#api-credentials) to set up your Kaggle API credentials.


In [None]:
# Download the dogs-vs-cats competition files with the Kaggle CLI.
# NOTE(review): the CLI path is hard-coded to the `py35` conda environment under
# /anaconda; adjust it if your environment lives elsewhere.
!/anaconda/envs/py35/bin/kaggle competitions download -c dogs-vs-cats --force

In [None]:
# Extract the training archive quietly (-q); the cells below look for the
# images under the `train` directory.
!unzip -q train.zip

In [None]:
# Gather the image paths for each class, keeping at most the first 1000 matches.
# The bare len(...) expressions are echoed as cell output and the prints preview
# the first ten paths of each class.
files_dog = find_files_with_pattern("train", "dog*")[:1000]
len(files_dog)
print(files_dog[0:10])

files_cat = find_files_with_pattern("train", "cat*")[:1000]
len(files_cat)
print(files_cat[0:10])

# Dogs first, then cats: the labels built in the next cell follow this order.
file_names = files_dog + files_cat

In [None]:
# One integer label per image: 0 for dogs and 1 for cats, concatenated in the
# same dogs-then-cats order as file_names.
labels_dog = [0 for _ in files_dog]
labels_cat = [1 for _ in files_cat]
labels = list(labels_dog)
labels += labels_cat
len(labels)

### Image featurization

In [None]:
# ResNet50 pre-trained on ImageNet, used here as a fixed feature extractor
# (https://keras.io/applications/#resnet50). include_top=False leaves out the
# fully-connected classification layer, and pooling='avg' applies global average
# pooling to the last convolutional block so each image yields one flat vector.
model = ResNet50(
    weights='imagenet',
    include_top=False,
    pooling='avg',
    input_shape=(224, 224, 3),
)

In [None]:
# Run every image in file_names through the model; the bare expression below
# echoes the shape of the result.
# NOTE(review): featurize_images comes from the local utils module — presumably
# it returns one feature row per image; confirm there.
features = featurize_images(file_names, model)
features.shape

In [None]:
# clear gpu memory
# NOTE(review): presumably this frees the memory still held by the Keras /
# TensorFlow model so the GPU t-SNE run below can allocate it — confirm in utils.
clear_memory_all_gpus()


### Dimensionality reduction with t-SNE

In [None]:
# Settings shared by both t-SNE runs below (scikit-learn and GPU), so that the
# two results are produced with identical hyperparameters.
perplexity = 10.0
theta = 0.5
learning_rate = 200.0  # passed to t_sne() as `eta`
iterations = 2000
gpu_mem = 0.8  # NOTE(review): presumably the fraction of GPU memory the CUDA run may use — confirm
files_dir='tsne_results'  # NOTE(review): presumably where t_sne_bhcuda keeps its intermediate files — confirm

In [None]:
%%time
# Reference run: use_scikit=True asks t_sne_bhcuda for its scikit-learn backend.
# All other settings match the GPU run so the two timings can be compared.
t_sne_result_sklearn = tsne_bhcuda.t_sne(
    samples=features,
    use_scikit=True,
    files_dir=files_dir,
    no_dims=2,
    perplexity=perplexity,
    eta=learning_rate,
    theta=theta,
    iterations=iterations,
    gpu_mem=gpu_mem,
    randseed=-1,
    verbose=2,
)

In [None]:
# Plot the scikit-learn embedding together with the dog/cat labels.
plot_tsne(t_sne_result_sklearn, labels)

In [None]:
%%time
# GPU run: use_scikit=False, with every other setting identical to the
# scikit-learn run so the two timings can be compared.
t_sne_result_gpu = tsne_bhcuda.t_sne(
    samples=features,
    use_scikit=False,
    files_dir=files_dir,
    no_dims=2,
    perplexity=perplexity,
    eta=learning_rate,
    theta=theta,
    iterations=iterations,
    gpu_mem=gpu_mem,
    randseed=-1,
    verbose=2,
)

In [None]:
# Plot the GPU embedding together with the dog/cat labels.
plot_tsne(t_sne_result_gpu, labels)

In [None]:
# Clean up: delete the extracted training images.
# NOTE(review): only the `train` directory is removed; train.zip is left in place.
!rm -rf train