# Visualize Image Embeddings
Now that we've collected all of the image embeddings into one place, we can visualize them. To do this, we'll use various projection algorithms from scikit-learn to project them into a 2-dimensional space that we can plot, and then use Bokeh to make nice interactive charts.

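You have to launch `python -m SimpleHTTPServer` in your image directory to get the tooltip images to work: the tooltips load each image from `http://localhost:8000/`, which is the port that server listens on by default.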

In [1]:
import projtools
# Re-execute the module so edits to projtools are picked up without
# restarting the kernel. NOTE: bare `reload` is a builtin on Python 2 only.
reload(projtools)

<module 'projtools' from 'projtools.pyc'>

In [2]:
# JSON file holding the previously collected image embeddings.
# NOTE(review): absolute, machine-specific path -- adjust for your environment.
features_file = "/data/africa2017/features/features-rot256.json"
# FeatureDict comes from the project-local projtools module; after loading,
# later cells read its `ftr_matrix` and `names` attributes.
fd = projtools.FeatureDict()
fd.load_json(features_file)

In [3]:
# Peek at the loaded feature matrix (the array repr elides the middle rows/columns).
fd.ftr_matrix

array([[ 0.53318638,  3.83277154,  0.57834119, ...,  0.35179466,
         0.8214885 ,  0.73904681],
       [ 1.64695299,  0.14653416,  0.4825823 , ...,  0.82326329,
         0.76226175,  0.44928095],
       [ 1.55584395,  0.08741904,  0.68297189, ...,  0.56759346,
         1.13845491,  0.44440094],
       ..., 
       [ 1.52112746,  1.50854087,  0.28895995, ...,  0.1872488 ,
         2.30366659,  1.73646104],
       [ 0.40040985,  1.57477045,  0.7571823 , ...,  1.38597167,
         0.85937774,  1.23852122],
       [ 1.83352375,  0.18250319,  0.        , ...,  1.52337563,
         0.28621542,  4.90630102]])

In [4]:
# Dimensions of the feature matrix -- presumably (number of images, embedding size); confirm against projtools.
fd.ftr_matrix.shape

(2609, 2048)

In [5]:
import sklearn
from sklearn.decomposition import PCA

In [6]:
%%time
proj_algo = PCA(n_components=2)
pca_proj = proj_algo.fit_transform(fd.ftr_matrix)

CPU times: user 488 ms, sys: 61.4 ms, total: 549 ms
Wall time: 331 ms


In [7]:
# Sanity check: same number of rows as the feature matrix, n_components=2 columns.
pca_proj.shape

(2609, 2)

In [8]:
import bokeh
from bokeh.io import output_notebook, show
# Configure Bokeh to render plots inline in the notebook's output cells.
output_notebook()
from bokeh.plotting import figure

In [9]:
from bokeh.models import HoverTool
from bokeh.plotting import ColumnDataSource

# Hover tooltip markup: the image name plus a 128x128 thumbnail fetched from
# the local HTTP server. `@imgname` is filled in from the data source column
# of the same name.
tooltip_html = """
        <div>  <span style="font-size:8px">@imgname</span>
            <img src="http://localhost:8000/@imgname.jpg" height=128 width=128></img>
        </div>
    """

# One entry per projected point: the two PCA coordinates and the image name.
pca_datasource = ColumnDataSource(data=dict(
    x=pca_proj[:, 0],
    y=pca_proj[:, 1],
    imgname=fd.names,
))

# Interactive scatter plot; semi-transparent markers keep overlapping points visible.
f = figure(
    tools=["pan,wheel_zoom,box_zoom,reset,tap", HoverTool(tooltips=tooltip_html)],
    plot_width=800,
    plot_height=600,
    title="PCA",
)
f.circle('x', 'y', source=pca_datasource, size=15, alpha=0.3)
show(f)

In [10]:
%%time
from sklearn.manifold import TSNE, SpectralEmbedding, MDS
# Alternative projection algorithms that can be swapped in instead of t-SNE:
#proj_algo = MDS()
#proj_algo = SpectralEmbedding()
# random_state is fixed so the t-SNE layout is repeatable between runs.
proj_algo = TSNE(perplexity=50, random_state=1234)
# Much slower than the PCA cell (tens of seconds in the recorded run);
# columns 0 and 1 of the result are plotted in the next cell.
tsne_proj = proj_algo.fit_transform(fd.ftr_matrix)

CPU times: user 47.1 s, sys: 3.46 s, total: 50.6 s
Wall time: 51.6 s


In [11]:
# One entry per projected point: the two t-SNE coordinates and the image name.
tsne_datasource = ColumnDataSource(data=dict(
    x=tsne_proj[:, 0],
    y=tsne_proj[:, 1],
    imgname=fd.names,
))

# Same interactive scatter plot as for PCA, reusing the tooltip_html markup
# defined in the PCA plotting cell.
f = figure(
    tools=["pan,wheel_zoom,box_zoom,reset,tap", HoverTool(tooltips=tooltip_html)],
    plot_width=800,
    plot_height=600,
    title="t-SNE",
)
f.circle('x', 'y', source=tsne_datasource, size=15, alpha=0.3)
show(f)