In [41]:
import numpy as np

from skimage.io import imsave
from machinedesign.autoencoder.interface import load
from machinedesign.viz import grid_of_images_default
from keras.models import Model
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.neighbors import KernelDensity
import pandas as pd
from bokeh.plotting import figure, output_file, show, save
from bokeh.models import CustomJS, ColumnDataSource, BoxSelectTool, Rect, HoverTool, BoxZoomTool, WheelZoomTool, LassoSelectTool, PanTool, Div
from bokeh.plotting import figure, output_file, show
from bokeh.layouts import column, row

In [7]:
# Read the digits archive and prepare the arrays used by the later cells.
dataset = '../../data/digits.npz'
data = np.load(dataset)

# Labels are optional in the archive; `y` is None when they are absent.
if 'y' in data:
    y = data['y']
else:
    y = None

# Cast to float32, then divide by 255 in place
# (presumably mapping 8-bit intensities into [0, 1] -- TODO confirm).
X = data['X'].astype(np.float32)
X /= 255.

# Also used as the label offset given to generated samples further down.
nb_classes = 10

In [107]:
# Print the layer-by-layer summary of the loaded model.
# NOTE(review): `clf` is assigned by the `load(...)` cell that appears further
# down, so on a fresh kernel this cell must be run after that one.
clf.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 1, 28, 28)         0         
_________________________________________________________________
zero_padding2d_1 (ZeroPaddin (None, 1, 30, 30)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 64, 28, 28)        640       
_________________________________________________________________
p_re_lu_1 (PReLU)            (None, 64, 28, 28)        50176     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 64, 14, 14)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 128, 12, 12)       73856     
_________________________________________________________________
p_re_lu_2 (PReLU)            (None, 128, 12, 12)       18432     
__________

In [86]:
# Load the model stored under ../../discr/digits_and_letters.
clf = load('../../discr/digits_and_letters')

# Feature extractor wired from the first layer's own input to its own output.
# NOTE(review): presumably layers[0] is the InputLayer, which would make this a
# pass-through of the raw images -- confirm that this is intended.
# Alternative kept for reference (activations of a deeper layer):
#clf_enc = Model(inputs=clf.layers[0].input, outputs=clf.get_layer('p_re_lu_4').output)
first_layer = clf.layers[0]
clf_enc = Model(inputs=first_layer.input, outputs=first_layer.output)



In [102]:
# Sub-directories of ../results whose generated samples are included.
folders = ['mnist']
# Upper bound on the number of samples taken from each source.
nb_gen = 100000

# --- generated samples, one batch per folder --------------------------------
gen_images, gen_labels, gen_feats = [], [], []
for folder_idx, folder in enumerate(folders):
    archive = np.load('../results/{}/gen/generated.npz'.format(folder))
    samples = archive['generated'][:nb_gen]
    # Folder k is labelled nb_classes + k so it cannot collide with the
    # labels 0..nb_classes-1.
    labels = np.array([folder_idx + nb_classes] * len(samples))
    feats = clf_enc.predict(samples)
    feats = feats.reshape((feats.shape[0], -1))  # one flat vector per sample
    gen_images.append(samples)
    gen_labels.append(labels)
    gen_feats.append(feats)
nb_gen_total = sum(len(batch) for batch in gen_images)

# --- real samples -------------------------------------------------------------
real_labels = y[:nb_gen]
htrue = clf_enc.predict(X[:nb_gen])
htrue = htrue.reshape((htrue.shape[0], -1))

# Generated batches come first, real data last, in all three arrays.
Xfull = np.concatenate(gen_images + [X[:nb_gen]], axis=0)
yfull = np.concatenate(gen_labels + [real_labels], axis=0)
hfull = np.concatenate(gen_feats + [htrue], axis=0)

# Because generated samples lead, the first nb_gen_total rows are exactly them.
Xgen = Xfull[:nb_gen_total]
ygen = yfull[:nb_gen_total]
hgen = hfull[:nb_gen_total]

In [None]:
# Embed the flattened features with t-SNE for plotting (columns 0 and 1 are
# used as the x/y coordinates below).
# The seed is fixed so the layout is repeatable: the region-selection cell
# hard-codes coordinates in this embedding, which would be meaningless against
# a differently initialised run.
hfull_2d = TSNE(perplexity=40, random_state=0).fit_transform(hfull)

In [19]:
# Interactive scatter plot of the 2-D embedding, saved to viz.html.
output_file('viz.html')

# Text area below the plot; the box-select callback writes the selection here.
div = Div(width=100)

# On box-select, show the selected rectangle as "x_min,x_max,y_min,y_max".
# The bounds are taken directly from the selection geometry (not from the box
# centre), so the four numbers can be used as-is as lower/upper limits when
# masking points of the embedding.
callback = CustomJS(args=dict(div=div), code="""
        var geometry = cb_data['geometry'];
        var xmin = Math.min(geometry['x0'], geometry['x1']);
        var xmax = Math.max(geometry['x0'], geometry['x1']);
        var ymin = Math.min(geometry['y0'], geometry['y1']);
        var ymax = Math.max(geometry['y0'], geometry['y1']);
        div.text = xmin.toFixed(2) + "," + xmax.toFixed(2) + "," + ymin.toFixed(2) + "," + ymax.toFixed(2);
    """)
tools = [BoxZoomTool(), WheelZoomTool(), LassoSelectTool(), PanTool(), BoxSelectTool(callback=callback)]
p = figure(plot_width=1200,
           plot_height=700,
           tools=tools,
           title="Select Below")

# Labels 0-9 share one colour; each label from 10 upwards gets its own colour.
# Every entry must be a valid named colour ('lightred' is not one, so
# 'lightcoral' is used for the last slot).
colors = ['orange'] * 10 + ['red', 'blue', 'green', 'lightgreen', 'lightblue', 'lightcoral']
# The loop variable is deliberately not called `y`: that name holds the label
# array, and list-comprehension variables leak into the namespace on Python 2.
hcolors = [colors[int(label)] for label in yfull]
p.circle(
    hfull_2d[:, 0],
    hfull_2d[:, 1],
    color=hcolors,
)
layout = column(p, div)
show(layout)

08/29/2017,12:19:01 ## Session output file 'viz.html' already exists, will be overwritten.


In [34]:
# Rectangle in embedding space, given as x-lower, x-upper, y-lower, y-upper
# (presumably pasted from the text shown under the scatter plot -- confirm).
x1, x2, y1, y2 = -1.18,-0.87,-6.78,3.29

# Keep the points whose embedded coordinates fall inside the rectangle
# (both bounds inclusive on each axis).
emb_x = hfull_2d[:, 0]
emb_y = hfull_2d[:, 1]
inside_x = (emb_x >= x1) & (emb_x <= x2)
inside_y = (emb_y >= y1) & (emb_y <= y2)
mask = inside_x & inside_y

# Tile the matching images into one picture and write it to disk.
xmask = Xfull[mask]
im = grid_of_images_default(xmask)
imsave('clus.png', im)

  "%s to %s" % (dtypeobj_in, dtypeobj))


In [None]:
# Cluster the features of the generated samples.
n_clusters = 500
# fit() returns the estimator itself, so construction and fitting are chained.
clus = KMeans(n_clusters=n_clusters).fit(hgen)
# Cluster index assigned to every generated sample.
clusters = clus.predict(hgen)

In [105]:
# Rank clusters by spread: for each cluster take the largest absolute
# per-coordinate deviation of any member from the cluster centre, then order
# the cluster ids from smallest to largest spread.
intra_distances = []
for cl in range(n_clusters):
    h = hgen[clusters == cl]
    center = clus.cluster_centers_[cl]
    if len(h) == 0:
        # No sample was assigned to this cluster; max() of an empty array
        # would raise, so rank the cluster last instead.
        intra_distances.append(np.inf)
        continue
    intra_distances.append(np.abs(h - center).max())
# Cluster ids sorted by increasing spread.
indices = np.argsort(intra_distances)
# Copy of the first n_clusters centres, in cluster-id order.
centers = np.array(clus.cluster_centers_[:n_clusters])

In [106]:
# One 10x10 tile per cluster, visiting clusters in the order given by
# `indices` and using at most the first 100 members of each.
imlist = [
    grid_of_images_default(Xgen[clusters == cl][:100], shape=(10, 10))
    for cl in indices
]
# Arrange the per-cluster tiles into one picture and write it to disk.
im = grid_of_images_default(np.array(imlist))
imsave('clus.png', im)

  "%s to %s" % (dtypeobj_in, dtypeobj))


In [82]:
# For every cluster with at least 16 members, fit a kernel density estimate on
# the flattened member images, draw 100 samples from it and tile them 10x10.
imlist = []
for cl in indices:
    members = Xgen[clusters == cl]
    if len(members) >= 16:
        flat = members.reshape((members.shape[0], -1))
        kde = KernelDensity(bandwidth=0.01)
        kde.fit(flat)
        drawn = kde.sample(100)
        # Restore the per-image shape of the cluster members.
        drawn = drawn.reshape((drawn.shape[0],) + members.shape[1:])
        imlist.append(grid_of_images_default(drawn, shape=(10, 10)))
# Arrange the per-cluster tiles into one picture and write it to disk.
im = grid_of_images_default(np.array(imlist))
imsave('clus.png', im)

  "%s to %s" % (dtypeobj_in, dtypeobj))
