In [None]:
import numpy as np
import scipy
import scipy.stats
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt, cm
%matplotlib inline

import sklearn
from sklearn import linear_model
from breze.learn.mlp import Mlp

import os

In [None]:
import tacman

In [None]:
# Which tactile sensor's recordings to use. The feature-extraction cell slices
# the electrode columns up to 'E19' when this is 'biotac' and up to 'E12' for
# any other value; the value also prefixes the cache file names.
sensor = 'biotac'
# Root directory handed to the tacman dataset loader.
# NOTE(review): '<username>' is a placeholder -- point this at a local copy.
datasetpath = '/home/<username>/Datasets/tacman'

In [None]:
# Load the raw curvature recordings of the selected sensor.
# NOTE(review): later cells use `.iloc`, label-sliced 'E*' columns and a
# 'curvature' column on this object, so it is presumably a pandas DataFrame --
# confirm against the tacman loader.
surface = tacman.datasets.curvature.raw(path=datasetpath,sensor=sensor)

In [None]:
# Train/test partition: a random permutation of the row positions of `surface`,
# cached on disk so that every run reuses the same split.
# NOTE(review): the earlier note on this cell said the split is "by
# experiments, not samples" and that experiment 0 is left out for the iCub
# sensor (extreme outlier). No exclusion happens in this cell -- confirm that
# each row of `surface` is one experiment and that the outlier is dropped
# upstream.

# Fraction of `ind` that goes to the training partition.
split = 0.7

if not os.path.isfile(sensor + '_curvature_ind.npy'):
    # First run for this sensor: draw the (unseeded) permutation and persist it.
    ind = np.arange(len(surface))
    np.random.shuffle(ind)
    np.save(sensor + '_curvature_ind.npy', ind)
else:
    ind = np.load(sensor + '_curvature_ind.npy')

In [None]:
# Position in the permutation where the training rows end; int() truncates, so
# a fractional row goes to the test partition.
n_train = int(split * len(ind))

# Everything before the cut is training data, the remainder is test data.
train_surface = surface.iloc[ind[:n_train]]
test_surface = surface.iloc[ind[n_train:]]

In [None]:
# Tactile features: the contiguous run of electrode columns 'E1'..'E19' for
# the biotac sensor, 'E1'..'E12' for any other sensor (label slices are
# inclusive at both ends).
last_electrode = 'E19' if sensor == 'biotac' else 'E12'

# Slice the columns directly rather than transposing the whole frame twice,
# and request a float copy explicitly: the normalisation cell updates X and TX
# in place with float statistics, which only works on floating-point arrays.
# The double transpose made the resulting dtype depend on unrelated columns.
X = np.array(train_surface.loc[:, 'E1':last_electrode], dtype=float)
TX = np.array(test_surface.loc[:, 'E1':last_electrode], dtype=float)

# Targets: the 'curvature' column of each partition.
Y = np.array(train_surface['curvature'])
TY = np.array(test_surface['curvature'])

### Normalisation

In [None]:
# Standardise every feature column. Both statistics are computed from the
# training features only and then applied unchanged to the test features.
# X and TX are modified in place.
mean = X.mean(0)
X -= mean
std = X.std(0)
# A column that is constant over the training set has std == 0; dividing by it
# would fill that column with NaN/inf. Use a unit scale there instead, which
# leaves the centred training column at zero.
std[std == 0] = 1.0
X /= std
TX -= mean
TX /= std

### Variational Autoencoder

In [None]:
from breze.learn import sgvb
import climin
import climin.stops

import theano
import theano.tensor as T

from breze.arch.construct.layer.distributions import DiagGauss, NormalGauss
from breze.arch.construct.neural.distributions import MlpDiagGauss
from breze.arch.construct.neural import Mlp

class MlpDiagConstVarGauss(DiagGauss):
    """Diagonal Gaussian whose mean is an MLP output and whose spread is one
    learned row that does not depend on the input.

    The first argument given to the ``DiagGauss`` constructor is the output of
    an ``Mlp`` applied to ``inpt``; the second is ``std**2 + 1e-5``, where
    ``std`` is a declared parameter of shape ``(1, n_output)``.
    NOTE(review): the second ``DiagGauss`` argument is presumably the variance
    (hence the squared ``std``) -- confirm against breze.
    """
    def __init__(self, inpt, n_inpt, n_hiddens, n_output,
                 hidden_transfers, out_transfer_mean='identity',
                 declare=None, name=None, rng=None):
        """Build the mean network and the input-independent spread parameter.

        inpt              -- expression fed into the mean MLP.
        n_inpt            -- passed to ``Mlp`` as its input size argument.
        n_hiddens         -- passed to ``Mlp`` as its hidden layer sizes.
        n_output          -- passed to ``Mlp`` as its output size; also the
                             number of columns of ``std``.
        hidden_transfers  -- passed to ``Mlp`` for the hidden layers.
        out_transfer_mean -- passed to ``Mlp`` for the output layer.
        declare           -- parameter-declaring callable. Although it
                             defaults to None it must be supplied: it is
                             called unconditionally as ``declare((1, n_output))``.
        name, rng         -- accepted but not used in this constructor.
        """
        self.inpt = inpt
        self.n_inpt = n_inpt
        self.n_hiddens = n_hiddens
        self.n_output = n_output
        self.hidden_transfers = hidden_transfers
        self.out_transfer_mean = out_transfer_mean
        # `Mlp` is whatever the most recent import bound: in this cell that is
        # breze.arch.construct.neural.Mlp, which rebinds the Mlp imported from
        # breze.learn.mlp in the first cell.
        # NOTE(review): parameters are declared mean-MLP first, then `std`;
        # reordering presumably changes the layout of the flat parameter
        # vector and would invalidate cached parameter files -- confirm.
        self.mean_mlp = Mlp(
            self.inpt, self.n_inpt, self.n_hiddens, self.n_output,
            self.hidden_transfers,
            self.out_transfer_mean,
            declare=declare)
        self.std = declare((1, n_output))
        # Squaring makes the value non-negative; the added 1e-5 presumably
        # keeps it strictly positive.
        super(MlpDiagConstVarGauss, self).__init__(
            self.mean_mlp.output,
            self.std**2 + 1e-5)
            
            
class MlpGaussConstVarVisibleVAEMixin(object):
    """VAE mixin supplying the generative model: an ``MlpDiagConstVarGauss``
    that maps a latent sample back to the input space."""

    def make_gen(self, latent_sample):
        """Return the generative distribution built on ``latent_sample``.

        Reads ``n_latent``, ``n_hiddens_gen``, ``n_inpt``, ``gen_transfers``
        and ``parameters.declare`` from the host object.
        """
        return MlpDiagConstVarGauss(
            inpt=latent_sample,
            n_inpt=self.n_latent,
            n_hiddens=self.n_hiddens_gen,
            n_output=self.n_inpt,
            hidden_transfers=self.gen_transfers,
            declare=self.parameters.declare)
    
class MlpGaussLatentVAEMixin(object):
    """VAE mixin supplying the latent side of the model: a ``NormalGauss``
    prior and an ``MlpDiagGauss`` recognition model."""

    def make_prior(self, sample):
        """Return a ``NormalGauss`` constructed from the shape of ``sample``."""
        shape = sample.shape
        return NormalGauss(shape)

    def make_recog(self, inpt):
        """Return the recognition distribution built on ``inpt``.

        Reads ``n_inpt``, ``n_hiddens_recog``, ``n_latent``,
        ``recog_transfers`` and ``parameters.declare`` from the host object.
        The mean output uses the 'identity' transfer and the variance output
        uses ``T.exp``.
        """
        layer_args = (
            inpt,
            self.n_inpt,
            self.n_hiddens_recog,
            self.n_latent,
            self.recog_transfers,
        )
        return MlpDiagGauss(
            *layer_args,
            out_transfer_mean='identity',
            out_transfer_var=T.exp,
            declare=self.parameters.declare)

class MyVae(sgvb.VariationalAutoEncoder,
                    MlpGaussLatentVAEMixin,
                    MlpGaussConstVarVisibleVAEMixin):
    """Variational autoencoder assembled from the two mixins in this cell.

    ``MlpGaussLatentVAEMixin`` defines ``make_prior`` / ``make_recog`` and
    ``MlpGaussConstVarVisibleVAEMixin`` defines ``make_gen``; this class adds
    nothing of its own.
    NOTE(review): the base class is listed first, so the mixin methods are only
    used if ``sgvb.VariationalAutoEncoder`` does not define methods with the
    same names -- confirm against breze.
    """
    pass

# Optimiser specification handed to the model: (name, keyword arguments).
optimizer = ('rmsprop', {'step_rate': 0.001})
# Mini-batch size; the training cell also uses it to turn passes over the data
# into iteration counts.
batch_size = 200

# Size of the latent representation.
n_latent = 128

# NOTE(review): the positional arguments presumably map to (n_inpt,
# n_hiddens_recog, n_latent, n_hiddens_gen, recog_transfers, gen_transfers),
# i.e. two hidden layers of 512 sigmoid units on each side -- confirm against
# the sgvb.VariationalAutoEncoder signature.
m = MyVae(
    int(X.shape[1]),
    [512, 512], n_latent, [512, 512],
    ['sigmoid', 'sigmoid'], ['sigmoid', 'sigmoid'],
    optimizer=optimizer, batch_size=batch_size)

In [None]:
# Fill the model's flat parameter array with draws from a normal distribution.
# NOTE(review): the arguments 0 and 0.1 are presumably the mean and standard
# deviation, and the call presumably writes into the array in place (its
# return value is not used) -- confirm against the climin documentation.
climin.initialize.randomize_normal(m.parameters.data, 0, 0.1)

In [None]:
# Replace the optimiser specification on the model: the constructor was given
# ('rmsprop', {'step_rate': 0.001}), this sets the attribute to 'adam'.
# NOTE(review): the rmsprop step rate presumably no longer applies once this
# cell has run -- confirm which optimiser is intended.
m.optimizer = 'adam'

In [None]:
if os.path.isfile(sensor + '_curvature_parameters.npy'):
    m.parameters.data[:] = np.load(sensor + '_curvature_parameters.npy')
else:
    max_passes = 100
    max_iter = max_passes * X.shape[0] / batch_size
    n_report = X.shape[0] / batch_size

    stop = climin.stops.AfterNIterations(max_iter)
    pause = climin.stops.ModuloNIterations(n_report)

    for i, info in enumerate(m.powerfit((X,), (X[0:1],), stop, pause)):
        print i, info['loss'], info['val_loss']

    np.save(sensor + '_curvature_parameters.npy', m.parameters.data.as_numpy_array())

In [None]:
from breze.learn.base import theanox
# Compiled functions of the model input 'inpt': one evaluates a sample drawn
# from the recognition model, the other evaluates its `stt` expression.
_f_latents = m.function(['inpt'], m.vae.recog.sample())
_f_meanvar = m.function(['inpt'], m.vae.recog.stt)


def f_latents(x):
    """Return a recognition-model sample for ``x`` as a numpy array."""
    return _f_latents(theanox(x)).as_numpy_array()


def f_mean(x):
    """Return the first ``n_latent`` columns of the recognition output for ``x``.

    NOTE(review): presumably these columns hold the latent means -- confirm the
    layout of ``recog.stt``.
    """
    stt = _f_meanvar(theanox(x)).as_numpy_array()
    return stt[:, :n_latent]


def f_var(x):
    """Return the columns after the first ``n_latent`` of the recognition
    output for ``x``.

    NOTE(review): presumably these columns hold the latent variances --
    confirm the layout of ``recog.stt``.
    """
    stt = _f_meanvar(theanox(x)).as_numpy_array()
    return stt[:, n_latent:]

In [None]:
# Encode the test features: L is a sample from the recognition model, M and V
# are the first n_latent / remaining columns of its `stt` output.
# NOTE(review): theanox is applied here and again inside each wrapper; the
# outer call is presumably redundant -- confirm theanox is idempotent before
# removing it.
L = f_latents(theanox(TX))
M = f_mean(theanox(TX))
V = f_var(theanox(TX))

### Matrix: uniform latent vs. feature

In [None]:
def plot_latent_space(L, c, figsize=(10, 10)):
    """Draw an n x n grid relating the columns of ``L`` to each other and to ``c``.

    L       -- 2-D array; n = L.shape[1] columns are plotted.
    c       -- one value per row of L; used as the x data of the diagonal
               histograms and as the colour of the scatter points.
    figsize -- figure size passed to ``plt.subplots``.

    Panel (i, j):
      i == j -- 2-D histogram of ``c`` against column i (50 x 50 bins).
      i >  j -- scatter of column i against column j, coloured by ``c``.
      i <  j -- left empty (axis switched off).

    Returns nothing; the figure is left as the current pyplot figure.
    """
    n = L.shape[1]
    # squeeze=False keeps `axs` two-dimensional even for n == 1. By default
    # plt.subplots returns a bare Axes for a 1 x 1 grid, on which axs[i, j]
    # fails.
    fig, axs = plt.subplots(n, n, figsize=figsize, squeeze=False)
    for i in range(n):
        for j in range(n):
            ax = axs[i, j]
            if i == j:
                ax.hist2d(c, L[:, i], bins=(50, 50))
            elif i > j:
                ax.scatter(L[:, i], L[:, j], c=c, marker='o')
            else:
                ax.set_axis_off()
    plt.tight_layout()

In [None]:
# Plot only the first five columns of the sampled latent codes, using the test
# curvature values for the histograms and point colours.
plot_latent_space(L[:,:5], TY)

### Classification

In [None]:
def fit_logicsticregression(x,y,tx,ty):
    """Fit a linear model on one-hot class targets and score it on test data.

    Despite the name, the fitted model is an ordinary least-squares
    ``LinearRegression`` on binarised labels; the predicted class is the
    output column with the largest value.

    x, y   -- training features and labels.
    tx, ty -- test features and labels.

    Labels are compared through ``str(label)``; the class order (and therefore
    the row/column order of the confusion matrix) is that of
    ``LabelBinarizer.classes_`` fitted on the training and test labels together.

    Returns ``(error, pred, m)``:
      error -- confusion matrix (rows: true class, columns: predicted class),
               always one row/column per class seen in ``y`` or ``ty``.
      pred  -- raw model output for ``tx``, one column per binarised target.
      m     -- the fitted model.
    """
    from sklearn import metrics, preprocessing

    # Explicit lists instead of map() + map(): works whether map returns a
    # list or an iterator.
    y_labels = [str(v) for v in y]
    ty_labels = [str(v) for v in ty]

    lb = preprocessing.LabelBinarizer()
    lb.fit(y_labels + ty_labels)

    y = lb.transform(y_labels)
    ty = lb.transform(ty_labels)

    # An SGDClassifier(loss='log') used to be constructed here and was
    # overwritten on the next statement without being used; only the model
    # that is actually fitted is built now.
    m = linear_model.LinearRegression()
    m.fit(x, y)
    pred = m.predict(tx)

    def class_index(scores):
        """Map binarised targets / model outputs to class indices."""
        scores = np.asarray(scores)
        if scores.shape[1] == 1:
            # LabelBinarizer emits a single 0/1 column when there are at most
            # two classes; argmax over one column would always be 0, so
            # threshold halfway between the two codes instead.
            return (scores[:, 0] > 0.5).astype(int)
        return np.argmax(scores, 1)

    # Fix the label set so the matrix has the same shape and ordering no
    # matter which classes occur in this particular test set.
    error = metrics.confusion_matrix(
        class_index(ty), class_index(pred),
        labels=np.arange(len(lb.classes_)))
    return error, pred, m

In [None]:
# Classifiers to evaluate. Each 'fn' is called by the evaluation loop as
# fn(train_x, train_y, test_x, test_y) and its result is unpacked into three
# values.
classification_algorithms = [
    {'name': 'Logistic Regression', 'fn': fit_logicsticregression},
]

In [None]:
# Label sets to predict: 'Y' holds the training labels, 'TY' the test labels.
classification_targets = [
    {'name': 'Curvature', 'Y': Y, 'TY': TY},
]

In [None]:
for alg in classification_algorithms:
    print alg['name']
    for t in classification_targets:

        print t['name'], 'raw:',
        raw_error, raw_pred, raw_m = alg['fn'](X, t['Y'], TX, t['TY'])
        plt.figure()
        plt.imshow(raw_error, interpolation="nearest", cmap=plt.cm.binary)
    
        print 'latent:',
        latent_error, latent_pred, latent_m = alg['fn'](f_mean(X), t['Y'], f_mean(TX), t['TY'])
        plt.figure()
        plt.imshow(latent_error, interpolation="nearest", cmap=plt.cm.binary)