# AEFIT

This is a first attempt at running the unsupervised VAE network to learn how to characterize a 1D profile with added noise and missing input.


In [1]:
import numpy as np
import tensorflow as tf

# %matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.colors as colors 

import ipysh
import Hunch_utils  as Htls
import Hunch_lsplot as Hplt

%aimport Dummy_g1data
import Dummy_g1data as dummy

%aimport models.base
%aimport models.AEFIT
# ipysh.Bootstrap_support.debug()

## Data and Model
The model and data generator are set:
The dummy data generator draws from a set of 5 kinds of curves, with a dataset cardinality of 10K samples.

All the shapes are generated from an array of dictionaries that define the mean, sigma and gain of a sum of Gaussians.
This table is printed from the variable ds.kinds.

>NOTE: 
> The current generator produces random samples, so it does not reproduce the very same samples on each epoch.
> To constrain the dataset to an exact, fixed set of samples, the buffer can be used.

By default the model uses an input of 40 values, which are the (x,y) tuple values of 20 points from the generated shapes.
If the command **buffer()** is used, all shapes are stored in a buffer and the generator always yields the same set of curves.


In [2]:
# Build the dummy generator for 10000 samples; buffer() stores the generated
# shapes so that the generator always yields the same set of curves.
ds = dummy.Dummy_g1data(counts=10000).buffer()
# Display the table of shape definitions (mean, sigma and gain of the gaussians).
ds.kinds

In [3]:
# Convert the dataset to a mirrored (input, target) = (xy, xy) form suitable to be fed into the VAE.
# The second element of each sample (presumably the label — confirm in Dummy_g1data) is dropped.
dds = ds.ds_array.map(lambda xy,l: (xy,xy) )

In [9]:
# Create a VAE model from the AEFIT prototype with a 2-dimensional latent space.
# beta is created as 0 here.
# NOTE(review): geometry presumably lists the hidden layer sizes — confirm in models.AEFIT.
m = models.AEFIT.AEFIT(latent_dim=2, scale=1, beta=0., geometry=[20,20,10,10])

In [8]:
# Set beta on the model that is actually trained below. The original cell
# referenced `m3`, which is never defined in this notebook, while `fit`
# trains `m`; everything here now targets `m`.
# NOTE(review): beta is presumably the weight of the KL term — confirm in models.AEFIT.
m.beta.assign(0.0005)

# The first 3000 samples are held out for validation, the rest are used for
# training; both are fed in batches of 100.
fit = lambda: m.fit(
    dds.skip(3000).batch(100, drop_remainder=True),
    validation_data=dds.take(3000).batch(100),
    epochs=39,
    shuffle=False,
)

# If interactive notebook: run the training through fn_thread and show its control panel
models.base.fn_thread(m, fit).control_panel()

# If standard notebook
# fit()

In [7]:
# Start a viewer of the latent space for model m, fed with 1000 samples from ds.
# NOTE(review): the hard-coded notebook_url presumably has to match the address
# this notebook server is reached at — adjust it for your environment.
p = Hplt.LSPlotBokeh()
p.set_model(m)
p.set_data(ds, counts=1000)
p.plot(notebook_url='http://172.17.0.2:8888')

In [10]:
# Display the weights of the relevance layer, taken here as the first layer
# of the generative network.
relevance = m.generative_net.layers[0]
relevance.weights

## Missing data simulation

Now we want to test the network against particular shapes along the main paths of the latent space, but with added noise and simulated missing data. The function simulate_missing_data reduces the number of available inputs by simply overwriting each missing point with the value of the point that follows it (index + 1).

Gaussian noise is also applied to the curve.

In [None]:

def simulate_missing_data(m, lpt=(0.5, -1.6), noise_var=0.05, arr=()):
    """Decode a latent point, corrupt the curve and plot what the model recovers.

    The curve decoded from ``lpt`` is perturbed with Gaussian noise, selected
    points are overwritten to mimic missing data, and the corrupted curve is
    encoded again. The left axes show the requested and the guessed latent
    points; the right axes show the clean, the corrupted and the reconstructed
    curves.

    Args:
        m: model exposing ``decode(z, training=False)`` and
            ``encode(xy, training=False)``. ``encode`` must return two values,
            of which only the first is used here.
        lpt: latent point to generate from; its first two components are
            plotted as coordinates on the left axes.
        noise_var: passed as ``scale`` to ``np.random.normal``, so despite the
            name it acts as the standard deviation of the added noise.
        arr: indices of the points to treat as missing. Each listed point is
            overwritten with the value at ``index + 1``, in the given order,
            so a descending run such as ``3, 2, 1`` makes all three points
            take the value of point 4. Every index must have a successor,
            otherwise indexing fails.

    Returns:
        None. The function only draws on the ``'gen_missing_curve'`` figure
        and prints the guessed latent point.
    """
    # Use the point given by the caller (previously a global ``pt`` was read
    # here and the ``lpt`` argument was silently ignored).
    xy = m.decode(tf.convert_to_tensor([lpt]), training=False)
    # The decoded vector holds the x values in its first half, y in the second.
    x, y = tf.split(xy[0], 2)
    x, y = (x.numpy(), y.numpy())

    fig = plt.figure('gen_missing_curve', figsize=(18, 6))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    ax1.set_xlim(-2., 2.)
    ax1.set_ylim(-2., 2.)

    # Requested latent point and the clean decoded curve.
    ax1.scatter(lpt[0], lpt[1], s=80)
    ax2.scatter(x, y, s=40)

    # apply noise
    x += np.random.normal(0, noise_var, len(x))
    y += np.random.normal(0, noise_var, len(y))

    # apply missing data simulation: copy the following point over each
    # missing one; processed sequentially because the order matters.
    for idx in arr:
        x[idx] = x[idx + 1]
        y[idx] = y[idx + 1]

    # Corrupted curve that is actually fed to the encoder.
    ax2.scatter(x, y, s=80)

    me, _ = m.encode(tf.reshape(tf.concat([x, y], 0), shape=[1, -1]), training=False)
    print("Guessed Latent point = ",me.numpy())
    gpt = me[0].numpy()
    ax1.scatter(gpt[0], gpt[1])

    # Curve reconstructed from the guessed latent point.
    XY = m.decode(me, training=False)
    X, Y = tf.split(XY[0], 2)
    X, Y = (X.numpy(), Y.numpy())
    ax2.scatter(X, Y, s=40)



We start by generating close to the shape {'mean': [0.5], 'sigma': [0.2], 'gain': [0.5]}

This is in the middle of the central cluster

In [None]:
# Generate from latent point (-0.400, 0.593) with a noise scale of 0.1.
pt = [-0.400,0.593]
noise_var = 0.1
# Indices of the points treated as missing; each is overwritten, in this order,
# with the value of the point at index + 1.
arr = [3,2,1,5,8,7,6,9,12,11,14,13,18]
simulate_missing_data(m, pt,noise_var,arr)


Now we want to check if the network can simulate a point in the middle of two clusters

In [None]:
# Generate from latent point (0.666, -0.278) with a noise scale of 0.05.
pt = [0.666,-0.278]
noise_var = 0.05
# Indices of the points treated as missing; each is overwritten, in this order,
# with the value of the point at index + 1.
arr = [3,2,1,5,8,7,6,9,12,11,14,13,18]
simulate_missing_data(m, pt,noise_var,arr)



In [None]:
# Generate from latent point (-1.283, 0.541) without added noise.
pt = [-1.283,0.541]
noise_var = 0.0
# No missing points in this run: arr is not passed, so the function default
# (empty) applies. The list used in the previous runs is kept for reference:
#arr = [3,2,1,5,8,7,6,9,12,11,14,13,18]
simulate_missing_data(m, pt,noise_var)

