# Find Path Descriptors using Z space of Autoencoder

Autoencoders are good at:
- **dimensionality reduction** for data visualization
- **data denoising** for robust feature learning

Dimensions of Input space= n x 100 x 3
Dimensions of Latent space (z-space)= z

# Enhance reproducibility of results

In [1]:
#%env PYTHONHASHSEED=0 
#%env CUDA_VISIBLE_DEVICES=""

In [2]:
import numpy as np
import tensorflow as tf
import random

# Seed the NumPy, Python and TensorFlow random number generators with the same
# fixed value before any model is built.
np.random.seed(1)
random.seed(1)
tf.set_random_seed(1)

# Restrict TensorFlow to one thread inside an op and one thread across ops.
# NOTE(review): presumably done to remove run-to-run differences caused by
# parallel execution - confirm this is still required.
config = tf.ConfigProto(intra_op_parallelism_threads=1,inter_op_parallelism_threads=1)
from keras import backend as K
# Create the session on the default graph and hand it to Keras so that Keras
# uses this configuration.
sess = tf.Session(graph=tf.get_default_graph(), config=config)
K.set_session(sess)

Using TensorFlow backend.


In [3]:
def resetRNG(seed_value):
    """Re-seed the Python, NumPy and TensorFlow random number generators.

    The same ``seed_value`` is handed, unchanged, to each of the three
    seeding functions, in the order Python -> NumPy -> TensorFlow.
    """
    for seed_fn in (random.seed, np.random.seed, tf.set_random_seed):
        seed_fn(seed_value)

# Import .npz Database containing 5SS mechanisms and cplr paths

In [4]:
# Import data offline
# The archive path is built relative to the current working directory:
# <cwd>/../1. DB Preprocessing/norm_database5SS.npz
import os
current_dir=os.getcwd()
db_path=os.path.join(current_dir,'..','1. DB Preprocessing','norm_database5SS.npz')
# `db` is the opened .npz archive; its arrays are read by key further down.
db = np.load(db_path)

In [None]:
# Import data on Google CoLab
#from google.colab import files
#uploaded = files.upload()
#db = np.load('norm_database5SS.npz')

In [5]:
# List the array names stored in the archive, then pull out the two that are used.
print(db.files)
cplr=db['cplrData']
mech=db['mechData']

# Shapes recorded in the cell output: cplr (1453, 100, 3), mech (1453, 11, 3).
print(cplr.shape)
print(mech.shape)

(1453, 100, 3)
(1453, 11, 3)


# Splitting the database in Train/Test dataset

In [7]:
x = cplr

# Split data into training and testing dataset.
# An autoencoder is trained to reproduce its own input, so there is no separate
# target array: splitting `x` alone yields the same partition as splitting
# (x, x) with the same random_state, without building two discarded copies.
from sklearn.model_selection import train_test_split
x_train, x_test = train_test_split(x, test_size=0.2, random_state=1)

# Functions to TRAIN an Autoencoder

In [8]:
from keras.layers import Input, Dense, Flatten, Reshape, SeparableConv1D, concatenate
from keras.models import Model
from keras.layers import LeakyReLU
from keras import regularizers
from keras.callbacks import ModelCheckpoint, EarlyStopping
#from keras import initializers
import keras

In [14]:
# Train same model architecture multiple times since the training process is stochastic

def AE_analysis(AE_model, iterations, x_train, x_test, filename='model'):
    """Train the same architecture several times and keep the best run.

    Parameters
    ----------
    AE_model : callable
        Zero-argument factory returning ``(autoencoder, encoder)``; it is
        called once per iteration so every run starts from a fresh model.
    iterations : int
        Number of independent training runs; must be at least 1.
    x_train, x_test :
        Passed through unchanged to ``train_AE``.
    filename : str
        Tag inserted in the name of the saved model file.

    Returns
    -------
    (best_AE, best_E, best_train_data)
        Autoencoder, encoder and training record of the run with the lowest
        validation loss as reported by ``train_AE``.

    Raises
    ------
    ValueError
        If ``iterations`` is smaller than 1 (there would be nothing to return).

    Side effects: prints progress and the list of losses, and saves the best
    autoencoder to ``models/AE_<filename>_val_loss_<loss>.h5``.
    """
    if iterations < 1:
        raise ValueError('iterations must be at least 1')

    val_loss_list = []
    best_val_loss = np.inf
    best_AE = best_E = best_train_data = None
    for i in range(iterations):
        print('Model no. '+str(i+1))
        AE, E = AE_model()
        val_loss, train_data = train_AE(AE, E, x_train, x_test)
        val_loss_list.append(val_loss)

        # The first run is always kept, and a NaN "best" is replaced by any
        # finite loss; otherwise a diverged run would leave nothing to return
        # because every comparison against NaN is False.
        is_better = (
            best_AE is None
            or (np.isnan(best_val_loss) and not np.isnan(val_loss))
            or val_loss < best_val_loss
        )
        if is_better:
            best_AE = AE
            best_E = E
            best_train_data = train_data
            best_val_loss = val_loss

    # PRINT MEAN, VARIANCE and BEST MODEL METRICS
    print(val_loss_list)
    print('Average Validation Loss: '+str(np.mean(val_loss_list)))

    # SAVE BEST MODEL
    # Create the output directory on first use so that hours of training are
    # not lost to a missing folder.
    os.makedirs('models', exist_ok=True)
    AEfilename = 'AE_'+filename+'_val_loss_'+str(best_val_loss)+'.h5'
    AEfilepath = os.path.join('models', AEfilename)
    best_AE.save(AEfilepath)

    return best_AE, best_E, best_train_data

In [10]:
def train_AE(autoencoder, encoder, x_train, x_test, epochs=500):
    """Compile and fit an autoencoder, returning its validation loss.

    Parameters
    ----------
    autoencoder :
        Keras model trained to reproduce its input (``x_train`` is used as
        both input and target).
    encoder :
        Not used here; kept so the signature matches what callers pass.
    x_train, x_test :
        Training data and validation data.
    epochs : int
        Upper bound on the number of epochs; early stopping on ``val_loss``
        (patience 10) normally ends training sooner.

    Returns
    -------
    (f_val_loss, autoencoder_train)
        ``f_val_loss`` is the mean-squared-error of the trained model on
        ``x_test`` rounded to 4 decimals; ``autoencoder_train`` is the object
        returned by ``fit`` (its ``history`` holds the per-epoch losses).
    """
    # TRAINING PHASE
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    earlyStopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min',
                                  restore_best_weights=True)
    # shuffle=False keeps the batch order fixed from run to run.
    autoencoder_train = autoencoder.fit(x_train, x_train, epochs=epochs, batch_size=128,
                                        shuffle=False, validation_data=(x_test, x_test),
                                        verbose=0, callbacks=[earlyStopping])

    # TRAINING DATA
    # With restore_best_weights=True the model may have been rolled back to an
    # earlier epoch, so the last entry of history['val_loss'] does not describe
    # the weights that are actually returned. Measure the loss of the model as
    # it stands now instead (no metrics were compiled, so evaluate() returns a
    # single scalar).
    final_val_loss = autoencoder.evaluate(x_test, x_test, batch_size=128, verbose=0)
    # np.round replaces np.round_, which was removed in NumPy 2.0.
    f_val_loss = np.round(final_val_loss, decimals=4)

    return f_val_loss, autoencoder_train

# Functions to VISUALIZE an Autoencoder

In [11]:
def visualize_AE(AE, train_data, x_test):
    """Show how a trained autoencoder learned and how well it reconstructs.

    First plots the loss curves stored in ``train_data``, then runs
    ``AE.predict`` on ``x_test`` and draws the first 20 test curves together
    with their reconstructions.
    """
    # Loss history of the training run
    visualize_train_curves(train_data)

    # Original vs. reconstructed curves for 20 test samples
    visualize_reconstructed_CplrCurves(x_test, AE.predict(x_test), 20)

    
def visualize_train_curves(train_data):
    """Print the final losses of a training run and plot both loss curves.

    ``train_data`` must expose a ``history`` mapping with the per-epoch lists
    ``'loss'`` and ``'val_loss'``. The values printed are those of the last
    recorded epoch, rounded to 4 decimals.
    """
    loss = train_data.history['loss']
    val_loss = train_data.history['val_loss']

    # np.round replaces np.round_, which was removed in NumPy 2.0.
    f_loss = np.round(loss[-1], decimals=4)
    f_val_loss = np.round(val_loss[-1], decimals=4)

    print('Training loss: '+str(f_loss))
    print('Validation loss: '+str(f_val_loss))

    # One x position per recorded epoch
    epochsRange = range(len(loss))
    plt.figure()
    plt.plot(epochsRange, loss, 'r', label='Training loss')
    plt.plot(epochsRange, val_loss, 'b', label='Validation loss')
    plt.title('Training and validation loss')
    plt.legend()
    plt.show()

    
def visualize_reconstructed_CplrCurves(x_test, reconst_curve, n=10, b_spline=False):
    """Draw original curves and their reconstructions side by side in 3D.

    Parameters
    ----------
    x_test :
        Original curves; ``x_test[i]`` is drawn in the default colour.
    reconst_curve :
        Reconstructed curves, index-aligned with ``x_test``; drawn in red.
        This array is never modified.
    n : int
        Number of curves to show, one subplot each. Clamped to the number of
        curves actually available.
    b_spline : bool
        When True each reconstruction is resampled to 100 points with
        ``b_spline_interpolation`` before plotting.
    """
    # Never index past the curves that exist.
    n = min(n, len(x_test), len(reconst_curve))
    if n <= 0:
        return  # nothing to draw

    plt.figure(figsize=(3*n, 3))
    for i in range(n):
        ax = plt.subplot(1, n, i + 1, projection='3d')
        # Work on a local reference so the smoothed curve is not written back
        # into the caller's array.
        curve = reconst_curve[i]
        if b_spline:
            curve = b_spline_interpolation(curve, 100)
        plotPath(curve, ax, 2, 'r')
        plotPath(x_test[i], ax, 2)
    plt.tight_layout()
    plt.show()

In [12]:
# Plotting Functions

#%matplotlib inline
%matplotlib notebook
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
from scipy import interpolate


def plotPath(Pts, ax, limit, color = 'gray'):
    """Draw a 3D polyline on ``ax`` and fix all three axes to [-limit, limit].

    ``Pts`` is indexed as ``Pts[:, 0]``, ``Pts[:, 1]``, ``Pts[:, 2]`` for the
    x, y and z coordinates; ``color`` is forwarded to ``ax.plot3D``.
    """
    xs, ys, zs = Pts[:, 0], Pts[:, 1], Pts[:, 2]
    ax.plot3D(xs, ys, zs, color)

    # Same symmetric range on every axis
    bounds = [[-limit, limit] for _ in range(3)]
    ax.auto_scale_xyz(*bounds)

def plotXYZ(center, RotMat, ax):
    """Draw three arrows starting at ``center`` with ``ax.quiver``.

    ``center`` is stacked three times to give one start point per arrow; the
    three columns of ``RotMat`` are passed as the x, y and z components of the
    arrow directions. The colour tuple holds red, green and blue in a 9-entry
    pattern.
    NOTE(review): the 9 entries presumably cover 3 shafts plus 6 arrow-head
    segments - confirm against the matplotlib version in use.
    """
    red, green, blue = (1, 0, 0), (0, 1, 0), (0, 0, 1)
    palette = (red, green, blue, red, red, green, green, blue, blue)

    origins = np.vstack([center] * 3)
    ax.quiver(origins[:, 0], origins[:, 1], origins[:, 2],
              RotMat[:, 0], RotMat[:, 1], RotMat[:, 2],
              color=palette)


def b_spline_interpolation(inp_pts, out_n):
    """Resample a single 3D path with a smoothing cubic B-spline.

    Parameters
    ----------
    inp_pts : array-like, shape (N, 3)
        One path, one point per row (columns x, y, z). After consecutive
        duplicates are removed, more than 3 points must remain for the cubic
        fit to succeed.
    out_n : int
        Number of points in the returned path.

    Returns
    -------
    numpy.ndarray, shape (out_n, 3)
        Points of the fitted spline evaluated at ``out_n`` equally spaced
        parameter values between 0 and 1.
    """
    inp_pts = np.asarray(inp_pts)
    # The caller hands over one curve, so the coordinates are plain columns.
    xp = inp_pts[:, 0]
    yp = inp_pts[:, 1]
    zp = inp_pts[:, 2]

    # Check for duplicate points as interpolation routine errors out:
    # keep every point that differs from its successor, then re-append the
    # last point (np.diff has no entry for it).
    okay = np.where(np.abs(np.diff(xp)) + np.abs(np.diff(yp)) + np.abs(np.diff(zp)) > 0)
    xp = np.r_[xp[okay], xp[-1]]
    yp = np.r_[yp[okay], yp[-1]]
    zp = np.r_[zp[okay], zp[-1]]

    # Fit cubic B-spline to the points (s=1 allows some smoothing)
    tck, _ = interpolate.splprep([xp, yp, zp], s=1)
    u_fine = np.linspace(0, 1, out_n)
    x_f, y_f, z_f = interpolate.splev(u_fine, tck)

    # One row per resampled point
    return np.column_stack((x_f, y_f, z_f))

# 3. Convolutional Autoencoders

## Convolution (Normal) AutoEncoder Z=10

## Convolution (Spatial Separable) AutoEncoder Z=10

In [None]:
from keras.layers import Lambda, Add, MaxPooling1D, UpSampling1D

def autoEncoderCov1DZ10():
    """Build the separable-convolution autoencoder with a 10-dimensional code.

    Encoder: three parallel SeparableConv1D branches (kernel sizes 5, 20 and
    50) on the (100, 3) input, each max-pooled by 2 and flattened, then
    concatenated and reduced through Dense layers 100 -> 30 -> 10.

    Decoder: Dense layers 30 -> 100 -> 600; the 600 values are cut into three
    slices of 200, each reshaped to (50, 4), upsampled to (100, 4) and
    convolved; the three branches are summed and a final convolution maps
    them back to 3 channels.

    Returns
    -------
    (autoencoder, encoder)
        Two uncompiled Keras models sharing the same input and encoder layers:
        input -> reconstruction, and input -> 10-dimensional code.
    """
    inp_curve = Input(shape=(100, 3,))

    # Encoding
    x1 = SeparableConv1D(4, 5, padding='same')(inp_curve)   # (100, 4)
    x1 = MaxPooling1D(2)(x1)                                # (50, 4)
    x1 = Flatten()(x1)                                      # (200,)
    x2 = SeparableConv1D(4, 20, padding='same')(inp_curve)
    x2 = MaxPooling1D(2)(x2)
    x2 = Flatten()(x2)
    x3 = SeparableConv1D(4, 50, padding='same')(inp_curve)
    x3 = MaxPooling1D(2)(x3)
    x3 = Flatten()(x3)
    x = concatenate([x1, x2, x3], axis=1)                   # (600,)
    x = Dense(100, activation='relu')(x)
    x = Dense(30, activation='relu')(x)
    encoded = Dense(10, activation='relu')(x)

    # Decoding
    x = Dense(30, activation='relu')(encoded)
    x = Dense(100, activation='relu')(x)
    x = Dense(600, activation='relu')(x)

    # UpSampling1D needs a 3D (batch, steps, features) tensor, so every flat
    # 200-value slice is first reshaped to (50, 4) - the shape the encoder
    # branch had before Flatten - and only then upsampled to (100, 4).
    x1 = Lambda(lambda x: x[:, 0:50*4])(x)
    x1 = Reshape((50, 4))(x1)
    x1 = UpSampling1D(2)(x1)
    x1 = SeparableConv1D(4, 5, padding='same')(x1)
    x2 = Lambda(lambda x: x[:, 50*4:2*50*4])(x)
    x2 = Reshape((50, 4))(x2)
    x2 = UpSampling1D(2)(x2)
    x2 = SeparableConv1D(4, 5, padding='same')(x2)
    x3 = Lambda(lambda x: x[:, 2*50*4:3*50*4])(x)
    x3 = Reshape((50, 4))(x3)
    x3 = UpSampling1D(2)(x3)
    x3 = SeparableConv1D(4, 5, padding='same')(x3)
    x = Add()([x1, x2, x3])
    decoded = SeparableConv1D(3, 5, padding='same')(x)      # (100, 3)

    autoencoder = Model(inp_curve, decoded)
    encoder = Model(inp_curve, encoded)

    return autoencoder, encoder

# Train the architecture 10 times, keep the run with the lowest validation
# loss (saved under the tag 'Z10C1D'), then plot its loss curves and a sample
# of reconstructed test curves.
AE,E,train_data =AE_analysis(autoEncoderCov1DZ10, 10, x_train, x_test, filename='Z10C1D')
visualize_AE(AE, train_data, x_test)

## Convolution (with FFT) AutoEncoder Z=10

## Variational Autoencoder Z=10