In [None]:
# See also https://i-systems.github.io/teaching/ML/iNotes/15_Autoencoder.html
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
import sklearn as skl
from sklearn import model_selection
from sklearn.neural_network import MLPRegressor

## Initialization and visual inspection
First we do some initializations and normalize each parameter to lie in the interval $[0, 1]$ (min-max scaling). Then we plot the projection of the data onto the 2D coordinate plane of $\sigma_0$ and $f_\mathrm{Si}$. We notice a distinct banana shape.

In [None]:
nskip = 1000  # Use only every 1000th data point

# Read the whitespace-separated Monte Carlo table, thinning it while parsing:
# file line 0 (the header) and every nskip-th line after it are kept, all
# other lines are skipped. A callable avoids building a skip list with a
# hard-coded length and the float index arrays that recent NumPy versions
# reject in np.delete.
# Alternative location used previously:
#   /mnt/c/Users/chral/Dropbox/ipp/paper_algae/mc_out.dat
data = pd.read_table(
    '/Users/ert/Dropbox/ipp/paper_algae/mc_out.dat',
    sep=r'\s+',
    skiprows=lambda line: line % nskip != 0)

# Short symbol -> column name in the data file.
names = {
    'mu_0': 'k_alg_growth_max',
    'f_si': 'frac_si_alg_1',
    'lambda_S': 'k_att_shade',
    'K_light': 'k_light_sm',
    'sigma_0': 'k_alg_loss',
    'a': 'coeff_d_loss_2'
}

# Min-max scale the (sigma_0, f_si) columns to [0, 1], column by column.
indata = data[['k_alg_loss', 'frac_si_alg_1']].values
lower = np.min(indata, 0)
indata = (indata - lower) / np.max(indata - lower, 0)

# Last expression of the cell: rendered as the cell output.
px.scatter(x=indata[:,0], y=indata[:,1], labels={'x': 'sigma_0', 'y': 'f_si'})

## Autoencoder

Linear methods cannot represent such shapes, so we employ an autoencoder for nonlinear subspace identification. scikit-learn provides a simple way to implement this with a multilayer perceptron (MLP) regressor: the network is trained to reproduce its own input while passing it through a bottleneck layer with a single neuron. We split the data into a training and a test set to judge the quality of the final result. Then we do the fit. Here the L-BFGS (limited-memory quasi-Newton) optimizer is used, since the network is small enough. For larger networks one would typically use Adam or Stochastic Gradient Descent.

In [None]:
# Symmetric MLP with a single-neuron bottleneck in the middle. The same array
# is passed as input and as target, so the network learns the identity map
# through that bottleneck.
# random_state is fixed in both calls so that weight initialisation and the
# train/test partition are the same on every "Restart & Run All"; change the
# value to explore other initialisations.
reg = MLPRegressor(hidden_layer_sizes=(8, 8, 1, 8, 8),
                   activation='tanh',
                   solver='lbfgs',
                   max_iter=512,
                   tol=1e-7,
                   verbose=True,
                   random_state=0)
train, test = model_selection.train_test_split(indata, random_state=0)
reg.fit(train, train)

## Prediction and plotting

Now we evaluate the autoencoder network for the training and the test data and plot the predictions against the original data. In this example the autoencoder performs visually well. What we see is the fit of the identity transform involving a projection on the submanifold spanned by the data.

In [None]:
# Send both subsets through the fitted autoencoder (input -> bottleneck -> output).
output_eval_train = reg.predict(train[:, :2])
output_eval_test = reg.predict(test[:, :2])

# One figure per subset: original points overlaid with their reconstruction.
for caption, original, reconstructed in (
        ('Training data', train, output_eval_train),
        ('Test data', test, output_eval_test)):
    fig = px.scatter(
        x=original[:, 0],
        y=original[:, 1],
        labels={'x': 'sigma_0', 'y': 'f_si'},
        title=caption)
    fig.add_scatter(
        x=reconstructed[:, 0],
        y=reconstructed[:, 1],
        mode='markers',
        name='autoencoder')
    fig.show()

## Projection and back-projection between reduced and full dimension

Now we plot the dependency $x_k = \xi_k(t)$ of the two parameters in full space on the single hidden parameter $t$. Due to limitations of scikit-learn we cut the network in half and use the first half as the inverse transform $x \mapsto t$. Later the same should also be done for the second (right) half of the network, which maps $t$ back to full space. In more advanced formulations one may also leverage the symmetry to reduce the number of independent weights.

In [None]:
# Cut the network after the bottleneck by lowering the layer count that
# predict() iterates over (relies on scikit-learn internals).
# The count is derived from the constructor argument instead of from
# reg.n_layers_ itself, so re-running this cell does not truncate the
# network a second time. For 5 hidden layers this gives 4, the same value
# the original self-referential formula produced on its first execution.
n_hidden = len(reg.hidden_layer_sizes)
reg.n_layers_ = (n_hidden + 1) // 2 + 1

# One figure per subset: both full-space parameters, original and
# reconstructed ("from lift"), plotted against the bottleneck value t.
for original, lifted in ((train, output_eval_train),
                         (test, output_eval_test)):
    ae_parm = reg.predict(original)
    fig = go.Figure()
    for column, label in enumerate(('sigma_0', 'f_Si')):
        fig.add_scatter(x=ae_parm, y=original[:, column],
            mode='markers', name=label)
        fig.add_scatter(x=ae_parm, y=lifted[:, column],
            mode='markers', name=label + ' from lift')
    fig.update_layout(
        xaxis_title = 't (hidden curve parameter)',
        yaxis_title = 'sigma_0, f_Si')
    fig.show()

## The same with TensorFlow in higher dimension

In [None]:
import tensorflow as tf
from tensorflow import keras

# Number of units in the middle Dense layer of the Keras model built below.
Mdim = 5  # Dimension of autoencoder bottleneck

In [None]:
# Take the columns of all 6 parameters listed in `names` and min-max scale
# each column to [0, 1].
indata = data[names.values()].values
column_min = np.min(indata, 0)
column_span = np.max(indata - column_min, 0)
indata = (indata - column_min) / column_span

In [None]:
# Dense autoencoder 6 -> 64 -> 64 -> Mdim -> 64 -> 64 -> 6 with tanh on all
# layers except the linear output, trained on mean squared error with Adam.
hidden_activation = tf.nn.tanh
model = keras.Sequential([
    keras.layers.Dense(64, input_dim=6, activation=hidden_activation),
    keras.layers.Dense(64, activation=hidden_activation),
    keras.layers.Dense(Mdim, activation=hidden_activation),  # bottleneck
    keras.layers.Dense(64, activation=hidden_activation),
    keras.layers.Dense(64, activation=hidden_activation),
    keras.layers.Dense(6, activation=None),
])
model.compile(optimizer=tf.optimizers.Adam(), loss='mse')

In [None]:
# Fixed seeds make the train/test partition and the shuffling order
# repeatable across kernel restarts. The initial network weights are not
# covered by these seeds.
train, test = model_selection.train_test_split(indata, random_state=0)

# Autoencoder training pairs: every sample is both input and target.
dataset = tf.data.Dataset.from_tensor_slices((train, train))
train_dataset = dataset.shuffle(10000, seed=0).batch(1)

In [None]:
# Train for 128 passes over the shuffled dataset; the object returned by
# fit() is kept in `history` for later inspection.
history = model.fit(
    train_dataset,
    epochs=128,
)

In [None]:
# Reconstruct the full normalized data set and look at the residual of each
# parameter separately.
output_eval = model.predict(indata)
deviation = indata - output_eval
n = list(names.keys())

fig = go.Figure()
# One marker trace per parameter, labelled with its short symbol.
for k, label in enumerate(n):
    fig.add_scatter(y=deviation[:, k], mode='markers', name=label)
fig.update_yaxes(range=[-1, 1])
fig.update_traces(marker={'size': 3})
fig.update_layout(
    title='Deviation of data from prediction',
    xaxis_title='Index',
    yaxis_title='Normalized deviation of autoencoder')
fig.show()