# Recurrent AutoEncoder with Sequence-aware encoding

## About

This source code was written for research purposes (https://arxiv.org/abs/2009.07349) and has minimal error checking. The code may not be very readable and the comments may not be adequate. There is no warranty; use this code at your own risk.

Cite (paper preprint):

```
@misc{susik2020recurrent,
    title={Recurrent autoencoder with sequence-aware encoding},
    author={Robert Susik},
    year={2020},
    eprint={2009.07349},
    archivePrefix={arXiv},
    primaryClass={cs.LG}
}
```

## Requirements

- Python 3
- Tensorflow==2.3.0
- Matplotlib
- Numpy
- Pandas

See the `requirements.txt` file for more details.

_NOTE: The code was tested on Fedora 28 (64-bit) and has not been tested on any other operating system._


## Load all results

In [None]:
import pickle
import numpy as np
import pandas as pd

import main
from importlib import reload
reload(main)
from main import *
from IPython import display
import glob

# Collect the 'training' entry of every record stored in the result pickles
# found in the current working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling --
# only load result files that come from a trusted source.

# glob returns matches in arbitrary order; sorting keeps the row order of
# `results` / `results_df` reproducible between runs and machines.
result_files = sorted(glob.glob('results_*.pickle'))
if not result_files:
    # Without this guard np.concatenate fails with the much less helpful
    # "need at least one array to concatenate".
    raise FileNotFoundError(
        "No 'results_*.pickle' files found in the current directory; "
        "there is nothing to load."
    )

tmp = []

for filename in result_files:
    with open(filename, 'rb') as f:
        tmp.append([x['training'] for x in np.array(pickle.load(f))])

results = np.concatenate(tmp)
results_df = pd.DataFrame(results) # load results also to dataframe


## Plot figures

### Figure 2

In [None]:
# Figure 2a: plot the 'loss' metric of the runs with FEATURES == 1 and
# HIDDEN_DIM == 50.
fig2a_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 1 and run[TRAINING_DETAILS.HIDDEN_DIM] == 50:
        fig2a_runs.append(run)

plot_results(fig2a_runs, metric='loss', filename='fig2a.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig2a.pgf.png')

In [None]:
# Figure 2b: plot the 'loss' metric of the runs with FEATURES == 1 and
# HIDDEN_DIM == 200.
fig2b_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 1 and run[TRAINING_DETAILS.HIDDEN_DIM] == 200:
        fig2b_runs.append(run)

plot_results(fig2b_runs, metric='loss', filename='fig2b.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig2b.pgf.png')

### Figure 3

In [None]:
# Figure 3a: plot the 'loss' metric of the runs with FEATURES == 2 and
# HIDDEN_DIM == 100.
fig3a_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 2 and run[TRAINING_DETAILS.HIDDEN_DIM] == 100:
        fig3a_runs.append(run)

plot_results(fig3a_runs, metric='loss', filename='fig3a.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig3a.pgf.png')

In [None]:
# Figure 3b: plot the 'loss' metric of the runs with FEATURES == 2 and
# HIDDEN_DIM == 400.
fig3b_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 2 and run[TRAINING_DETAILS.HIDDEN_DIM] == 400:
        fig3b_runs.append(run)

plot_results(fig3b_runs, metric='loss', filename='fig3b.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig3b.pgf.png')

### Figure 4

In [None]:
# Figure 4a: plot the 'loss' metric of the runs with FEATURES == 4 and
# HIDDEN_DIM == 200.
fig4a_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 4 and run[TRAINING_DETAILS.HIDDEN_DIM] == 200:
        fig4a_runs.append(run)

plot_results(fig4a_runs, metric='loss', filename='fig4a.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig4a.pgf.png')

In [None]:
# Figure 4b: plot the 'loss' metric of the runs with FEATURES == 4 and
# HIDDEN_DIM == 800.
fig4b_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 4 and run[TRAINING_DETAILS.HIDDEN_DIM] == 800:
        fig4b_runs.append(run)

plot_results(fig4b_runs, metric='loss', filename='fig4b.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig4b.pgf.png')

### Figure 5

In [None]:
# Figure 5a: plot the 'loss' metric of the runs with FEATURES == 8 and
# HIDDEN_DIM == 400.
fig5a_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 8 and run[TRAINING_DETAILS.HIDDEN_DIM] == 400:
        fig5a_runs.append(run)

plot_results(fig5a_runs, metric='loss', filename='fig5a.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig5a.pgf.png')

In [None]:
# Figure 5b: plot the 'loss' metric of the runs with FEATURES == 8 and
# HIDDEN_DIM == 1600.
fig5b_runs = []
for run in np.array(results):
    if run[TRAINING_DETAILS.FEATURES] == 8 and run[TRAINING_DETAILS.HIDDEN_DIM] == 1600:
        fig5b_runs.append(run)

plot_results(fig5b_runs, metric='loss', filename='fig5b.pgf')

# Show the PNG rendering (presumably written by plot_results next to the
# .pgf file).
display.Image(filename='./fig5b.pgf.png')

### Figure 6 (7 in paper)

In [None]:
# Figure 6a: 'loss' plotted against accumulated training time for the runs
# with 1 feature and hidden dimension 200 or 50.
nfeatures = [1]
hd = [200, 50]


def _cumulative_timings_fig6a(res):
    """Return the running totals of the TIMINGS entries of one run."""
    timings = res[TRAINING_DETAILS.TIMINGS]
    return [sum(timings[:end]) for end in range(1, len(timings) + 1)]


# The runs are passed grouped by algorithm, in this order: RAES, RAE, RAESC.
fig6a_runs = []
for algorithm in ('RAES', 'RAE', 'RAESC'):
    for run in np.array(results):
        if (
            run[TRAINING_DETAILS.NAME] == algorithm
            and run[TRAINING_DETAILS.FEATURES] in nfeatures
            and run[TRAINING_DETAILS.HIDDEN_DIM] in hd
        ):
            fig6a_runs.append(run)

fig6a_line_styles = {'RAE': ['--'], 'RAESC': ['-'], 'RAES': ['-']}
fig6a_colors = {
    'RAE': ['red', 'firebrick', 'orange', 'salmon', 'gold', 'olive', 'y'],
    'RAESC': ['green', 'olive', 'lightgreen', 'black'],
    'RAES': ['blue'],
}

plot_results3(
    fig6a_runs,
    _cumulative_timings_fig6a,
    metric='loss',
    filename='fig6a.pgf',
    label_fmt='{NAME} {HD}',
    line_styles_mapping=fig6a_line_styles,
    color_mapping=fig6a_colors,
    xlim=(-1, 101),
    ylim=(-0.01, 1.1),
    ylabel='loss',
    xlabel='time [s]',
    xticks=np.arange(25) * 50,
)

# Show the PNG rendering (presumably written by plot_results3 next to the
# .pgf file).
display.Image(filename='./fig6a.pgf.png')

In [None]:
# Figure 6b: 'loss' plotted against accumulated training time for the runs
# with 2 features and hidden dimension 400 or 100.
nfeatures = [2]
hd = [400, 100]

plot_results3(
[x for x in np.array(results) if 
    x[TRAINING_DETAILS.NAME] in ['RAES'] and 
    x[TRAINING_DETAILS.FEATURES] in nfeatures and
    x[TRAINING_DETAILS.HIDDEN_DIM] in hd
] + [x for x in np.array(results) if 
    x[TRAINING_DETAILS.NAME] in ['RAE'] and 
    x[TRAINING_DETAILS.FEATURES] in nfeatures and
    x[TRAINING_DETAILS.HIDDEN_DIM] in hd
] + [x for x in np.array(results) if 
    x[TRAINING_DETAILS.NAME] in ['RAESC'] and 
    x[TRAINING_DETAILS.FEATURES] in nfeatures and
    x[TRAINING_DETAILS.HIDDEN_DIM] in hd
]
    # x axis: running totals of the TIMINGS entries of each run
    , lambda res: [sum(res[TRAINING_DETAILS.TIMINGS][:idx+1]) for idx, _ in enumerate(res[TRAINING_DETAILS.TIMINGS])]
    , metric='loss'
    # The output name must match the image displayed below; it used to be
    # 'fig6__.pgf', so the cell showed a missing or stale './fig6b.pgf.png'.
    # NOTE(review): assumes plot_results3 writes '<filename>.png' -- confirm.
    , filename='fig6b.pgf'
    , label_fmt='{NAME} {HD}'
    , line_styles_mapping = {'RAE': ['--'], 'RAESC': ['-'], 'RAES': ['-']}
    , color_mapping = {
          'RAE': ['red', 'firebrick', 'orange', 'salmon', 'gold', 'olive', 'y'],
          'RAESC': ['green', 'olive', 'lightgreen', 'black'],
          'RAES': ['blue']
      }
    , xlim=(-1, 101)
    , ylim=(-0.01, 1.1)
    , ylabel='loss'
    , xlabel='time [s]'
    , xticks=np.arange(25)*50
)

display.Image(filename='./fig6b.pgf.png')

### Figure 7 (6 in paper)

In [None]:
# Figure 7b: 'loss' and 'val_loss' plotted against accumulated training time
# for the RAESC and RAE runs with 2 features and hidden dimension 400.
nfeatures = [2]
hd = [400]


def _cumulative_timings_fig7b(res):
    """Return the running totals of the TIMINGS entries of one run."""
    timings = res[TRAINING_DETAILS.TIMINGS]
    return [sum(timings[:end]) for end in range(1, len(timings) + 1)]


# The runs are passed grouped by algorithm, in this order: RAESC, RAE.
fig7b_runs = []
for algorithm in ('RAESC', 'RAE'):
    for run in np.array(results):
        if (
            run[TRAINING_DETAILS.NAME] == algorithm
            and run[TRAINING_DETAILS.FEATURES] in nfeatures
            and run[TRAINING_DETAILS.HIDDEN_DIM] in hd
        ):
            fig7b_runs.append(run)

fig7b_line_styles = {'RAE': ['--'], 'RAESC': ['-'], 'RAES': ['-']}
fig7b_colors = {
    'RAE': ['red', 'firebrick', 'orange', 'salmon', 'gold', 'olive', 'y'],
    'RAESC': ['green', 'olive', 'lightgreen', 'black'],
    'RAES': ['blue', 'cyan'],
}

plot_results3(
    fig7b_runs,
    _cumulative_timings_fig7b,
    metric=['loss', 'val_loss'],
    filename='fig7b.pgf',
    label_fmt='{NAME} ({METRIC})',
    line_styles_mapping=fig7b_line_styles,
    color_mapping=fig7b_colors,
    xlim=(-1, 101),
    ylim=(-0.01, 1.2),
    ylabel='loss',
    xlabel='time [s]',
    xticks=np.arange(25) * 50,
)

#display.Image(filename=f'./fig7.pgf.png')

In [None]:
# Figure 7a: 'loss' and 'val_loss' plotted against accumulated training time
# for the RAESC and RAE runs with 1 feature and hidden dimension 50.
nfeatures = [1]
hd = [50]


def _cumulative_timings_fig7a(res):
    """Return the running totals of the TIMINGS entries of one run."""
    timings = res[TRAINING_DETAILS.TIMINGS]
    return [sum(timings[:end]) for end in range(1, len(timings) + 1)]


# The runs are passed grouped by algorithm, in this order: RAESC, RAE.
fig7a_runs = []
for algorithm in ('RAESC', 'RAE'):
    for run in np.array(results):
        if (
            run[TRAINING_DETAILS.NAME] == algorithm
            and run[TRAINING_DETAILS.FEATURES] in nfeatures
            and run[TRAINING_DETAILS.HIDDEN_DIM] in hd
        ):
            fig7a_runs.append(run)

fig7a_line_styles = {'RAE': ['--'], 'RAESC': ['-'], 'RAES': ['-']}
fig7a_colors = {
    'RAE': ['red', 'firebrick', 'orange', 'salmon', 'gold', 'olive', 'y'],
    'RAESC': ['green', 'olive', 'lightgreen', 'black'],
    'RAES': ['blue', 'cyan'],
}

plot_results3(
    fig7a_runs,
    _cumulative_timings_fig7a,
    metric=['loss', 'val_loss'],
    filename='fig7a.pgf',
    label_fmt='{NAME} ({METRIC})',
    line_styles_mapping=fig7a_line_styles,
    color_mapping=fig7a_colors,
    xlim=(-1, 61),
    ylim=(-0.01, 1.2),
    ylabel='loss',
    xlabel='time [s]',
    xticks=np.arange(25) * 50,
)

## Table 1 (median epoch time)

In [None]:
def _median_skipping_first(timings):
    """Return the median of the timings with the first entry left out."""
    return np.median(timings[1:])


# Median time per run, ignoring the first TIMINGS entry.
results_df['time_median'] = results_df[TRAINING_DETAILS.TIMINGS].apply(_median_skipping_first)

# sigma = HIDDEN_DIM / (FEATURES * SEQ_LEN)
input_size = results_df[TRAINING_DETAILS.FEATURES] * results_df[TRAINING_DETAILS.SEQ_LEN]
results_df['sigma'] = results_df[TRAINING_DETAILS.HIDDEN_DIM] / input_size

In [None]:
# Keep only the rows whose feature count and sigma value appear in the table,
# ordered by feature count and then by algorithm name (both ascending).
has_table_features = results_df[TRAINING_DETAILS.FEATURES].isin([1, 2, 4, 8])
has_table_sigma = results_df['sigma'].isin([0.25, 0.5, 1.0])

table_rows = results_df[has_table_features & has_table_sigma]
results_df_sorted = table_rows.sort_values(
    [TRAINING_DETAILS.FEATURES, TRAINING_DETAILS.NAME],
    ascending=[True, True],
)

In [None]:
# Print the body of Table 1 as LaTeX rows: one row per (feature count,
# algorithm) pair, with one 'time_median' cell per sigma value. A '-' is
# printed when no matching run exists; an '\hline' closes each feature group.
for feature in [1, 2, 4, 8]:
    for algorithm in ['RAE', 'RAES', 'RAESC']:
        medians = []
        for sigma in [0.25, 0.5, 1.0]:
            # Build the masks from results_df_sorted itself. Masks taken from
            # the unfiltered results_df have a different index, which makes
            # pandas reindex them and emit "Boolean Series key will be
            # reindexed to match DataFrame index".
            tmp = results_df_sorted[
                (results_df_sorted[TRAINING_DETAILS.FEATURES] == feature) &
                (results_df_sorted[TRAINING_DETAILS.NAME] == algorithm) &
                (results_df_sorted['sigma'] == sigma)
            ]

            if not tmp.empty:
                # Only the first matching run is reported.
                medians.append(f"{tmp['time_median'].values[0]:<9.2f}")
            else:
                medians.append(f"{'-':9}")
        print(f'{feature:<10} & {algorithm:<14} & {" & ".join(medians)} \\\\')
    print('\\hline')