# Autoencoders

On Colab, the following cell downloads the data folder and `TS_utils.py` from the repository.

If you work locally, make sure the *data/* folder and `TS_utils.py` are available next to this notebook. You can download them from the repo.

In [1]:
try:
  from google.colab import files
  !wget -q https://github.com/Amelrich/Capstone-Fall-2020/archive/master.zip
  !unzip -q master.zip
  !rm -rf data
  !mv Capstone-Fall-2020-master/data/ data/
  !mv Capstone-Fall-2020-master/TS_utils.py /content/
  !rm -rf master.zip Capstone-Fall-2020-master/
except:
  print("only in Colab")

In [19]:
import itertools

import matplotlib.pyplot as plt
import numpy as np
import numpy.random as rd
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import MinMaxScaler

from TS_utils import TS_generator, Synthetic_TS_generator
from TS_utils import MedianScaler, DCT_lowpass_filter
from TS_utils import KMedians

# **Autoencoders**

In [20]:
import keras
from keras import Model
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

## Model

In [21]:
class Autoencoder:
    """Minimal dense autoencoder: one Dense encoder layer, one Dense decoder layer.

    The encoder maps an input of length ``time_step`` to ``embedding_size``
    units and the decoder maps it back to ``time_step`` units. No activation
    is passed to either layer, so the Keras default applies. The end-to-end
    model is compiled with the Adam optimizer and a mean-squared-error loss.
    """

    def __init__(self, time_step, embedding_size,
                 n_features=1, plot_model=False, learning_rate=0.005):
        """Build and compile the encoder, decoder and end-to-end model.

        Parameters
        ----------
        time_step : int
            Length of each input vector (size of the input and output layers).
        embedding_size : int
            Number of units of the encoding layer.
        n_features : int, optional
            Currently unused; kept so existing callers keep working.
        plot_model : bool, optional
            When True, print the Keras summary of the end-to-end model.
        learning_rate : float, optional
            Learning rate handed to Adam (default 0.005, the value that was
            previously hard-coded).
        """
        self.encoder = Sequential()
        self.encoder.add(Dense(embedding_size, input_shape=(time_step,)))

        self.decoder = Sequential()
        self.decoder.add(Dense(time_step))

        # End-to-end model used for training and reconstruction.
        self.autoencoder = Model(self.encoder.input, self.decoder(self.encoder.output))
        # Encoder-only model sharing the same layer; used to read embeddings.
        self.encoding_model = Model(self.encoder.input, self.encoder.output)

        opt = Adam(learning_rate=learning_rate)
        self.autoencoder.compile(optimizer=opt, loss='mse')

        if plot_model:
            # summary() prints by itself and returns None, so wrapping it in
            # print() would emit a stray "None" line.
            self.autoencoder.summary()

    def fit(self, X_train, X_test, epochs=25, verbose=0, batch_size=16):
        """Train the model to reconstruct ``X_train``.

        ``X_test`` is passed as validation data, also with itself as target.

        Parameters
        ----------
        X_train, X_test : array-like
            Training and validation inputs; each row is expected to have
            ``time_step`` values.
        epochs : int, optional
            Number of training epochs.
        verbose : int, optional
            Verbosity level forwarded to Keras.
        batch_size : int, optional
            Mini-batch size (default 16, the value that was previously
            hard-coded).
        """
        self.autoencoder.fit(X_train, X_train, batch_size=batch_size, epochs=epochs,
                             shuffle=True, verbose=verbose,
                             validation_data=(X_test, X_test))

    def encode(self, x):
        """Return the encoder output (the embedding) for ``x``."""
        return self.encoding_model.predict(x)

    def predict(self, x):
        """Return the reconstruction of ``x`` produced by the full autoencoder."""
        return self.autoencoder.predict(x)


In [26]:
def AE_clustering(X, n_short=None, n_clusters=7, epochs=100, random_state=None):
    """Cluster the rows of ``X`` using autoencoder embeddings plus recent values.

    Pipeline:
      1. Filter ``X`` with ``DCT_lowpass_filter`` and min-max scale each row.
      2. Train an ``Autoencoder`` on the first half of the rows, validating on
         the second half.
      3. Encode every row, min-max scale each embedding, and concatenate it
         with the last ``short_scale`` scaled values of the row.
      4. Run KMeans on the concatenated features.

    Parameters
    ----------
    X : 2-D array
        One series per row; ``X.shape[1]`` is used as the long scale.
    n_short : int or None, optional
        Short scale: both the embedding size and the number of trailing
        values kept from each row. Defaults to ``X.shape[1] // 3``.
    n_clusters : int, optional
        Number of KMeans clusters.
    epochs : int, optional
        Training epochs for the autoencoder (default 100, the value that was
        previously hard-coded).
    random_state : int or None, optional
        Forwarded to KMeans only; it does not seed the autoencoder training.

    Returns
    -------
    array
        Cluster label of each row of ``X``, as returned by
        ``KMeans.fit_predict``.

    Raises
    ------
    ValueError
        If the resulting short scale is smaller than 1.
    """
    print('#'*54)
    print('##### Welcome to Autoencoder clustering pipeline ##### ')
    print('#'*54, '\n')

    long_scale = X.shape[1]
    short_scale = long_scale // 3 if n_short is None else n_short
    if short_scale < 1:
        # A short scale of 0 would make the `[:, -short_scale:]` slice below
        # select every column instead of none.
        raise ValueError(
            'short scale must be at least 1, got {} (long scale is {})'.format(
                short_scale, long_scale))

    print('Scaling raw data')
    Xscale = DCT_lowpass_filter().fit_transform(X)
    # MinMaxScaler works column-wise, so transpose to rescale each row
    # (each series) independently, then transpose back.
    Xscale = MinMaxScaler().fit_transform(Xscale.T).T

    # First half of the rows for training, second half for validation.
    half = X.shape[0] // 2
    X_train, X_val = Xscale[:half, :], Xscale[half:, :]

    autoencoder = Autoencoder(time_step=long_scale, embedding_size=short_scale)
    print('Training the autoencoder neural network...')
    autoencoder.fit(X_train, X_val, epochs=epochs, verbose=0)
    print('Training process done!')

    print('Clustering...')
    embedding = autoencoder.encode(Xscale)
    # Same per-row rescaling trick, applied to the embeddings.
    embedding = MinMaxScaler().fit_transform(embedding.T).T
    # Features = most recent `short_scale` scaled values + scaled embedding.
    X_cluster = np.concatenate([Xscale[:, -short_scale:], embedding], axis=1)

    clusterer = KMeans(n_clusters=n_clusters, random_state=random_state)
    labels = clusterer.fit_predict(X_cluster)
    print('Clustering done! Your turn Hritik ;)')

    return labels

## Example

Recommended short scale (`n_short`) for each long scale (series length):
* 80 -> 30
* 70 -> 25
* 60 -> 20
* 50 -> 20
* 40 -> 15
* 30 -> 10
* 20 -> 10

In [28]:
# Build a generator configured with nb_timeseries=2000 and chunk_size=60 and
# fetch its data as an array (presumably one series per row -- confirm in TS_utils).
gen = TS_generator(nb_timeseries=2000, chunk_size=60)
X = gen.get_array()

# Long scale 60 -> short scale 20, following the recommendation list above.
# The returned cluster labels are displayed as the cell output.
AE_clustering(X, n_short=20, n_clusters=7)

######################################################
##### Welcome to Autoencoder clustering pipeline ##### 
###################################################### 

Scaling raw data
Training the autoencoder neural network...
Training process done!
Clustering...
Clustering done! Your turn Hritik ;)


array([4, 0, 5, ..., 5, 0, 1], dtype=int32)