<a href="https://colab.research.google.com/github/nina-prog/DataAnalysis_VAE/blob/main/VAE_v1_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [17]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.layers import Dense
from keras.layers import TimeDistributed
from keras.utils.vis_utils import plot_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import seaborn as sns
# Apply seaborn's default theme to the matplotlib figures drawn after this point
sns.set()

# Data Preprocessing

---



## Load Data

In [18]:
### Load the ECG5000 data (whitespace-separated values, no header row) using read_csv
# The separator is a regex, so it is written as a raw string: in a normal
# string literal '\s' is an invalid escape sequence and triggers a warning.
ecg5000 = pd.read_csv('ECG5000_ALL.txt', sep=r'\s+', header=None)

### Delete label-column first (column 0)
# Assign the result instead of mutating in place so the lineage stays explicit.
ecg5000 = ecg5000.drop(columns=ecg5000.columns[0])

In [None]:
### Optional sanity check: type, dimensions, element count and first rows of the data set
print(f"Type of ecg5000: \t \t {type(ecg5000)}")
print(f"Dimensions of ecg5000: \t \t {ecg5000.shape}")
print(f"Number of elements of ecg5000: \t {ecg5000.size}")
print(f"Display first 10 rows of ecg5000: \n {ecg5000.head(10)}")

## Scale Data

In [None]:
### Min-max normalize each column of the frame into the range [-0.8, 0.8] (sklearn MinMaxScaler)
# NOTE(review): the scaler is fitted on the complete data set, i.e. before the
# train/test split further down, so test rows influence the scaling - confirm
# whether this is acceptable for the evaluation.
min_max_scaler = MinMaxScaler(feature_range=(-0.8, 0.8))
scaled_values = min_max_scaler.fit_transform(ecg5000)
scaled_ecg5000 = pd.DataFrame(scaled_values)
print(scaled_ecg5000)

## Split Data

In [None]:
### Split Data into 80/20 Training, Test
# A fixed random_state makes the shuffled split (and everything trained on it)
# reproducible after a kernel restart.
trainDF, testDF = train_test_split(scaled_ecg5000, test_size=0.2, random_state=42)
# The split must neither drop nor duplicate rows.
assert len(trainDF) + len(testDF) == len(scaled_ecg5000)
# Optional test and info about new data sets
print("Shape of Training DataFrame: \t {}".format(trainDF.shape))
print("Shape of Test DataFrame: \t {}".format(testDF.shape))

## Reshape Data

In [None]:
### Read the number of samples (rows) and time steps (columns) from the frame shapes
s_train, n_train = trainDF.shape
s_test, n_test = testDF.shape

### Convert to arrays and reshape into [samples, timesteps, features] with one feature
x_train = trainDF.to_numpy().reshape(s_train, n_train, 1)
x_test = testDF.to_numpy().reshape(s_test, n_test, 1)

### Properties
print(f"Shape of reshaped train dataset: {x_train.shape}")
print(f"Shape of reshaped test dataset: {x_test.shape}")

# Create Sample Layer

---



In [None]:
# TODO: sampling-layer function that will be used for the VAE

# Build Variational Autoencoder (VAE)

---



In [None]:
### For better understanding visit: https://towardsdatascience.com/autoencoders-for-the-compression-of-stock-market-data-28e8c1a2da3e
### For better understanding of layers and Recreating auto encoders visit: https://machinelearningmastery.com/lstm-autoencoders/
### or for code: https://gist.github.com/GerardBCN/40349b39bc45d4550141aff6966d1619#file-stock_price_autoencoding-ipynb
### For Reshaping Issues: https://machinelearningmastery.com/reshape-input-data-long-short-term-memory-networks-keras/

### Hyperparameters
encoding_dim = 140
epochs = 50
# Sequence length is read from the training data instead of hard-coding 140,
# so the model keeps matching x_train if the input length ever changes.
n_timesteps = x_train.shape[1]

### Define model
# Encoder
# TODO: try a bidirectional LSTM (maybe update the encoding dimension in code)
inputs = keras.Input(shape=(n_timesteps, 1), name='Input_layer')
encoded = layers.LSTM(encoding_dim, activation='tanh', name='Encode_1')(inputs)
# 5 units because the ECG5000 data has 5 classes - possibly try 2 or 1
encoded = layers.Dense(5, activation='tanh', name='Encode_2')(encoded)
# TODO (VAE): add the two Dense layers for mean and sigma here
# Decoder
decoded = layers.Dense(n_timesteps, activation='tanh', name='Decode_1')(encoded)
decoded = layers.Reshape((n_timesteps, 1), name='Decode_2')(decoded)
decoded = layers.LSTM(encoding_dim, return_sequences=True, activation='tanh', name='Decode_3')(decoded)
# Built from tensorflow.keras layers only, so the graph does not mix classes
# from the standalone keras package with tf.keras ones.
outputs = layers.TimeDistributed(layers.Dense(1, activation='tanh'), name='Output_Layer')(decoded)

sequence_autoencoder = keras.Model(inputs, outputs)
encoder = keras.Model(inputs, encoded)
# TODO: build a stand-alone decoder model, roughly keras.Model(encoded, outputs)
sequence_autoencoder.summary()

### Train AE
# The inputs are scaled to [-0.8, 0.8] and the output activation is tanh, so
# the reconstruction error is measured with mean squared error. Binary
# cross-entropy assumes targets and predictions in [0, 1] and is not valid here.
# TODO: change loss for VAE
sequence_autoencoder.compile(optimizer='adam', loss='mse')
history = sequence_autoencoder.fit(
    x_train, x_train,  # autoencoder: the input is also the target
    epochs=epochs,
    batch_size=32,
    shuffle=True,
    validation_data=(x_test, x_test),
)

### Recreation
decoded_ecg5000 = sequence_autoencoder.predict(x_train)
# Save a diagram of the architecture (tf.keras variant of plot_model).
keras.utils.plot_model(sequence_autoencoder, show_shapes=True, to_file='reconstruct_lstm_autoencoder.png')

# Plot Results

---



In [None]:
### Check that the shape of the reconstruction matches the shape of the input
print(x_train.shape)
print(decoded_ecg5000.shape)

### Flatten both arrays to 2D: one row per sample, one column per time step
# The row length is taken from x_train instead of hard-coding 140.
n_timesteps = x_train.shape[1]
new_x_train = x_train.reshape(-1, n_timesteps)
new_decoded_ecg5000 = decoded_ecg5000.reshape(-1, n_timesteps)

### One sample plot
i = 34  # index of the sample that is going to be plotted
plt.figure(linewidth=1, figsize=(25, 6))
plt.title('Autoencoder Result')
plt.xlabel('time steps')
plt.ylabel('scaled value')
plt.plot(new_x_train[i], label='original ecg5000')
plt.plot(new_decoded_ecg5000[i], label='decoded ecg5000')
plt.legend(loc="upper left")
plt.show()

# TODO: add subplots (10 plots) - generate them in a loop?