<a href="https://colab.research.google.com/github/ipeirotis/autoencoders_census/blob/main/Autoencoder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install keras-tuner


In [None]:
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from keras.layers import Input, Dense
from keras.models import Model
from kerastuner.tuners import RandomSearch

class AutoencoderModel:
    def __init__(self):
        self.INPUT_SHAPE = None
        self.D = 5
        self.TEST_SIZE = 0.2
        self.RANDOM_STATE = 42
        self.SEED = 42
        self.VAL_LOSS = 'val_loss'
        self.MAX_TRIALS = 10
        self.EXECUTIONS_PER_TRIAL = 3
        self.DIRECTORY = 'my_dir'
        self.PROJECT_NAME = 'HDHyperparameter'
        self.OVERWRITE = True
        self.EPOCHS = 10
        self.BATCH_SIZE = 32
        self.NUM_TRIALS = 1

    def preprocess_dataset(self, df):
        """Zero-fill missing values and split the data into train and test parts.

        Side effect: stores the shape of the training part without its
        leading (row) dimension in ``self.INPUT_SHAPE``.

        NOTE(review): because missing values are replaced by 0.0 here, data
        produced by this method contains no NaNs for ``masked_mse`` to mask -
        confirm that this is intended.

        Args:
            df: Object exposing ``fillna`` whose result can be split by
                ``train_test_split`` (presumably a pandas DataFrame - confirm
                against the caller).

        Returns:
            A ``(X_train, X_test)`` tuple.
        """
        filled = df.fillna(0.0)
        train_part, test_part = train_test_split(
            filled,
            test_size=self.TEST_SIZE,
            random_state=self.RANDOM_STATE,
        )
        # Everything after the row dimension describes a single sample.
        self.INPUT_SHAPE = train_part.shape[1:]
        return train_part, test_part

    @staticmethod
    def masked_mse(y_true, y_pred):
        """Mean squared error that ignores NaN entries of ``y_true``.

        Positions where ``y_true`` is NaN contribute zero error. The mean is
        still taken over all elements (masked ones included), so the result
        for NaN-free targets is the plain mean squared error.

        Args:
            y_true: Target values; NaN marks a missing entry.
            y_pred: Predicted values, broadcast-compatible with ``y_true``.

        Returns:
            Scalar tensor holding the masked mean squared error.
        """
        # Align dtypes first: a float32 mask multiplied with e.g. float64
        # targets would otherwise raise a dtype mismatch.
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)

        missing = tf.math.is_nan(y_true)
        mask = tf.cast(tf.logical_not(missing), y_pred.dtype)

        # NaN * 0 is still NaN, so multiplying the squared error by the mask
        # is not enough on its own. Replace missing targets with a finite
        # value *before* subtracting so neither the loss nor its gradient
        # can pick up a NaN.
        observed = tf.where(missing, tf.zeros_like(y_true), y_true)
        return tf.reduce_mean(tf.square(observed - y_pred) * mask)

    def build_encoder(self, hp):
        """Build the encoder: two tunable ReLU layers followed by the latent layer.

        Args:
            hp: Hyperparameter object providing ``Int``; it is asked for
                ``encoder_units_1`` (32-256, step 32) and ``encoder_units_2``
                (16-128, step 16).

        Returns:
            A Keras ``Model`` mapping an input of shape ``self.INPUT_SHAPE``
            to a latent output with ``self.D`` units.
        """
        # Sample the hidden widths first, in the same order the layers use them.
        first_width = hp.Int('encoder_units_1', min_value=32, max_value=256, step=32)
        second_width = hp.Int('encoder_units_2', min_value=16, max_value=128, step=16)

        encoder_in = Input(shape=self.INPUT_SHAPE)
        hidden = Dense(units=first_width, activation='relu')(encoder_in)
        hidden = Dense(units=second_width, activation='relu')(hidden)
        bottleneck = Dense(units=self.D, activation='relu')(hidden)
        return Model(inputs=encoder_in, outputs=bottleneck)

    def build_decoder(self, hp):
        """Build the decoder: two tunable ReLU layers followed by a linear output.

        Args:
            hp: Hyperparameter object providing ``Int``; it is asked for
                ``decoder_units_1`` (16-128, step 16) and ``decoder_units_2``
                (32-256, step 32).

        Returns:
            A Keras ``Model`` mapping a latent input with ``self.D`` units to
            an output with ``self.INPUT_SHAPE[0]`` units.
        """
        # Sample the hidden widths first, in the same order the layers use them.
        first_width = hp.Int('decoder_units_1', min_value=16, max_value=128, step=16)
        second_width = hp.Int('decoder_units_2', min_value=32, max_value=256, step=32)

        latent_in = Input(shape=(self.D,))
        hidden = Dense(units=first_width, activation='relu')(latent_in)
        hidden = Dense(units=second_width, activation='relu')(hidden)
        # Linear activation leaves the reconstructed values unbounded.
        reconstruction = Dense(units=self.INPUT_SHAPE[0], activation='linear')(hidden)
        return Model(inputs=latent_in, outputs=reconstruction)

    def build_autoencoder(self, hp):
        """Build and compile the full autoencoder for one hyperparameter sample.

        Chains ``build_encoder`` and ``build_decoder`` and compiles the result
        with Adam and the ``masked_mse`` loss.

        Args:
            hp: Hyperparameter object providing ``Choice`` and ``Int``.

        Returns:
            The compiled Keras ``Model``.
        """
        lr = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        # Registers 'batch_size' in the search space; the sampled value is not
        # used in this method. NOTE(review): presumably read elsewhere during
        # fitting - confirm, otherwise it only widens the search space.
        hp.Int('batch_size', min_value=16, max_value=128, step=16)

        model_input = Input(shape=self.INPUT_SHAPE)
        encoder = self.build_encoder(hp)
        latent = encoder(model_input)
        decoder = self.build_decoder(hp)
        reconstruction = decoder(latent)

        model = Model(model_input, reconstruction)
        adam = keras.optimizers.Adam(learning_rate=lr)
        model.compile(optimizer=adam, loss=self.masked_mse)
        return model

    def define_tuner(self):
        """Create a ``RandomSearch`` tuner configured from this instance.

        Returns:
            A ``RandomSearch`` tuner that uses ``build_autoencoder`` as its
            model-building function.
        """
        search_settings = {
            'objective': self.VAL_LOSS,
            'max_trials': self.MAX_TRIALS,
            'executions_per_trial': self.EXECUTIONS_PER_TRIAL,
            'directory': self.DIRECTORY,
            'project_name': self.PROJECT_NAME,
            'overwrite': self.OVERWRITE,
            'seed': self.SEED,
        }
        return RandomSearch(self.build_autoencoder, **search_settings)
