# MDN Model training demo

This notebook demonstrates the training process of the MDN model using a small subset of our main training data set.

All data needed to run this notebook can be found in the `../data/training_demo` directory (path relative to this notebook).

# Imports

In [None]:
import os
from pprint import pprint

import numpy as np
import pandas as pd
import json

import tensorflow as tf
%load_ext tensorboard

from tensorflow_probability import distributions as tfd

from tensorflow.keras.layers import Input, InputLayer, Dense, Activation, Concatenate, Dropout
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard, ReduceLROnPlateau
from tensorflow.keras import backend as K

import exomdn.mdn_layer as mdn

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, FunctionTransformer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from scipy.stats import norm as normal
import joblib

from pathlib import Path
from datetime import datetime

In [None]:
# Report the TensorFlow build, then every physical device TensorFlow can see.
print(f"TF version: {tf.__version__}\nAvailable devices:")
pprint(tf.config.list_physical_devices(), width=1)

# Defining the MDN architecture

We are using a custom MDN layer from https://github.com/cpmpercussion/keras-mdn-layer

The training architecture and parameters are defined in the model_parameters.json file and used in the `build_mdn` function.

In [None]:
def build_mdn(n_inputs=2, n_outputs=1, n_layers=2, units_per_layer=100, components=10, **kwargs):
    """Build and compile a fully connected network with an MDN output layer.

    The network is an input layer of width ``n_inputs``, followed by
    ``n_layers`` ReLU-activated dense layers of ``units_per_layer`` units each,
    followed by an ``mdn.MDN`` output layer.

    Args:
        n_inputs: Number of input features.
        n_outputs: Number of predicted quantities, passed to the MDN layer and loss.
        n_layers: Number of hidden dense layers.
        units_per_layer: Width of every hidden layer.
        components: Number of mixture components, passed to the MDN layer and loss.
        **kwargs: Only ``learning_rate`` (default 0.001) is read; any other
            keyword is accepted and ignored.

    Returns:
        The compiled ``tf.keras.Sequential`` model named "MDN".
    """
    # Assemble the layer stack: input -> hidden ReLU layers -> MDN output.
    layers = [InputLayer(input_shape=(n_inputs,), name="input")]
    layers.extend(
        Dense(units_per_layer, activation="relu", kernel_initializer="glorot_uniform", name=f"relu_{idx}")
        for idx in range(n_layers)
    )
    layers.append(mdn.MDN(n_outputs, components, name="output_mdn"))
    model = tf.keras.Sequential(layers, name="MDN")

    # Train with the custom mixture loss (negative log-likelihood) supplied by the MDN module.
    mixture_loss = mdn.get_mixture_loss_func(n_outputs, components)
    adam = tf.keras.optimizers.Adam(learning_rate=kwargs.get("learning_rate", 0.001))
    model.compile(loss=mixture_loss, optimizer=adam)
    return model

# Setup

In [None]:
# The demo keeps its configuration files and its training data in the same folder.
config_path = Path("../data/training_demo/")
data_path = Path("../data/training_demo/")

# Output locations: trained models, plus a sub-folder for the TensorBoard logs.
models_path = Path("../models")
tf_logs = models_path / "_tf_logs"

# Create both output folders up front; existing folders are left untouched.
for output_dir in (models_path, tf_logs):
    output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Training configuration (input/output column names, model id, optional seed, ...).
with open(config_path / "model_parameters.json", "r") as params_file:
    parameters = json.load(params_file)

# parameter_space.json contains min/max values for input parameters,
# for checking parameter ranges during predictions.
with open(config_path / "parameter_space.json", "r") as space_file:
    parameter_space = json.load(space_file)

inputs, outputs = parameters["inputs"], parameters["outputs"]
model_id = parameters["model_id"]
seed = parameters.get("seed", 42)  # falls back to 42 when the config has no "seed" entry

# Per-model output locations, keyed by the model id.
save_path = models_path / model_id
log_path = tf_logs / model_id

# Store the known ranges of the inputs in the configuration itself;
# inputs without an entry in parameter_space are skipped.
parameters["input_properties"] = {
    name: parameter_space[name] for name in inputs if name in parameter_space
}

# Prepare training data

In [None]:
training_csv = data_path / parameters["training_data"]
print(f"Loading data: {training_csv}")

# Read only the columns the model needs, then index by `columns` so the frame
# is ordered inputs first, outputs second regardless of the order in the file.
columns = inputs + outputs
data = pd.read_csv(training_csv, usecols=columns)[columns]
data.describe()

## Preprocessing pipeline

In [None]:
X = data[inputs]
Y = data[outputs]

# log transform planet_mass column
# The column is addressed by position (not name). ColumnTransformer outputs the
# transformed column first and appends the passthrough columns after it, so the
# feature order after this step differs from `inputs` unless planet_mass is first.
mass_transf = ColumnTransformer(
    [("log_mass", FunctionTransformer(np.log10), [X.columns.get_loc("planet_mass")])],
    remainder="passthrough",
)
preprocessor = Pipeline([("log_mass", mass_transf),
                         ("scaler", StandardScaler())])
# NOTE(review): the scaler is fitted on the full data set before the train/test
# split, so validation rows contribute to the scaling statistics — confirm this
# is intended.
X_scaled = preprocessor.fit_transform(X)
Y_scaled = np.array(Y)  # targets are used unscaled

# split into train/test data
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y_scaled, test_size=parameters["validation_size"],
                                                    random_state=seed, shuffle=True)
print(f"X_train: {X_train.shape} | Y_train: {Y_train.shape}")
print(f"X_test: {X_test.shape} | Y_test: {Y_test.shape}")
print(f"Inputs: {inputs}\nPredict: {outputs}")

# Save preprocessor and parameters
print("Saving preprocessor and parameters...")
# The per-model directory is not created anywhere else; without this the two
# writes below fail with FileNotFoundError on a fresh run.
save_path.mkdir(parents=True, exist_ok=True)
joblib.dump(preprocessor, save_path / "preprocessor.pkl")
with open(save_path / "setup_parameters.json", "w") as fp:
    json.dump(parameters, fp, indent=4)

# MDN setup

We are using TensorBoard to track training performance.

In [None]:
# Seed TensorFlow's global random generator right before the model is created, so
# that weight initialisation and batch shuffling are reproducible. Previously `seed`
# was only applied to the train/test split.
tf.random.set_seed(seed)

# Training callbacks, all driven by the validation loss:
#  - TensorBoard: writes logs to log_path, with weight histograms every 5 epochs
#  - EarlyStopping: stops after parameters["patience"] epochs without improvement
#    NOTE(review): restore_best_weights is not set, so the weights kept are those
#    of the last epoch, not the best one — confirm this is intended.
#  - ReduceLROnPlateau: multiplies the learning rate by 0.1 after
#    parameters["patience_lr"] (default 4) epochs without improvement
tensorboard_mdn = TensorBoard(log_dir=log_path, histogram_freq=5, write_graph=False, write_images=False)
early_stopping = EarlyStopping(monitor="val_loss", min_delta=0, patience=parameters["patience"], verbose=1, mode="auto")
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=parameters.get("patience_lr", 4), verbose=1, min_delta=0)

print(f"Log directory: {log_path}")

# Input/output widths follow the configured column lists; the remaining
# hyperparameters come from the "architecture" section of the configuration.
mdn_model = build_mdn(n_inputs=len(inputs), n_outputs=len(outputs), **parameters["architecture"])
mdn_model.summary()

# Model training

In [None]:
# Start the TensorBoard UI to monitor training.
# It is pointed at the parent log directory (tf_logs), so the logs of every
# model_id stored below it are shown, including the run started in the next cell.
%tensorboard --logdir {tf_logs}

In [None]:
# Train on the training split and evaluate on the held-out split after every epoch.
# The callbacks lower the learning rate, stop training early and write TensorBoard logs.
mdn_model.fit(
    X_train,
    Y_train,
    validation_data=(X_test, Y_test),
    epochs=parameters["epochs"],
    batch_size=parameters["batch_size"],
    callbacks=[reduce_lr, early_stopping, tensorboard_mdn],
    verbose=1,
)

# Persist the trained network in the per-model directory.
mdn_model.save(save_path / "model")