# Examine model fit using test data

Author: Kara Ponder (SLAC)

In [None]:
import tensorflow as tf

import time
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

import transformer as tran

%matplotlib inline


In [None]:
# Set parameters

# --- Model dimensions ---
# Input vectors must have length d_model (the author also noted the value 6).
d_model = 128
# Number of possible results to choose from (the author also noted 100 and 4).
target_vocab_size = 6

# Light curve length: 100 + 1.
lc_length = 101
input_vocab_size = lc_length

# --- Hyperparameters ---
# The author also noted the value 4 for the number of layers.
num_layers = 8
dropout_rate = 0.0
# Hidden layer size of the feed-forward network; author's note: needs to be larger than 24.
dff = 64
# d_model % num_heads must equal 0 (the author also noted 6 and 3).
num_heads = 8

# --- Light-curve data ---
N = 10000         # number of objects
N_days = 101      # 100 + 1
Nf = 6            # number of filters
num_classes = 4

# --- Batching / training length ---
batch_size, EPOCHS = 64, 15

## Check loss functions

In [None]:
# YOUR_LOSS_TEXT_FILE is a placeholder name that is not defined in the visible
# cells; bind it to the path of the saved loss text file before running this cell.
loss = np.loadtxt(YOUR_LOSS_TEXT_FILE)

# Draw on the current axes (created on demand) and label both axes.
loss_axes = plt.gca()
loss_axes.plot(loss, label='WHAT MAKES ME SPECIAL')
loss_axes.set_ylabel('loss')
loss_axes.set_xlabel('iteration')
loss_axes.legend()

## Looking at predictions

Define the loss function. 

In [None]:
# Adam optimizer handed to model.compile below; 1e-5 is passed as its first positional argument.
optimizer = tf.keras.optimizers.Adam(1e-5)

def loss_kld(layer1, layer2, alpha=0.3):
    """Build a Keras loss: RMSE plus a weighted KL-style penalty on two weight tensors.

    Args:
        layer1: Indexable collection of weight arrays; only element 0 is used,
            through its absolute value. In this file it is the result of
            ``get_weights()`` on the layer named 'encoder'.
        layer2: Same as ``layer1``; in this file it comes from the layer named
            'decoder'.
        alpha: Multiplier applied to the summed penalty of both tensors.

    Returns:
        A function ``loss(y_true, y_pred)`` returning
        ``rmse(y_true, y_pred) + alpha * (kld(layer1) + kld(layer2))``.

    NOTE(review): the penalty is computed from the values passed in when this
    factory is called, not from the live layer variables -- presumably a
    snapshot of the weights at compile time; confirm this is intended.
    """
    alpha = tf.constant(alpha, dtype=tf.float32)
    layer1 = tf.math.abs(layer1[0])
    layer2 = tf.math.abs(layer2[0])

    def loss(y_true, y_pred):
        rhoc = 0.00001

        def kld(layer):
            # Build the constant tensors from the tensor being penalised, so the
            # two tensors are not required to share layer1's shape.
            ones = tf.ones_like(layer, dtype=tf.float32)
            rho = rhoc * ones
            kld_1 = tf.math.multiply(rhoc, tf.math.log(tf.math.divide_no_nan(rho, layer)))
            # NOTE(review): this term divides log(1 - rho) by log(1 - layer); the
            # textbook KL term is log((1 - rho) / (1 - layer)). Left unchanged so
            # the loss matches whatever the saved weights were trained with.
            kld_2 = tf.math.multiply(
                (1.0 - rhoc),
                tf.math.divide_no_nan(tf.math.log(ones - rho), tf.math.log(ones - layer)),
            )
            return tf.reduce_sum(kld_1 + kld_2)

        mse = tf.math.reduce_mean(tf.square(y_true - y_pred))
        rmse = tf.math.sqrt(mse)
        return rmse + tf.multiply(alpha, (kld(layer1) + kld(layer2)))

    return loss

Define the model

In [None]:
# Encoder and decoder stacks come from the project-local `transformer` module
# (imported as `tran`); both receive the same hyperparameters and embed=True.
encoder = tran.Encoder(num_layers, d_model, num_heads, dff,
                       lc_length, dropout_rate, embed=True)

decoder = tran.Decoder(num_layers, d_model, num_heads, dff,
                       lc_length, dropout_rate, embed=True)

# Final dense layer with target_vocab_size output units.
final_layer = tf.keras.layers.Dense(target_vocab_size)

# Symbolic inputs, each declared with shape (None, Nf) plus the implicit batch axis.
# NOTE: `wgts` is created here but is not listed in the Model inputs below.
inp = tf.keras.layers.Input(shape=(None,Nf))
target = tf.keras.layers.Input(shape=(None,Nf))
wgts = tf.keras.layers.Input(shape=(None,Nf))
mask = tf.keras.layers.Input(shape=(None,Nf))

# Forward graph: Masking(mask_value=0.) -> encoder -> decoder -> dense,
# then an element-wise product of the dense output with the `mask` input.
x = tf.keras.layers.Masking(mask_value=0.)(inp)
x = encoder(x)
x = decoder(target, x, mask=tran.create_decoder_masks(inp, target))
x = final_layer(x)
mx = tf.keras.layers.Multiply()([x, mask])

# The model takes exactly three inputs: [inp, target, mask].
model = tf.keras.models.Model(inputs=[inp, target, mask], outputs=mx)
# The loss is built from the current weights of the layers looked up by the names
# 'encoder' and 'decoder' -- presumably those names are assigned inside
# tran.Encoder / tran.Decoder; verify against the transformer module.
model.compile(optimizer=optimizer, loss=loss_kld(model.get_layer(name='encoder').get_weights(),
                                                 model.get_layer(name='decoder').get_weights()))

Load the weights

In [None]:
# Placeholder path: replace it with the location of the saved .h5 weights file.
weights_file = 'LOCATION_OF_SAVED_WEIGHTS.h5'
model.load_weights(weights_file)

Define an evaluation function

In [None]:
def evaluate(lc_data, mask_map, wgt_map):
    """Predict a light curve step by step, feeding each prediction back to the decoder.

    Args:
        lc_data: Encoder input for a single object; a leading batch axis of
            size 1 is added here (assumes shape (steps, Nf) -- TODO confirm).
        mask_map: Mask for the same object; a batch axis is added, and at each
            step it is sliced along axis 1 to rows 1..i+1 so that it lines up
            with the current decoder sequence.
        wgt_map: Not used by the model call. It is kept so existing callers
            that pass three arguments keep working.

    Returns:
        The decoder sequence with the batch axis removed: the all-zero seed row
        followed by the ``N_days - 1`` predicted rows.
    """
    inp_lc = tf.expand_dims(lc_data, 0)
    inp_lc_mask = tf.expand_dims(mask_map, 0)

    # Seed the decoder with one all-zero row of Nf values. This depends on what
    # the baseline values are (typically zero).
    decoder_input = tf.constant([[0.0] * Nf])
    output = tf.expand_dims(decoder_input, 0)

    for i in range(N_days - 1):
        # The model in this file is built with exactly three inputs
        # (inp, target, mask), so exactly three tensors are passed; a fourth
        # tensor does not match the model's declared inputs.
        # If batching, model.predict may be needed instead.
        predictions = model([inp_lc, output, inp_lc_mask[:, 1:i + 2, :]])
        # Keep only the newest time step and append it to the decoder sequence.
        predictions = predictions[:, -1:, :]
        output = tf.concat([output, predictions], axis=1)

    return tf.squeeze(output, axis=0)

Load some data. Fake data is included in the repo, but it is not the PLAsTiCC data: it is simplified data based on the Bazin function (parameterization), an empirical function describing the rise and fall of Type Ia supernovae.

In [None]:
# Load the two light-curve arrays (in this order) and the weight map from .npy files.
lc_data, real_lc_data = (np.load(fname) for fname in ('lc_data.npy', 'real_lc_data.npy'))

wgt_map = np.load('weightmap.npy')

# Divide the weight map by 0.1**2, then set every entry that is exactly zero
# to 1 / 2**2 (= 0.25).
mask_map = (wgt_map * 1) / 0.1**2
mask_map[mask_map == 0] = 0.25

# Slice the four arrays together along their first axis and batch them.
dataset = tf.data.Dataset.from_tensor_slices((lc_data, real_lc_data, wgt_map, mask_map))
batch_ds = dataset.batch(batch_size)

Predict using test data and the evaluate function

In [None]:
# Take the first object (index 0) from each array.
check_lc = tf.constant(lc_data[0])
# NOTE(review): the "mask" tensor is read from wgt_map and the "wgt" tensor from
# mask_map -- the names are crossed relative to the arrays; confirm this is intended.
check_lc_mask = tf.constant(wgt_map[0], dtype=tf.float32)
check_lc_wgt = tf.constant(mask_map[0], dtype=tf.float32)

# Run the step-by-step prediction for that object.
pred = evaluate(lc_data=check_lc, mask_map=check_lc_mask, wgt_map=check_lc_wgt)

Plot results

In [None]:
# Compare, for object 0 and column 0 (first row skipped), the input light
# curve, the prediction, and the 'model' curve on the current axes.
result_axes = plt.gca()
result_axes.plot(lc_data[0, 1:, 0], 'ro', lw=2, alpha=0.4, label='lc data')
result_axes.plot(pred[1:, 0], 'o', lw=2, label='predicted lc ', alpha=0.5)
result_axes.plot(real_lc_data[0, 1:, 0], 'ko', lw=2, alpha=0.4, label='model')

result_axes.legend()