In [None]:
#Load required libraries
import pandas as pd

import tensorflow as tf
import keras
import numpy as np

from keras.models import Sequential, Model
from keras.layers import Input, Activation, Dense
from keras.layers import Conv2D, GlobalAveragePooling2D, BatchNormalization
from keras.layers import Dense, Dropout
from keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import *

from sklearn.model_selection import train_test_split
from sklearn.calibration import CalibratedClassifierCV
from sklearn.calibration import calibration_curve
import sklearn.metrics as metrics
from sklearn.metrics import log_loss
from sklearn.metrics import confusion_matrix

In [None]:
def fit_TemperatureCalibration(train_X_y, valid_X_y=None, epochs=100):
    """
    Fit a single temperature T for temperature scaling (logits / T).

    T starts at 1 and is updated with full-batch SGD (learning rate 0.001) on the
    mean softmax cross-entropy of the scaled training logits.

    From: https://github.com/cerlymarco/MEDIUM_NoteBook/blob/master/NeuralNet_Calibration/NeuralNet_Calibration.ipynb
    Inspired by: https://github.com/stellargraph/stellargraph/blob/develop/stellargraph/calibration.py

    Params:
        train_X_y: (logits, labels) pair used for the gradient steps. Labels are
            passed to softmax_cross_entropy_with_logits, i.e. one row of class
            probabilities per sample (one-hot rows in this notebook).
        valid_X_y: optional (logits, labels) pair. When given, fitting stops as soon
            as the validation cost is higher than after the previous step, and the
            temperature from before that step is returned.
        epochs: (int) maximum number of gradient steps (default = 100)

    Returns:
        temperature (float): the last accepted value of T; 1.0 if no step was taken.
    """

    T = tf.Variable(tf.ones(shape=(1,)), name="T")
    # Same optimizer class the wildcard import exposes as `SGD`, referenced explicitly.
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.001)

    def cost(T, x, y):
        # Mean cross-entropy of the temperature-scaled logits.
        scaled_logits = tf.multiply(x=x, y=1.0 / T)
        return tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=scaled_logits, labels=y)
        )

    def grad(T, x, y):
        # Cost and its derivative with respect to T.
        with tf.GradientTape() as tape:
            cost_value = cost(T, x, y)
        return cost_value, tape.gradient(cost_value, T)

    X_train, y_train = train_X_y
    early_stopping = valid_X_y is not None
    if early_stopping:
        X_valid, y_valid = valid_X_y

    # Value returned when epochs <= 0 (the original indexed an empty history and crashed).
    temperature = float(T.numpy()[0])
    previous_val_cost = None

    for _ in range(epochs):
        _, grads = grad(T, X_train, y_train)
        optimizer.apply_gradients(zip([grads], [T]))

        if early_stopping:
            val_cost = float(cost(T, X_valid, y_valid))
            # Stop before accepting a step that made the validation cost worse.
            if previous_val_cost is not None and val_cost > previous_val_cost:
                break
            previous_val_cost = val_cost

        temperature = float(T.numpy()[0])

    return temperature

def cal_softmax(x):
    """
    From: https://github.com/markus93/NN_calibration
    Compute softmax values for each set of scores in x.

    The maximum is subtracted per row (not globally) before exponentiating, so a
    row whose scores are all far below the largest score in the whole array no
    longer underflows to 0/0 = NaN.

    Parameters:
        x (numpy.ndarray): array containing m samples with n-dimensions (m,n);
            the softmax is taken over the last axis.
    Returns:
        x_softmax (numpy.ndarray) softmaxed values for initial (m,n) array
    """
    x = np.asarray(x)
    shifted = x - np.max(x, axis=-1, keepdims=True)
    e_x = np.exp(shifted)
    return e_x / e_x.sum(axis=-1, keepdims=True)



def evaluate(probs, y_true, verbose = False, normalize = False, bins = 15):
    """
    Modified from: https://github.com/markus93/NN_calibration
    Evaluate model using two scoring measures: Error Rate and NLL (log loss).
    (The ECE, MCE and Brier scores of the original implementation are not computed here.)

    Params:
        probs: a list containing probabilities for all the classes with a shape of (samples, classes)
        y_true: a list containing the actual class labels
        verbose: (bool) are the scores printed out. (default = False)
        normalize: (bool) in case of 1-vs-K calibration, the probabilities need to be normalized;
            when True each row of probs is divided by its sum before scoring.
        bins: (int) unused; kept so existing calls that pass it keep working (default = 15)

    Returns:
        (error, loss): error rate in percent and log loss
    """

    probs = np.asarray(probs)

    if normalize:
        # Rescale every row to sum to 1; the predicted class (argmax) is unaffected.
        probs = probs / np.sum(probs, axis=1, keepdims=True)

    preds = np.argmax(probs, axis=1)  # Take maximum confidence as prediction

    accuracy = metrics.accuracy_score(y_true, preds) * 100
    error = 100 - accuracy

    loss = log_loss(y_true=y_true, y_pred=probs)

    if verbose:
        print("Accuracy:", accuracy)
        print("Error:", error)
        print("Loss:", loss)

    return (error, loss)

In [None]:
# Simulated data for the three phylodynamic models (BD, BDEI, BDSS), obtained under
# the specific HIV priors: training files sit next to the notebook, test files in ./testset.

def _read_parameters(path):
    """Read a tab-separated parameter table into a DataFrame."""
    return pd.read_csv(path, sep='\t')


def _read_encoded_trees(path):
    """Read a tab-separated tree encoding and reshape its values to (n_trees, 1000, 18)."""
    table = pd.read_csv(path, sep="\t", header=0, index_col=0)
    return table.values.reshape(-1, 1000, 18)


# Simulation parameters
param_train_BD = _read_parameters('./parameters_BD.txt')
param_train_BDEI = _read_parameters('./parameters_BDEI.txt')
param_train_BDSS = _read_parameters('./parameters_BDSS.txt')
param_test_BD = _read_parameters('./testset/parameters_BD.txt')
param_test_BDEI = _read_parameters('./testset/parameters_BDEI.txt')
param_test_BDSS = _read_parameters('./testset/parameters_BDSS.txt')

# Tree encodings
encoding_BD = _read_encoded_trees('./Encoded_trees_BD.csv')
encoding_BDEI = _read_encoded_trees('./Encoded_trees_BDEI.csv')
encoding_BDSS = _read_encoded_trees('./Encoded_trees_BDSS.csv')
encoding_test_BD = _read_encoded_trees('./testset/Encoded_trees_BD.csv')
encoding_test_BDEI = _read_encoded_trees('./testset/Encoded_trees_BDEI.csv')
encoding_test_BDSS = _read_encoded_trees('./testset/Encoded_trees_BDSS.csv')

In [None]:
#Load the empirical (Zurich) encoding, reshaped like the simulated ones to (n_trees, 1000, 18)
encoding_Zurich = (
    pd.read_csv('./Encoded_Zurich.csv', sep="\t", header=0, index_col=0)
    .values
    .reshape(-1, 1000, 18)
)

In [None]:
#Split each test parameter table in two halves: the second half becomes the
#calibration set, the first half stays as the test set.
#Both slices are taken in one statement so they are cut from the same, unsplit table.
param_cal_BD, param_test_BD = (
    param_test_BD.iloc[param_test_BD.shape[0]//2:, :],
    param_test_BD.iloc[:param_test_BD.shape[0]//2, :],
)
param_cal_BDEI, param_test_BDEI = (
    param_test_BDEI.iloc[param_test_BDEI.shape[0]//2:, :],
    param_test_BDEI.iloc[:param_test_BDEI.shape[0]//2, :],
)
param_cal_BDSS, param_test_BDSS = (
    param_test_BDSS.iloc[param_test_BDSS.shape[0]//2:, :],
    param_test_BDSS.iloc[:param_test_BDSS.shape[0]//2, :],
)

In [None]:
#Split each test encoding in two halves along the tree axis, matching the parameter
#split: second half -> calibration, first half -> test.
#Both slices are taken in one statement so they are cut from the same, unsplit array.
encoding_cal_BD, encoding_test_BD = (
    encoding_test_BD[encoding_test_BD.shape[0]//2:],
    encoding_test_BD[:encoding_test_BD.shape[0]//2],
)
encoding_cal_BDEI, encoding_test_BDEI = (
    encoding_test_BDEI[encoding_test_BDEI.shape[0]//2:],
    encoding_test_BDEI[:encoding_test_BDEI.shape[0]//2],
)
encoding_cal_BDSS, encoding_test_BDSS = (
    encoding_test_BDSS[encoding_test_BDSS.shape[0]//2:],
    encoding_test_BDSS[:encoding_test_BDSS.shape[0]//2],
)

In [None]:
# Remove the last entry along axis 1 of every simulated encoding (1000 -> 999 rows per tree).
# The original note describes the removed entry as the rescaling factor.
encoding_BD, encoding_cal_BD, encoding_test_BD = (
    np.delete(arr, -1, axis=1) for arr in (encoding_BD, encoding_cal_BD, encoding_test_BD)
)
encoding_BDEI, encoding_cal_BDEI, encoding_test_BDEI = (
    np.delete(arr, -1, axis=1) for arr in (encoding_BDEI, encoding_cal_BDEI, encoding_test_BDEI)
)
encoding_BDSS, encoding_cal_BDSS, encoding_test_BDSS = (
    np.delete(arr, -1, axis=1) for arr in (encoding_BDSS, encoding_cal_BDSS, encoding_test_BDSS)
)

# Integer class label per simulation: 0 = BD, 1 = BDEI, 2 = BDSS, in that order
Y = np.repeat([0, 1, 2], [len(encoding_BD), len(encoding_BDEI), len(encoding_BDSS)])

Y_test = np.repeat(
    [0, 1, 2],
    [len(encoding_test_BD), len(encoding_test_BDEI), len(encoding_test_BDSS)],
)

In [None]:
#Append the per-tree sampling probability (BD model) as an extra feature,
#repeated on each of the 999 rows of the tree

samp_proba_list = np.array(param_train_BD['sampling_proba'])
encoding_BD = np.concatenate(
    [encoding_BD, np.repeat(samp_proba_list[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

samp_proba_list_test = np.array(param_test_BD['sampling_proba'])
encoding_test_BD = np.concatenate(
    [encoding_test_BD, np.repeat(samp_proba_list_test[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

samp_proba_list_cal = np.array(param_cal_BD['sampling_proba'])
encoding_cal_BD = np.concatenate(
    [encoding_cal_BD, np.repeat(samp_proba_list_cal[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

encoding_cal_BD.shape

In [None]:
#Append the per-tree sampling probability (BDEI model) as an extra feature,
#repeated on each of the 999 rows of the tree

samp_proba_list = np.array(param_train_BDEI['sampling_proba'])
encoding_BDEI = np.concatenate(
    [encoding_BDEI, np.repeat(samp_proba_list[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

samp_proba_list_test = np.array(param_test_BDEI['sampling_proba'])
encoding_test_BDEI = np.concatenate(
    [encoding_test_BDEI, np.repeat(samp_proba_list_test[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

samp_proba_list_cal = np.array(param_cal_BDEI['sampling_proba'])
encoding_cal_BDEI = np.concatenate(
    [encoding_cal_BDEI, np.repeat(samp_proba_list_cal[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

In [None]:
#Append the per-tree sampling probability (BDSS model) as an extra feature,
#repeated on each of the 999 rows of the tree

samp_proba_list = np.array(param_train_BDSS['sampling_proba'])
encoding_BDSS = np.concatenate(
    [encoding_BDSS, np.repeat(samp_proba_list[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

samp_proba_list_test = np.array(param_test_BDSS['sampling_proba'])
encoding_test_BDSS = np.concatenate(
    [encoding_test_BDSS, np.repeat(samp_proba_list_test[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

samp_proba_list_cal = np.array(param_cal_BDSS['sampling_proba'])
encoding_cal_BDSS = np.concatenate(
    [encoding_cal_BDSS, np.repeat(samp_proba_list_cal[:, np.newaxis, np.newaxis], 999, axis=1)],
    axis=2,
)

In [None]:
#Empirical data: the sampling probability is read from the last row / last column of
#each tree, appended as an extra feature on all 1000 rows, and only then is the
#last row dropped (1000 -> 999 rows per tree)

samp_proba_list = encoding_Zurich[:, -1, -1]
encoding_Zurich = np.concatenate(
    [encoding_Zurich, np.repeat(samp_proba_list[:, np.newaxis, np.newaxis], 1000, axis=1)],
    axis=2,
)


encoding_Zurich = np.delete(encoding_Zurich, -1, axis=1)

In [None]:
# This function takes in the tree encodings for the training, testing and calibration
# datasets and processes them to have a uniform shape. It pads the leaves and nodes
# of the trees with 0s to ensure each tree has a fixed number of 500 leaf rows and 500 node rows.

def _pad_tree_set(enc, max_size):
    """Sort and zero-pad the leaf rows and internal-node rows of every tree in `enc`."""
    padded = []
    for tree in enc:
        node_kind = tree[:, 3]
        # Leaves are rows whose column 3 equals 1, internal nodes those where it exceeds 1.
        # Rows with any other value in column 3 are left out of both groups.
        groups = ((tree[node_kind == 1], False), (tree[node_kind > 1], True))

        channels = []
        for rows, duplicate_last in groups:
            # Order the rows by the value in column 1
            rows = rows[np.argsort(rows[:, 1])]
            # The last internal node is repeated once so that nodes and leaves have
            # a similar count; skipped when the tree has no internal node at all.
            if duplicate_last and rows.shape[0] > 0:
                rows = np.append(rows, rows[-1].reshape(1, -1), axis=0)
            missing = max_size - rows.shape[0]
            if missing < 0:
                raise ValueError(
                    "tree has %d rows in one group, more than max_size=%d"
                    % (rows.shape[0], max_size)
                )
            channels.append(np.pad(rows, [(0, missing), (0, 0)], mode='constant'))

        # Leaves become channel 0 and nodes channel 1: (max_size, n_features, 2)
        padded.append(np.stack(channels, axis=2))

    return np.array(padded)


def encode_pad_0s_rootage(enc, enc_test=None, enc_cal=None, max_size=500):
    """
    Split every encoded tree into leaves and internal nodes, sort each group by
    column 1, zero-pad both to `max_size` rows and stack them as two channels.

    Params:
        enc: array of encoded trees, indexed as (tree, row, feature)
        enc_test: optional second set (test trees), processed the same way
        enc_cal: optional third set (calibration trees), processed the same way
        max_size: (int) number of rows each group is padded to (default = 500)

    Returns:
        When only `enc` is given: one array of shape (n_trees, max_size, n_features, 2).
        When `enc_test` and `enc_cal` are given: a tuple of three such arrays
        (enc, enc_test, enc_cal), as in the original three-argument form.

    Raises:
        ValueError: if only one of enc_test / enc_cal is given, or if a tree has
            more than `max_size` rows in one group.
    """
    if enc_test is None and enc_cal is None:
        return _pad_tree_set(enc, max_size)
    if enc_test is None or enc_cal is None:
        raise ValueError("enc_test and enc_cal must be given together")

    return (
        _pad_tree_set(enc, max_size),
        _pad_tree_set(enc_test, max_size),
        _pad_tree_set(enc_cal, max_size),
    )


#Sort and zero-pad the train / test / calibration encodings of each model
_padded_by_model = [
    encode_pad_0s_rootage(encoding_BD, encoding_test_BD, encoding_cal_BD),
    encode_pad_0s_rootage(encoding_BDEI, encoding_test_BDEI, encoding_cal_BDEI),
    encode_pad_0s_rootage(encoding_BDSS, encoding_test_BDSS, encoding_cal_BDSS),
]

#Combine the 3 models (order BD, BDEI, BDSS) separately for each split
encoding_pad, encoding_pad_test, encoding_pad_cal = (
    np.concatenate(split_parts, axis=0) for split_parts in zip(*_padded_by_model)
)

#Delete intermediate variables: the per-model padded arrays and the raw training encodings
del _padded_by_model
del encoding_BD, encoding_BDEI, encoding_BDSS

In [None]:
# This creates channels-last arrays of shape (batch, 500, n_features, 2):
# channel 0 holds the padded leaves, channel 1 the padded internal nodes.
def encode_pad_0s_rootage(enc, enc_test=None, enc_cal=None, max_size=500):
    """
    Split every encoded tree into leaves and internal nodes, sort each group by
    column 1, zero-pad both to `max_size` rows and stack them as two channels.

    This definition reuses the name of the earlier three-argument function, so it
    accepts both call forms; re-running an earlier cell therefore still works.

    Params:
        enc: array of encoded trees, indexed as (tree, row, feature)
        enc_test: optional second set of trees, processed the same way
        enc_cal: optional third set of trees, processed the same way
        max_size: (int) number of rows each group is padded to (default = 500)

    Returns:
        When only `enc` is given: one array of shape (n_trees, max_size, n_features, 2).
        When `enc_test` and `enc_cal` are given: a tuple of three such arrays.

    Raises:
        ValueError: if only one of enc_test / enc_cal is given, or if a tree has
            more than `max_size` rows in one group.
    """
    def fit_rows(rows):
        # zero-pad one group of rows up to max_size
        if rows.shape[0] > max_size:
            raise ValueError(
                "tree has %d rows in one group, more than max_size=%d"
                % (rows.shape[0], max_size)
            )
        return np.pad(rows, [(0, max_size - rows.shape[0]), (0, 0)], mode='constant')

    def pad_set(trees):
        enc_pad = []
        for i in range(trees.shape[0]):
            #separate leaves (column 3 == 1) and nodes (column 3 > 1), each sorted by column 1
            leaves = trees[i][trees[i, :, 3] == 1]
            leaves = leaves[np.argsort(leaves[:, 1])]
            nodes = trees[i][trees[i, :, 3] > 1]
            nodes = nodes[np.argsort(nodes[:, 1])]
            #copy the last node value to have similar number of leaves and nodes
            #(skipped for a tree without internal nodes, which used to raise IndexError)
            if nodes.shape[0] > 0:
                nodes = np.append(nodes, nodes[-1].reshape(1, -1), axis=0)
            enc_pad.append(np.stack((fit_rows(leaves), fit_rows(nodes)), axis=2))
        return np.array(enc_pad)

    if enc_test is None and enc_cal is None:
        return pad_set(enc)
    if enc_test is None or enc_cal is None:
        raise ValueError("enc_test and enc_cal must be given together")
    return pad_set(enc), pad_set(enc_test), pad_set(enc_cal)

#Sort and zero-pad the empirical (Zurich) encoding the same way as the simulations
encoding_pad_Zurich = encode_pad_0s_rootage(enc=encoding_Zurich)

In [None]:
#One-hot encode the integer labels (3 classes)
Y = np.eye(3)[Y]
#Hold out 30% of the simulations as a validation set, stratified on the labels
encoding_pad, encoding_pad_valid, Y, Y_valid = train_test_split(
    encoding_pad,
    Y,
    test_size=0.3,
    shuffle=True,
    stratify=Y,
)

In [None]:
# Creation of the Network Model: model definition
def build_model(xtest=None):
    """
    Build the (uncompiled) CNN classifier for the three phylodynamic models.

    Params:
        xtest: optional array whose shape[1:4] gives the input shape
            (rows, features, channels). When omitted, the notebook's shape
            (500, 19, 2) is used, so build_model() can be called without arguments.

    Returns:
        keras Model mapping an input batch to softmax probabilities over 3 classes.
        The pre-softmax Dense layer is named 'logits' so its output can be
        retrieved later for temperature scaling.
    """
    # Input shape: (rows, features, channels); in this notebook (500, 19, 2), where the
    # 2 channels are the leaves and the internal nodes.
    if xtest is None:
        inputShape = (500, 19, 2)
    else:
        inputShape = (xtest.shape[1], xtest.shape[2], xtest.shape[3])
    inputs = Input(shape=inputShape)
    x = inputs

    # First convolutional layer:
    # - Filters: 32, no bias, ELU activation
    # - Kernel size: (1, n_features), i.e. the kernel spans the whole feature axis of one
    #   row ((1, 19) for the notebook's data)
    # - Groups: 2 to apply separate convolutions for the two channels (leaves and nodes)
    # The input shape comes from `inputs`, so no input_shape argument is passed here.
    x = Conv2D(filters=32, use_bias=False, kernel_size=(1, inputShape[1]), activation='elu', groups=2)(x)

    # Apply batch normalization to stabilize and speed up the training process
    x = BatchNormalization()(x)

    # Second and third convolutional layers:
    # - Filters: 32, kernel size (1, 1), no bias, ELU activation
    # - each followed by batch normalization
    x = Conv2D(filters=32, use_bias=False, kernel_size=(1, 1), activation='elu')(x)
    x = BatchNormalization()(x)

    x = Conv2D(filters=32, use_bias=False, kernel_size=(1, 1), activation='elu')(x)
    x = BatchNormalization()(x)

    # Average every feature map over its spatial dimensions, giving one value per
    # filter; this vector is passed to the fully connected (dense) layers
    x = GlobalAveragePooling2D()(x)

    # Fully connected (FFNN) part:
    # Dense layers with decreasing number of units, all using ELU activation:
    x = Dense(64, activation='elu')(x)   # First dense layer with 64 units
    x = Dense(32, activation='elu')(x)   # Second dense layer with 32 units
    x = Dense(16, activation='elu')(x)   # Third dense layer with 16 units
    x = Dense(8, activation='elu')(x)    # Fourth dense layer with 8 units

    # Output: the logits and the softmax are separate layers so that temperature
    # scaling can be applied to the logits.
    # - 3 output neurons, corresponding to the 3 models
    logits = Dense(3, name='logits')(x)
    # - Activation function: softmax
    out = Activation('softmax')(logits)

    model = Model(inputs, out)

    # Show the summary of the model structure (number of layers, shapes of outputs, etc.)
    model.summary()

    # Return the constructed model
    return model

In [None]:
from keras import losses
import os

# Initialize the model; the training array supplies the input shape.
# (build_model was previously called without its argument, which raised a TypeError.)
estimator = build_model(encoding_pad)

# Compile the model:
# - Loss function: categorical_crossentropy is used to measure the error between the predicted probability distribution and the true distribution for multi-class classification tasks.
# - Optimizer: 'Adam' is used to minimize the loss function efficiently
# - Metrics: Accuracy is used to track the model's performance during training
estimator.compile(loss=keras.losses.categorical_crossentropy, optimizer = 'Adam', metrics=['accuracy'])

# Early stopping callback to prevent overfitting:
# - monitor: monitor the validation accuracy during training
# - patience: stop training if the validation accuracy doesn't improve for 100 consecutive epochs
# - mode: 'max' because a higher validation accuracy is better
# - restore_best_weights: restore the weights from the best epoch (the one with the highest validation accuracy)
early_stop = keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=100, mode='max', restore_best_weights=True)

# Custom callback to display training progress:
# - Print a dot for every epoch, starting a new line every 100 epochs
class PrintD(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        if epoch % 100 == 0:  # Print a newline every 100 epochs
            print('')
        print('.', end='')  # Print a dot to indicate progress during each epoch

# Set the maximum number of epochs (iterations over the entire dataset)
EPOCHS = 1000

# Train the model using the `fit` method:
# - encoding_pad: The padded training data (inputs)
# - Y: The one-hot target values (outputs)
# - verbose: set to 1 to print progress during training
# - epochs: The maximum number of times to iterate over the entire dataset
# - validation_data: the held-out validation split used by the early-stopping callback
# - batch_size: the number of samples per gradient update
#   NOTE(review): batch_size=1 is kept from the original; confirm it is intended together with BatchNormalization.
# - callbacks: list of callbacks to be used during training (early stopping and progress display)
history = estimator.fit(encoding_pad, Y, verbose=1, epochs=EPOCHS, validation_data=(encoding_pad_valid, Y_valid), batch_size=1, callbacks=[early_stop, PrintD()])

# Make sure the output directory exists so a finished training run is not lost when saving
os.makedirs('./Trained_Models', exist_ok=True)

# Save the model architecture to a JSON file:
# - The model structure (architecture) is saved as a JSON string
from keras.models import model_from_json
model = estimator.to_json()
with open('./Trained_Models/Trained_2Generation_PhyDyn.json', 'w') as json_file:
    json_file.write(model)

# Save the model weights to an H5 file:
# - The weights (learned parameters) of the trained model are saved to a file
estimator.save_weights('./Trained_Models/Trained_2Generation_PhyDyn.h5')

# Print a confirmation message when the model and weights are saved
print('model saved!')

In [None]:
#load the model architecture (the context manager closes the file even if reading fails)
from keras.models import model_from_json
with open('./Trained_Models/Trained_2Generation_PhyDyn.json', 'r') as json_file:
    model = json_file.read()
estimator = model_from_json(model)
#load weights
estimator.load_weights('./Trained_Models/Trained_2Generation_PhyDyn.h5')
print('model loaded!')

# predict class probabilities for the test set
predicted_test = np.array(estimator.predict(encoding_pad_test))

# predicted class = index of the largest probability in each row
pred_cat = predicted_test.argmax(axis=1)

# Print the confusion matrix (true labels first, predicted labels second)
print (confusion_matrix(Y_test, pred_cat))

In [None]:
# Prepare the calibration labels.
# They are built from the calibration encodings themselves, in the same BD, BDEI, BDSS
# order used to assemble encoding_pad_cal. (They were previously taken from Y_test,
# which only matches when every test file holds an even number of trees.)
Y_cal = np.repeat(
    [0, 1, 2],
    [len(encoding_cal_BD), len(encoding_cal_BDEI), len(encoding_cal_BDSS)],
)
assert len(Y_cal) == len(encoding_pad_cal), "calibration labels and encodings differ in length"
y = np.eye(3)[Y_cal]

# Shuffle labels and encodings with one shared random permutation
# (`shuffle` was never imported, so the original call raised a NameError)
shf = np.random.permutation(len(encoding_pad_cal))

y_cal = y[shf]
xcal = encoding_pad_cal[shf]

# Divide the calibration set in two to estimate the temperature.
# One split index is used for both arrays so samples and labels stay aligned.
split_at = len(xcal) // 2
x_cal_train, x_cal_test = xcal[split_at:], xcal[:split_at]
ytrain, ytest = y_cal[split_at:], y_cal[:split_at]

In [None]:
# Get the logit scores for the two halves of the calibration set.
# The last layer of the estimator is the softmax activation, so the pre-softmax
# Dense layer is selected by its name ('logits') rather than with layers[-1].
model_score = Model(inputs=estimator.input, outputs=estimator.get_layer('logits').output)
X_train_calib = model_score.predict(x_cal_train)
X_valid_calib = model_score.predict(x_cal_test)

# Fit the temperature on the first half, with early stopping on the second half
temperature = fit_TemperatureCalibration((X_train_calib,ytrain), (X_valid_calib,ytest), epochs=1000)
print ("Temperature",temperature)

In [None]:
# Score the test set with model_score, then apply the softmax after dividing by the fitted temperature
pred_cal = model_score.predict(encoding_pad_test)
scaled_prediction = cal_softmax(np.divide(pred_cal, temperature))

In [None]:
# Temperature-scaled class probabilities for the empirical (Zurich) data
Emp_calpred = cal_softmax(
    np.divide(model_score.predict(encoding_pad_Zurich), temperature)
)

Emp_calpred

In [None]:
# And predict the empirical data without temperature (plain softmax output of the estimator)
predicted_test = np.array(
    estimator.predict(x=encoding_pad_Zurich)
)
predicted_test