In [3]:
%load_ext autoreload
%autoreload 2
from functools import partial

import keras

from datasets import example_datasets, to_numpy
from models import mixture_poissons,location_specific_linear, CustomPenalizedMixtureDecisionModel, get_mixture
from metrics import mixture_poi_loss, get_bpr_loss_func, mix_bpr, get_penalized_bpr_loss_func_mix, cross_ratio_decision, get_perturbed_bpr_func
from experiments import training_loop, training_loop_score_function_trick, score_function_trick, overall_gradient_calculation
from plotting_funcs import plot_losses, plot_frontier

import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np

import matplotlib.pyplot as plt
import resource
import datetime

2024-05-13 14:13:12.451792: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-05-13 14:13:12.609203: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-13 14:13:12.609246: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-13 14:13:12.610512: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-05-13 14:13:12.634727: I tensorflow/core/platform/cpu_feature_guar

In [29]:
# --- Reproducibility / optimisation ---
seed = 360
learning_rate = 0.05  # passed to the Adam optimizer
epochs = 200
batch_size = 300

# --- Model ---
num_components = 4  # number of mixture members

# --- Decision-aware loss ---
penalty = 5000    # multiplies the amount by which BPR falls short of `threshold`
threshold = 0.55  # BPR values below this are penalised
K = 4             # passed to get_perturbed_bpr_func
perturbed_sigma = 0.3
num_score_func_samples = 10  # samples drawn per example for the score-function estimate

# --- Data dimensions ---
S = 12   # tracts/distributions
H = 3    # history/features
T = 500  # total timepoints

# --- Miscellaneous ---
# NOTE(review): absolute, user-specific path; consider making it configurable.
outdir = '/cluster/home/kheuto01/testdir'
do_only = True

In [30]:
# Train / validation / test splits, built from the configured history length,
# number of timepoints, seed and batch size.
train_dataset, val_dataset, test_dataset = example_datasets(
    H,
    T,
    seed=seed,
    batch_size=batch_size,
)

# (X, y) pairs for the train and validation splits.
# Presumably plain NumPy arrays, going by the helper's name -- TODO confirm.
train_X_THS, train_y_TS = to_numpy(train_dataset)
val_X_THS, val_y_TS = to_numpy(val_dataset)

# Per-example input shape: history/features by tracts.
input_shape = (H, S)

# Metric callable used below as bpr_K(observed_y, decisions).
bpr_K = get_perturbed_bpr_func(K, sigma=perturbed_sigma)

optimizer = keras.optimizers.Adam(learning_rate=learning_rate)


In [31]:
# Sanity check of the perturbed BPR metric on a hand-computed example.
# First argument: observed values; second argument: decision scores (same
# argument order as the bpr_K call in the gradient cell further down).
# With K=4 the unperturbed BPR should be (1+2+3+4)/(3+4+5+6) = 10/18 = 0.5555;
# the perturbed metric is expected to land close to, not exactly on, that value.
bpr_K(np.array([[1,2,3,4,5,6]], dtype=np.float32), np.array([[6,5,4,3,2,1]], dtype=np.float32))

<tf.Tensor: shape=(1,), dtype=float32, numpy=array([0.5561111], dtype=float32)>

In [32]:
def location_specific_linear(input_shape, seed=360, activation='softplus'):
    """Build a single-filter, kernel-size-1 ``Conv1D`` model followed by a flatten.

    Args:
        input_shape: Per-example input shape handed to the ``Input`` layer
            (this notebook passes ``(H, S)``).
        seed: Forwarded to ``keras.utils.set_random_seed`` before any layer is
            created. This reseeds global random state as a side effect.
        activation: Activation used by the convolution layer.

    Returns:
        A ``keras.Sequential`` model made of the input, convolution and
        flatten layers, in that order.
    """
    # Seed before creating layers; presumably so weight initialisation is
    # reproducible for a given seed.
    keras.utils.set_random_seed(seed)

    input_layer = keras.layers.Input(name='linear_input', shape=input_shape)

    # Convolution to turn (H, S) into (1, S):
    #   filters     = size of the output space
    #   kernel_size = size of the convolution window
    #   data_format = 'channels_first' means that the input shape is
    #                 (batch_size, features, time)
    convolution_layer = keras.layers.Conv1D(
        name='linear_convolution',
        filters=1,
        kernel_size=1,
        activation=activation,
        data_format='channels_first',
    )

    flatten_layer = keras.layers.Flatten(name='linnear_flatten')

    return keras.Sequential([input_layer, convolution_layer, flatten_layer])

In [33]:
class MixtureWeights(keras.layers.Layer):
    """Input-independent layer that returns learned mixture weights.

    The layer owns one trainable matrix of shape
    ``(num_locations, num_components)`` and returns its softmax along axis 1,
    so every row is non-negative and sums to one.

    Args:
        num_locations: Number of rows of the weight matrix.
        num_components: Number of columns of the weight matrix.
        **kwargs: Forwarded to ``keras.layers.Layer``.
    """
    def __init__(self, num_locations, num_components=2, **kwargs):
        super().__init__(**kwargs)
        # Kept so that get_config() can report the constructor arguments.
        self.num_locations = num_locations
        self.num_components = num_components

        self.w = self.add_weight(name='shared_mix_weights',
            shape=(num_locations, num_components ),
            initializer="uniform",
            trainable=True,
        )

        self.softmax = keras.layers.Softmax(axis=1)

    def call(self, inputs):
        """Return the softmax-normalised weight matrix; ``inputs`` is ignored."""
        return self.softmax(self.w)

    def get_config(self):
        """Return the layer config, including the constructor arguments.

        Keras needs this override for layers with extra ``__init__`` arguments
        so that a model containing the layer can be serialised or cloned.
        """
        config = super().get_config()
        config.update(
            {
                'num_locations': self.num_locations,
                'num_components': self.num_components,
            }
        )
        return config

In [34]:
def get_mixture(model, input_shape, num_components=2, seed=360):
    """Assemble a mixture-of-Poissons Keras model from several member models.

    Args:
        model: Callable invoked as ``model(input_shape, seed=...)`` once per
            component; each call must return a Keras model.
        input_shape: Two-element shape; the second element is used as the
            number of locations for the mixture-weight layer.
        num_components: Number of member models / mixture components.
        seed: Base seed; component ``c`` is built with ``seed + 1000 * c``.

    Returns:
        A ``keras.Model`` named ``'mix_model'`` whose single output is a
        ``MixtureSameFamily`` distribution with ``Categorical`` mixing
        probabilities and ``Poisson`` components.
    """
    # Only the location count is needed; unpacking still checks for two elements.
    _, num_locations = input_shape

    # One member model per component, each with its own seed offset.
    member_models = [
        model(input_shape, seed=seed + 1000 * component)
        for component in range(num_components)
    ]

    # Layers shared by the graph below.
    inputs = keras.Input(shape=input_shape, name='mix_input')
    reshape_layer = keras.layers.Reshape(name='mix_reshape', target_shape=(-1, 1))
    concat_layer = keras.layers.Concatenate(name='mix_concat', axis=-1)
    add_layer = keras.layers.Add(name='add_const')
    mixture_weight_layer = MixtureWeights(
        num_locations,
        name='mixture_weights',
        num_components=num_components,
    )

    # Give every member output a trailing component axis, then stack along it.
    member_outputs = [reshape_layer(member(inputs)) for member in member_models]
    stacked_rates = concat_layer(member_outputs)
    # Add a constant so the rate is always positive.
    positive_rates = add_layer([stacked_rates, tf.constant([1e-13])])

    # Mixture weights (the layer ignores the values of `inputs`).
    mixture_weights = mixture_weight_layer(inputs)

    def _make_mixture(params):
        # params[0]: mixing probabilities, params[1]: Poisson rates.
        return tfp.distributions.MixtureSameFamily(
            mixture_distribution=tfp.distributions.Categorical(probs=params[0]),
            components_distribution=tfp.distributions.Poisson(
                rate=params[1], validate_args=True
            ),
        )

    mixture_distribution_layer = tfp.layers.DistributionLambda(_make_mixture)
    outputs = mixture_distribution_layer([mixture_weights, positive_rates])

    return keras.Model(name='mix_model', inputs=inputs, outputs=[outputs])


In [35]:
# Build the mixture model: `num_components` members, each created by
# location_specific_linear, combined by get_mixture.
model  = get_mixture(location_specific_linear, input_shape, num_components=num_components)

In [36]:
# just 50 rows
example_X = train_X_THS[:50]
example_y = train_y_TS[:50]
example_X.shape

(50, 3, 12)

In [37]:
# Forward pass on the example batch. The model output is a distribution
# object (it is sampled and scored with .sample / .log_prob in the next cell).
mixture_distribution = model(example_X)

In [39]:
# Draw 10 samples from the predicted distribution and report their shape.
sample_y = mixture_distribution.sample(10)
print(f'Sample shape {sample_y.shape} is sample-by-batch-by-location')
# Log-probability of each drawn sample under the same distribution.
log_probs = mixture_distribution.log_prob(sample_y)

Sample shape (10, 50, 12) is sample-by-batch-by-location


In [42]:
def score_function_trick(jacobian_MBSp, decision_MBS):
    """Weight a Jacobian by per-sample decisions and average over samples.

    Going by the name suffixes, ``M`` is the sample axis, ``B`` the batch
    axis, ``S`` the location axis and ``p`` any trailing parameter axes.

    Args:
        jacobian_MBSp: Tensor whose first three axes line up with
            ``decision_MBS``; all remaining axes are treated as parameter axes.
        decision_MBS: Tensor multiplied into the Jacobian after being reshaped
            with trailing singleton axes.

    Returns:
        The mean over axis 0 of ``jacobian_MBSp * decision``, i.e. a tensor
        with the leading axis removed.
    """
    # Every parameter contributes some axes after the first three;
    # count them so the decisions can be broadcast against the Jacobian.
    trailing_axes = tf.cast(tf.rank(jacobian_MBSp) - 3, tf.int32)

    # Decision shape followed by one singleton axis per parameter axis.
    broadcast_shape = tf.concat(
        [tf.shape(decision_MBS), tf.ones([trailing_axes], tf.int32)],
        axis=0,
    )
    decision_MBSp = tf.reshape(decision_MBS, broadcast_shape)

    # Scale by the decision, then average over the leading (sample) axis.
    return tf.reduce_mean(jacobian_MBSp * decision_MBSp, axis=0)



def overall_gradient_calculation(gradient_BSp, decision_gradient_BS):
    """Combine a per-(batch, location) parameter gradient with a decision gradient.

    Going by the name suffixes, ``B`` is the batch axis, ``S`` the location
    axis and ``p`` any trailing parameter axes.

    Args:
        gradient_BSp: Tensor whose first two axes line up with
            ``decision_gradient_BS``; remaining axes are parameter axes.
        decision_gradient_BS: Tensor multiplied into ``gradient_BSp`` after
            being reshaped with trailing singleton axes.

    Returns:
        The sum over axes 0 and 1 of the product, leaving only the
        parameter axes.
    """
    # Every parameter contributes some axes after the first two;
    # count them so the decision gradient can be broadcast.
    trailing_axes = tf.cast(tf.rank(gradient_BSp) - 2, tf.int32)

    # Decision-gradient shape followed by one singleton axis per parameter axis.
    broadcast_shape = tf.concat(
        [tf.shape(decision_gradient_BS), tf.ones([trailing_axes], tf.int32)],
        axis=0,
    )
    decision_gradient_BSp = tf.reshape(decision_gradient_BS, broadcast_shape)

    # Scale by the decision gradient, then sum over batch and location.
    return tf.reduce_sum(gradient_BSp * decision_gradient_BSp, axis=[0, 1])
    




In [43]:
# Make 2 tapes over the same forward pass:
#   jacobian_tape -> Jacobian of the sample log-probabilities w.r.t. the model weights
#   loss_tape     -> gradient of the loss w.r.t. the expected decisions
with tf.GradientTape() as jacobian_tape, tf.GradientTape() as loss_tape:
    mixture = model(example_X)

    # Draw samples and score them under the predicted mixture.
    # NOTE(review): the reason for the +1e-9 offset is not visible in this cell
    # (presumably it keeps cross_ratio_decision away from exact zeros) -- confirm.
    sample_y_MBS = mixture.sample(num_score_func_samples)+1e-9
    sample_log_probs_MBS = mixture.log_prob(sample_y_MBS)

    # Per-sample decisions, then their mean over the sample axis (axis 0).
    sample_decisions_MBS = cross_ratio_decision(sample_y_MBS)
    expected_decisions_BS = tf.reduce_mean(sample_decisions_MBS, axis=0)

    # Metric of the expected decisions against the observed values, and the
    # log-probability of the observed values.
    bpr_B = bpr_K(example_y, expected_decisions_BS)
    observed_log_prob_BS = mixture.log_prob(example_y)

    # Loss = negative log-likelihood summed over the last axis, plus a hinge
    # penalty that is active only where bpr_B is below `threshold`:
    #   penalty * (threshold - bpr_B)
    loss_B = -tf.reduce_sum(observed_log_prob_BS, axis=-1)
    violate_threshold_flag_B = tf.cast(tf.greater(threshold, bpr_B),
                                       tf.float32)
    loss_B += penalty * violate_threshold_flag_B *(threshold - bpr_B)

# One Jacobian per trainable weight; each is weighted by the sample decisions
# and averaged over samples by score_function_trick.
jacobian_pMBS = jacobian_tape.jacobian(sample_log_probs_MBS, model.trainable_weights)
param_gradient_pBS = [score_function_trick(j, sample_decisions_MBS) for j in jacobian_pMBS]

# Gradient of the loss w.r.t. the expected decisions, chained with the
# per-weight estimates above to get one gradient tensor per trainable weight.
# NOTE(review): only the path through expected_decisions_BS is covered here;
# the direct gradient of the log-likelihood term w.r.t. the weights is not
# part of overall_gradient -- confirm that is intended or added elsewhere.
loss_gradients_BS = loss_tape.gradient(loss_B, expected_decisions_BS)
overall_gradient = [overall_gradient_calculation(g, loss_gradients_BS) for g in param_gradient_pBS]



In [44]:
# Display the resulting gradients: one tensor per entry of model.trainable_weights.
overall_gradient

[<tf.Tensor: shape=(1, 3, 1), dtype=float32, numpy=
 array([[[1525.3921  ],
         [  13.128357],
         [ -82.121086]]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([84.285675], dtype=float32)>,
 <tf.Tensor: shape=(1, 3, 1), dtype=float32, numpy=
 array([[[-1427.94 ],
         [-4536.369],
         [-2829.64 ]]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([59.452374], dtype=float32)>,
 <tf.Tensor: shape=(1, 3, 1), dtype=float32, numpy=
 array([[[-2886.3953],
         [-1164.0221],
         [ 4933.247 ]]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([-85.699356], dtype=float32)>,
 <tf.Tensor: shape=(1, 3, 1), dtype=float32, numpy=
 array([[[-7.950815e-02],
         [-6.704994e+02],
         [-9.544615e-03]]], dtype=float32)>,
 <tf.Tensor: shape=(1,), dtype=float32, numpy=array([44.94523], dtype=float32)>,
 <tf.Tensor: shape=(12, 4), dtype=float32, numpy=
 array([[ 129.37689 , -217.15352 ,  -42.850838,  130.