In [1]:
%load_ext autoreload
%autoreload 2
# Sanity check on implementation of 
import tensorflow as tf
import tensorflow_probability as tfp
import numpy as np
import time
# Set a limit on the memory usage of the GPU
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
            tf.config.experimental.set_virtual_device_configuration(
                gpu,
                [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])
    except RuntimeError as e:
        print(e)


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


2025-02-11 15:18:03.624828: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-11 15:18:03.624878: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-11 15:18:03.624890: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-11 15:18:03.635043: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Ensure that log prob calculations are the same as tfp implementation & faster

In [2]:
# Define your array (as a TensorFlow tensor)
n_rows = 10000 # i.e., number of voxels 
n_cols = 200 # i.e., number of samples in timeseries
num_iterations = 1000
data = tf.constant(np.random.rand(n_rows, n_cols), dtype=tf.float32)
scale_values = tf.constant(np.random.rand(n_rows, 1), dtype=tf.float32)
dof_values = tf.constant(np.random.rand(n_rows, 1), dtype=tf.float32)

2025-02-11 15:18:11.982695: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1886] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 4096 MB memory:  -> device: 0, name: Tesla K40m, pci bus id: 0000:03:00.0, compute capability: 3.5


In [3]:


from braincoder.utils.math import calculate_log_prob_gauss
for device in ['GPU', 'CPU']:
    print(f'Trying with {device}')
    with tf.device(f'/{device}:0'):
        # Timing the custom log probability calculation
        start_time = time.time()
        for _ in range(num_iterations):
            calculate_log_prob_gauss(data, scale_values)  # Now correctly broadcasted
        time_custom = time.time() - start_time
        output_custom = calculate_log_prob_gauss(data, scale_values)
        # Timing the TFP log probability calculation
        start_time = time.time()
        for _ in range(num_iterations):
            normal_dist = tfp.distributions.Normal(loc=0.0, scale=scale_values)  # Correct shape
            normal_dist.log_prob(data)  # Correct shape
        time_tfd = time.time() - start_time
        output_tfd = normal_dist.log_prob(data)
        # Print the results
        print(f"Custom log probability calculation time over {num_iterations} iterations: {time_custom:.6f} seconds")
        print(f"TFP log probability calculation time over {num_iterations} iterations: {time_tfd:.6f} seconds")
        print(f"Custom method is  {time_tfd/time_custom:.3f} x faster")
        print(f"     tfd,        custom")
        for i1 in range(3):
            for i2 in range(3):
                print(f'{output_tfd[i1,i2]:10.3f}, {output_custom[i1,i2]:10.3f}')


Trying with GPU
Custom log probability calculation time over 1000 iterations: 0.946002 seconds
TFP log probability calculation time over 1000 iterations: 2.083450 seconds
Custom method is  2.202 x faster
     tfd,        custom
    -1.360,     -1.360
    -0.358,     -0.358
    -0.588,     -0.588
    -8.465,     -8.465
    -2.790,     -2.790
     0.141,      0.141
    -1.126,     -1.126
    -2.986,     -2.986
    -2.169,     -2.169
Trying with CPU
Custom log probability calculation time over 1000 iterations: 2.680341 seconds
TFP log probability calculation time over 1000 iterations: 3.291053 seconds
Custom method is  1.228 x faster
     tfd,        custom
    -1.360,     -1.360
    -0.358,     -0.358
    -0.588,     -0.588
    -8.465,     -8.465
    -2.790,     -2.790
     0.141,      0.141
    -1.126,     -1.126
    -2.986,     -2.986
    -2.169,     -2.169


In [4]:
from braincoder.utils.math import calculate_log_prob_t
for device in ['GPU', 'CPU']:
    print(f'Trying with {device}')
    with tf.device(f'/{device}:0'):
        # Timing the custom log probability calculation
        start_time = time.time()
        for _ in range(num_iterations):
            calculate_log_prob_t(data, scale_values, dof_values)  # Now correctly broadcasted
        time_custom = time.time() - start_time
        output_custom = calculate_log_prob_t(data, scale_values, dof_values)

        # Timing the TFP log probability calculation
        start_time = time.time()
        for _ in range(num_iterations):
            t_dist = tfp.distributions.StudentT(df=dof_values, loc=0.0, scale=scale_values)  # Correct shape
            t_dist.log_prob(data)  # Correct shape
        time_tfd = time.time() - start_time
        output_tfd = t_dist.log_prob(data)

        # Print the results
        print(f"Custom log probability calculation time over {num_iterations} iterations: {time_custom:.6f} seconds")
        print(f"TFP log probability calculation time over {num_iterations} iterations: {time_tfd:.6f} seconds")
        print(f"Custom method is  {time_tfd/time_custom:.3f} x faster")
        print(f"     tfd,        custom")
        for i1 in range(3):
            for i2 in range(3):
                print(f'{output_tfd[i1,i2]:10.3f}, {output_custom[i1,i2]:10.3f}')


Trying with GPU
Custom log probability calculation time over 1000 iterations: 3.415824 seconds
TFP log probability calculation time over 1000 iterations: 9.423873 seconds
Custom method is  2.759 x faster
     tfd,        custom
    -2.125,     -2.125
    -1.341,     -1.341
    -1.622,     -1.622
    -2.350,     -2.350
    -1.653,     -1.653
    -0.537,     -0.537
    -1.928,     -1.928
    -2.485,     -2.485
    -2.296,     -2.296
Trying with CPU
Custom log probability calculation time over 1000 iterations: 8.010862 seconds
TFP log probability calculation time over 1000 iterations: 12.927133 seconds
Custom method is  1.614 x faster
     tfd,        custom
    -2.125,     -2.125
    -1.341,     -1.341
    -1.622,     -1.622
    -2.350,     -2.350
    -1.653,     -1.653
    -0.537,     -0.537
    -1.928,     -1.928
    -2.485,     -2.485
    -2.296,     -2.296
