In [1]:
import sys
import time
import random
import csv
import numpy as np

import scipy.io
from scipy.io import loadmat

import torch
import pickle

from tqdm import tqdm
from tqdm import notebook

import matplotlib
import matplotlib.pyplot as plt

import importlib
import utils

# Enable autoreload
%load_ext autoreload
%autoreload 2
importlib.reload(utils)

# Set seeds for reproducibility
random.seed(0)
np.random.seed(0)
torch.manual_seed(0)
if torch.cuda.is_available():
    torch.cuda.manual_seed(0)
    torch.cuda.manual_seed_all(0)  # if you are using multi-GPU.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark     = False

TORCH_DTYPE = torch.float64 #NB: Basically all of the matrices in Spatial_GP have 1.e-7 added to the diagonal, to be changed if we want to use float64
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
torch.set_default_dtype(TORCH_DTYPE)
torch.set_default_device(device)
print(f'Device is: {device}')



Using device: cuda:0 (from utils.py)
Using device: cuda:0 (from utils.py)
Device is: cuda:0


### How this works:

We choose initial training points to be considered a random set of images to present to the retina. On this set, the complete GP will be run , to find STA paramaters eps_0: center, beta: width , rho: smoothness

We run the algorithm saving the seed used for picking the training images, so that this set can be changed and different runs with different initial conditions can be averaged.

1. Import the dataset and create a total training set X,R
2. Pick the cell and the initial training points, extracted randomly. These correspond also to the number of inducing points
3. Save the seed so you can keep the initial training set, and the fitted model
3. 


### Parameters of the training

In [2]:
rand_xtilde = True # If True, xtilde (inducing points) are chosen randomly, if False, xtilde is chosen from the first ntilde images

cellid       = 8         # Choose cell
ntrain_start = 20        # Number of first training data points

kernfun      = 'acosker' # Choose kernel function

nEstep       = 8         # Total number of E-steps iterations.
nFparamstep  = 5  
nMstep       = 6         # Total number of M-steps iterations. 
maxiter      = 5         # Iterations of the optimization algorithm comprising M and E steps

ntilde       = ntrain_start

### Import dataset and generate starting dataset

Create starting dataset on which to train with m step with ntilde = ntrain_start

In [3]:
# Open the .pkl dataset file for reading in binary mode (rb)
with open('/home/idv-eqs8-pza/IDV_code/Variational_GP/spatial_GP/Data/data2_41mixed_tr28.pkl', 'rb') as file:
    # Load the data from the file
    loaded_data = pickle.load(file)
    # loaded_data is a Dataset object from module Data with attributes "images_train, _val, _test" as well as responses

X_train = torch.tensor(loaded_data.images_train).to(device, dtype=TORCH_DTYPE) #shape (2910,108,108,1) where 108 is the number of pixels. 2910 is the amount of training points
X_val   = torch.tensor(loaded_data.images_val).to(device, dtype=TORCH_DTYPE)
X_test  = torch.tensor(loaded_data.images_test).to(device, dtype=TORCH_DTYPE) # shape (30,108,108,1) # nimages, npx, npx

R_train = torch.tensor(loaded_data.responses_train).to(device, dtype=TORCH_DTYPE) #shape (2910,41) 2910 is the amount of training data, 41 is the number of cells
R_val   = torch.tensor(loaded_data.responses_val).to(device, dtype=TORCH_DTYPE)
R_test  = torch.tensor(loaded_data.responses_test).to(device, dtype=TORCH_DTYPE) # shape (30,30,42) 30 repetitions, 30 images, 42 cells

# Create the complete dataset
X = torch.cat( (X_train, X_val), axis=0,) #shape (3160,108,108,1)
R = torch.cat( (R_train, R_val), axis=0,)
n_px_side = X.shape[1]  

# Reshape images to 1D vector and choose a cell
X = torch.reshape(X, ( X.shape[0], X.shape[1]*X.shape[2])) 
R = R[:,cellid] # shape (nt,) where nt is the number of trials

# Choose a random subset of the data and save the idx
all_idx  = torch.arange(0, X.shape[0])                     # Indices of the whole dataset  
torch.manual_seed(0)
torch.cuda.manual_seed(0)
rndm_idx = torch.randint(0, X.shape[0], (ntrain_start,))   # These will be the indices of the initial training 

# combined = torch.cat( (all_idx, rndm_idx) )              # Combine the indices, there are now some duplicates
# unique, counts = combined.unique(return_counts=True)

# remaining_idx = unique[counts==1]                        # Indices of the remaining data in the dataset (not yet used for training)
# start_idx     = unique[counts>1]                         # Indices of the data used for the initial training
# in_use_idx    = start_idx                                # Indices of the data used for training, including the initial training

start_idx     = rndm_idx
in_use_idx    = start_idx
remaining_idx = all_idx[~torch.isin( all_idx, in_use_idx )]

# Set the starting set
xtilde_start  = X[start_idx,:]                           # In the simplest case the starting points are all inducing points
X_remaining   = X[remaining_idx,:]
X_in_use      = X[in_use_idx,:]

R_remaining   = R[remaining_idx]
R_in_use      = R[in_use_idx]


# Estimate memory usage
# Calculate memory usage for each tensor
X_memory = X.element_size() * X.nelement()
r_memory = R.element_size() * R.nelement()
# Total memory usage in bytes
total_memory_bytes = X_memory + r_memory
# Convert bytes to megabytes (MB)
total_memory_MB = total_memory_bytes / (1024 ** 2)
print(f'Total dataset memory on GPU: {total_memory_MB:.2f} MB')



Total dataset memory on GPU: 281.23 MB


### Select cell, starting hyperparameters and firing rate parameters

In [4]:
# For details on the hyperparameters choice see one_cell_fit.ipynb
logbetaexpr = utils.fromlogbetasam_to_logbetaexpr( logbetasam=torch.tensor(5.5) )# Logbetaexpr in this code is equal to logbeta in Samuele's code. Samuele's code set logbeta to 5.5
logrhoexpr  = utils.fromlogrhosam_to_logrhoexpr( logrhosam=torch.tensor(5)) 
# logbetaexpr = torch.tensor(4.65)
# logrhoexpr = torch.tensor(4.3)
logsigma_0 = torch.tensor(0) 
sigma_0    = torch.exp(logsigma_0)
Amp        = torch.tensor(1.0) 
eps_0x     = torch.tensor(0.0001)
eps_0y     = torch.tensor(0.0001)
# Hypermarameters, if needed to be set manually
theta = {'sigma_0': sigma_0, 'Amp': Amp, 'eps_0x':eps_0x, 'eps_0y':eps_0y, '-2log2beta': logbetaexpr, '-log2rho2': logrhoexpr,  }

# Set the gradient of the hyperparemters to be updatable 
for key, value in theta.items():
    # to exclude a single hyperparemeters from the optimization ( to exclude them all just set nmstep=0 and dont do the M-step)
    # if key == 'Amp':
        # continue
    theta[key] = value.requires_grad_()

# If hyperparameters are set manually:
hyperparams_tuple = utils.generate_theta( x=X_in_use, r=R_in_use, n_px_side=n_px_side, display=True, **theta)
# If hyperparameters are set based on the STAs:
# hyperparams_tuple = utils.generate_theta( x=X, r=r, n_px_side=n_px_side, display=True)

A        = torch.tensor(0.007)
logA     = torch.log(A)
# lambda0  = torch.tensor(0.31)
lambda0  = torch.tensor(1.)
# f_params = {'logA': logA, 'loglambda0':torch.log(lambda0)}
f_params = {'logA': logA, 'lambda0':lambda0}

for key, value in f_params.items():
    f_params[key] = value.requires_grad_()

fit_parameters = {'ntilde':      ntilde,
                  'maxiter':     maxiter,
                  'nMstep':      nMstep,
                  'nEstep':      nEstep,
                  'nFparamstep': nFparamstep,
                  'kernfun':     kernfun,
                  'cellid':      cellid,
                  'n_px_side':   n_px_side,}

init_model = {
        'fit_parameters':    fit_parameters,
        'xtilde':            xtilde_start,
        'hyperparams_tuple': hyperparams_tuple,     # Contains also the upper and lower bounds for the hyperparameters
        'f_params':          f_params,
        'm':                 torch.zeros( (ntilde) )
        # 'm': torch.ones( (ntilde) )
        #'V': dont initialize V if you want it to be initialized as K_tilde and projected _exactly_ as K_tilde_b for stabilisation
    }


updated sigma_0 to 1.0000
updated Amp to 1.0000
updated eps_0x to 0.0001
updated eps_0y to 0.0001
updated -2log2beta to 4.8069
updated -log2rho2 to 4.3069


### Fit the starting model
And save it needed to start a new active fit

In [5]:
fit_model, err_dict = utils.varGP(X_in_use, R_in_use, **init_model)
# fit_model, err_dict = utils.varGP(X_in_use, R_in_use, **args)

# Save the model. All of the matrices are projected in the eigenspace of big eigenvalues of K_tilde. Indicated by _b
start_model = fit_model

if err_dict['is_error']:
    print('Error in the fit')
    raise err_dict['error']

# Save the model
# utils.save_model(start_model, f'models/starting_models_active_learning/cell:{cellid}_nstart:{ntrain_start}', additional_description='Starting model for active learning')


Initialization took: 0.0508 seconds

Total values_track memory on GPU: 0.00 MB

After initialization Allocated memory: 861.87 MB

After initialization Reserved (cached) memory: 956.00 MB
*Iteration*: 1 E-step took: 0.1927s, M-step took: 0.0501s
*Iteration*: 2 E-step took: 0.0379s, M-step took: 0.0214s
*Iteration*: 3 E-step took: 0.0087s, M-step took: 0.0217s
*Iteration*: 4 E-step took: 0.0381s
M-step skipped in the last iteration
Final Loss: 18.1914

Time spent for E-steps:       0.277s,
Time spent for f params:      0.112s
Time spent for m update:      0.165s
Time spent for M-steps:       0.093s
Time spent for X-steps:       0.371s
Time spent computing Kernels: 0.006s
Time spent computing Loss:    0.002s
Time total after init:        0.383s
Time total before init:       0.434s

Final Total values_track memory on GPU: 0.01 MB
Final Allocated memory: 871.57 MB
Final Reserved (cached) memory: 956.00 MB


Old version.
Find the most useful image and its ID. For now its estimating the utility for one at the time.

In [6]:
# start_model = utils.load_model('models/starting_models_active_learning/cell:8_nstart:50')

xstar = X_remaining # We call xstar the unseen images like in the notes, we will drop this notation later and just call it X_remaining

with torch.no_grad():
# reshape the remaining dataset to
# X_remaining = torch.reshape(Xremaining, (Xremaining.shape[0], Xremaining.shape[0]*Xremaining.shape[1]))

    kernfun       = start_model['fit_parameters']['kernfun']
    mask          = start_model['mask']
    C             = start_model['C']
    B             = start_model['B']
    K_tilde_b     = start_model['K_tilde_b']
    K_tilde_inv_b = start_model['K_tilde_inv_b']
    K_b           = start_model['K_b']
    Kvec          = start_model['Kvec']
    m_b           = start_model['m_b']
    V_b           = start_model['V_b']    
    f_params_fit  = start_model['f_params']
    A             = torch.exp(f_params_fit['logA'])
    lambda0       = torch.exp(f_params_fit['loglambda0']) if 'loglambda0' in f_params_fit else f_params_fit['lambda0']

    start_time = time.time()
    # Calculate the matrices to compute the lambda moments. They are referred to the unseen images xstar
    Kvec = utils.acosker(theta, xstar[:,mask], x2=None, C=C, dC=None, diag=True)
    K    = utils.acosker(theta, xstar[:,mask], x2=xtilde_start[:,mask], C=C, dC=None, diag=False)
    K_b  = K @ B 

    lambda_m_t, lambda_var_t = utils.lambda_moments( xstar[:,mask], K_tilde_b, K_b@K_tilde_inv_b, Kvec, K_b, C, m_b, V_b, theta, kernfun)  
    u_t                      = torch.zeros( X_remaining.shape[0] )
    logf_mean_t              = torch.zeros( X_remaining.shape[0] )
    logf_var_t               = torch.zeros( X_remaining.shape[0] )
    print(f'Elapsed time for lambda moments: {time.time()-start_time:.2f} seconds')
    start_time = time.time()

    r_capped = torch.arange(0, 100, dtype=TORCH_DTYPE)
    for i, x_idx in enumerate(remaining_idx):

        logf_mean = A*lambda_m_t[i] + lambda0
        logf_var  = A**2 * lambda_var_t[i]

        u = utils.utility(logf_var, logf_mean, r_capped )

        # plt.subplot(111)
        # plt.scatter(logf_var.item(), u.item(), color=colors[r_cutoffs.index(r_cutoff)], s=10-r_cutoffs.index(r_cutoff)*5)
        # plt.title(f'r_cutoff: {r_cutoff}')
        # print(f'Utility: {u.item():<8.4f} |  logf_mean: {logf_mean.item():8.4f} |  logf_var: {logf_var.item():6.4f}') 

        u_t[i]         = u
        logf_mean_t[i] = logf_mean
        logf_var_t[i]  = logf_var
        if i == 0 or u > u_best:
            x_idx_best = x_idx
            i_best     = i
            u_best     = u_t[i_best]
            
print(f'\nElapsed time for utility: {time.time()-start_time:.2f} seconds')  
print(f'Utility: {u_t[i_best].item():<8.4f} |  Best image ID: {i_best}')


Elapsed time for lambda moments: 0.01 seconds

Elapsed time for utility: 1.20 seconds
Utility: 0.0000   |  Best image ID: 3052


Old version.
Below a comparison of performance when calculating utility as a vector

In [7]:
# xstar = X_remaining[0,:][None,:]
xstar = X_remaining

# Calculate the matrices to compute the lambda moments. They are referred to the unseen images xstar
Kvec = utils.acosker(theta, xstar[:,mask], x2=None, C=C, dC=None, diag=True)
K    = utils.acosker(theta, xstar[:,mask], x2=xtilde_start[:,mask], C=C, dC=None, diag=False)
K_b  = K @ B 

lambda_m_t, lambda_var_t = utils.lambda_moments( xstar[:,mask], K_tilde_b, K_b@K_tilde_inv_b, Kvec, K_b, C, m_b, V_b, theta, kernfun)  
r_masked = torch.arange(0, 100, dtype=TORCH_DTYPE)

logf_mean = A*lambda_m_t + lambda0
logf_var  = A**2 * lambda_var_t
u         = torch.zeros(xstar.shape[0])

start_time = time.time()
for i, x_idx in enumerate(range(xstar.shape[0])):

    u[i] = utils.utility(logf_var[i], logf_mean[i], r_masked )
    #print(f'Utility: {u.item():<8.4f} |  logf_mean: {logf_mean[i].item():8.4f} |  logf_var: {logf_var[i].item():6.4f}') 
print(f'Elapsed time for utility one by one: {time.time()-start_time:.2f} seconds, mean of u: {u.mean():6f}, max of u: {u.max():6f}, min of u: {u.min():6f}')
        
start_time = time.time()
u2d = utils.nd_utility(logf_var, logf_mean, r_masked )
print(f'Elapsed time for utility as a vec  : {time.time()-start_time:.2f} seconds, mean of u: {u2d.mean():6f}, max of u: {u2d.max():6f}, min of u: {u2d.min():6f}')



Elapsed time for utility one by one: 1.05 seconds, mean of u: 0.000000, max of u: 0.000001, min of u: 0.000000
Elapsed time for utility as a vec  : 0.02 seconds, mean of u: 0.000000, max of u: 0.000001, min of u: 0.000000


### Evaluate the utility of each remaining image
New version

In [8]:
xstar = X_remaining

kernfun       = start_model['fit_parameters']['kernfun']
mask          = start_model['mask']
C             = start_model['C']
B             = start_model['B']
K_tilde_b     = start_model['K_tilde_b']
K_tilde_inv_b = start_model['K_tilde_inv_b']
K_b           = start_model['K_b']
Kvec          = start_model['Kvec']
m_b           = start_model['m_b']
V_b           = start_model['V_b']    
f_params_fit  = start_model['f_params']
A             = torch.exp(f_params_fit['logA'])
lambda0       = torch.exp(f_params_fit['loglambda0']) if 'loglambda0' in f_params_fit else f_params_fit['lambda0']

start_time = time.time()

# Calculate the matrices to compute the lambda moments. They are referred to the unseen images xstar
Kvec = utils.acosker(theta, xstar[:,mask], x2=None, C=C, dC=None, diag=True)
K    = utils.acosker(theta, xstar[:,mask], x2=xtilde_start[:,mask], C=C, dC=None, diag=False)
K_b  = K @ B 

lambda_m_t, lambda_var_t = utils.lambda_moments( xstar[:,mask], K_tilde_b, K_b@K_tilde_inv_b, Kvec, K_b, C, m_b, V_b, theta, kernfun)  

logf_mean = A*lambda_m_t + lambda0
logf_var  = A**2 * lambda_var_t

# Estimate the utility and cap the maximum r ( used in a summation to infinity )
r_masked = torch.arange(0, 100, dtype=TORCH_DTYPE)
u2d = utils.nd_utility(logf_var, logf_mean, r_masked )
print(f'\nElapsed time for lambda moments and utility : {time.time()-start_time:.2f} seconds')  



Elapsed time for lambda moments and utility : 0.02 seconds


### Find the index(es) for the most useful image 
Check the correspondence between the index: ```i_best``` of the ```xstar = X_remaining``` images with the one: ```x_idx_best```  for the complete dataset ```X```

In [9]:
i_best     = u2d.argmax()
x_idx_best = remaining_idx[i_best]
print(f'Utility: {u2d[i_best].item():<8.4f} |  Best image ID: {i_best}  | Best image index: {x_idx_best}')

Kvec = utils.acosker(theta, xstar[:,mask], x2=None, C=C, dC=None, diag=True)
K    = utils.acosker(theta, xstar[:,mask], x2=xtilde_start[:,mask], C=C, dC=None, diag=False)
K_b  = K @ B 
lambda_m_t, lambda_var_t = utils.lambda_moments( xstar[:,mask], K_tilde_b, K_b@K_tilde_inv_b, Kvec, K_b, C, m_b, V_b, theta, kernfun)  
logf_mean = A*lambda_m_t + lambda0
logf_var  = A**2 * lambda_var_t
print(f' this has utility {utils.nd_utility(logf_var[i_best], logf_mean[i_best], r_masked)} and f_mean :{torch.exp(logf_mean[i_best]).item():8.4f}')

# Calculate the matrices to compute the lambda moments. They are referred to the unseen images xstar
Kvec = utils.acosker(theta, X[x_idx_best,mask][None,:], x2=None, C=C, dC=None, diag=True)
K    = utils.acosker(theta, X[x_idx_best,mask][None,:], x2=xtilde_start[:,mask], C=C, dC=None, diag=False)
K_b  = K @ B 
lambda_m_t, lambda_var_t = utils.lambda_moments( X[x_idx_best,mask][None,:], K_tilde_b, K_b@K_tilde_inv_b, Kvec, K_b, C, m_b, V_b, theta, kernfun)  
logf_mean = A*lambda_m_t + lambda0
logf_var  = A**2 * lambda_var_t
print(f' this has utility {utils.nd_utility(logf_var, logf_mean, r_masked)} and f_mean :{torch.exp(logf_mean).item():8.4f}')

Utility: 0.0000   |  Best image ID: 3052  | Best image index: 3072
 this has utility tensor([5.0517e-07], device='cuda:0') and f_mean :  0.1500
 this has utility tensor([5.0517e-07], device='cuda:0') and f_mean :  0.1500


### Update the indexes tensors and fit the new model

In [10]:
# Update the used and remaining indices
in_use_idx    = torch.unique(torch.cat( (in_use_idx, x_idx_best[None])))  
remaining_idx = all_idx[~torch.isin( all_idx, in_use_idx )]

X_in_use    = X[in_use_idx]
R_in_use    = R[in_use_idx] 
X_remaining = X[remaining_idx]
R_remaining = R[remaining_idx]

# The added images are used as inducing points as long as the number of inducing points is less than 200
if in_use_idx.shape[0] < 200:
    ntilde = in_use_idx.shape[0]
    xtilde_updated = X_in_use

start_model['xtilde'] = xtilde_updated
start_model['fit_parameters']['ntilde'] = ntilde

fit_model, err_dict = utils.varGP(X_in_use, R_in_use, **start_model)

if err_dict['is_error']:
    print('Error in the fit')
    raise err_dict['error']
   


Initialization took: 0.0046 seconds

*Iteration*: 0 E-step took: 0.3310s, M-step took: 0.2420s
*Iteration*: 1 E-step took: 0.2831s, M-step took: 0.2267s
*Iteration*: 2 E-step took: 0.2814s
M-step skipped in the last iteration

E-steps took in total: 0.8955s
M-steps took in total: 0.4688s
Final Loss: 17.7385
      time for final loss computation: 0.0018 s
Total time for GP:                     1.4582 s


##### The training set has been updated and so has the xtilde

In [11]:
print(f'Number of starting points {X[start_idx].shape[0]}, Number of to be used points    {X_in_use.shape[0]}')
print(f'Number of initial ntilde  {xtilde_start.shape[0]}, Number of to be updated ntilde {xtilde_updated.shape[0]}')

Number of starting points 10, Number of to be used points    11
Number of initial ntilde  10, Number of to be updated ntilde 11


In [12]:
fit_model['xtilde'].shape

torch.Size([11, 11664])

### Loop like this until the selected amount of images has been shown

In [1]:
n_imgs_to_show = 200
r_capped = torch.arange(0, 100, dtype=TORCH_DTYPE)
for j in range(n_imgs_to_show):

    start_time_utility = time.time()

    kernfun        = fit_model['fit_parameters']['kernfun']
    theta          = fit_model['hyperparams_tuple'][0]
    xtilde_updated = fit_model['xtilde']
    K_tilde_b      = fit_model['K_tilde_b']
    K_tilde_inv_b  = fit_model['K_tilde_inv_b']
    B              = fit_model['B']
    C              = fit_model['C']
    m_b            = fit_model['m_b']
    V_b            = fit_model['V_b']
    mask           = fit_model['mask']

    # The matrices to compute the lambda moments are calculated on the unseen images X_remaining (xstar)
    Kvec = utils.acosker(theta, X_remaining[:,mask], x2=None,                   C=C, dC=None, diag=True)
    K    = utils.acosker(theta, X_remaining[:,mask], x2=xtilde_updated[:,mask], C=C, dC=None, diag=False)
    K_b  = K @ B 

    lambda_m_t, lambda_var_t = utils.lambda_moments( X_remaining[:,mask], K_tilde_b, K_b@K_tilde_inv_b, Kvec, K_b, C, m_b, V_b, theta, kernfun)  

    logf_mean = A*lambda_m_t + lambda0
    logf_var  = A**2 * lambda_var_t

    # Estimate the utility with a cappet .arange() vector r( used in a summation to infinity )
    u2d = utils.nd_utility(logf_var, logf_mean, r_capped )

    i_best     = u2d.argmax()
    x_idx_best = remaining_idx[i_best]

    in_use_idx    = torch.unique(torch.cat( (in_use_idx, x_idx_best[None]) ))
    remaining_idx = all_idx[~torch.isin( all_idx, in_use_idx )]

    X_in_use    = X[in_use_idx]
    R_in_use    = R[in_use_idx] 
    X_remaining = X[remaining_idx]
    R_remaining = R[remaining_idx]

    # The added images are used as inducing points as long as the number of inducing points is less than 200
    if in_use_idx.shape[0] < 200:
        ntilde = in_use_idx.shape[0]
        xtilde_updated = X[in_use_idx]

    print(f'Best utility: {u2d[i_best].item():<8.4f} | Mean Utility {u2d.mean().item():<8.4f} |  Best image ID: {i_best} | Best image index: {x_idx_best} | Elapsed time for utility: {time.time()-start_time_utility:.2f} seconds')


    fit_model['xtilde'] = xtilde_updated
    fit_model['fit_parameters']['ntilde'] = ntilde

    fit_model, err_dict = utils.varGP(X_in_use, R_in_use, **fit_model)

    if err_dict['is_error']:
        print(f'Error in the fit while adding image n {j}')
        raise err_dict['error'] 

NameError: name 'torch' is not defined