In [1]:
"""
Run the model fitting for the FWRF model.
There are a few different versions of fitting in this script; the input arguments (here, the variables set in the configuration cell) determine which kind of fitting to do.
"""

# import basic modules
import sys
import os
import time
import numpy as np
from tqdm import tqdm
import gc
import torch
import argparse
import skimage.transform

# import custom modules
code_dir = '/user_data/mmhender/imStat/code/'
sys.path.append(code_dir)
from feature_extraction import texture_statistics_gabor, bdcn_features, sketch_token_features
from feature_extraction import texture_statistics_pyramid
from utils import nsd_utils, roi_utils, default_paths

from model_fitting import initialize_fitting, arg_parser, merge_features, fwrf_fit, fwrf_predict

# Floating-point type passed as `dtype` to the fitting and validation calls later on
fpX = np.float32
# Set up CUDA through the project helper; the returned device is handed to the
# feature extractor and to the fitting routine further down.
device = initialize_fitting.init_cuda()


#device: 1
device#: 0
device name: GeForce GTX TITAN X

torch: 1.8.1+cu111
cuda:  11.1
cudnn: 8005
dtype: torch.float32


In [37]:
# Configuration for this run. One setting per line so that every value is easy
# to find and change; each name is assigned exactly once.

# ---- What to fit ----
fitting_type = 'sketch_tokens'   # checked by substring further down (e.g. 'pyramid', 'gabor', 'bdcn')
subject = 1
volume_space = True
up_to_sess = 10                  # sessions 0 .. up_to_sess-1 are loaded
debug = True
do_stack = False                 # when True, '_stacked' is appended to the model name

# ---- Which stages to run ----
do_fitting = True
do_val = True
do_varpart = True
date_str = None                  # must stay None when do_fitting is True

# ---- Sketch-token feature options ----
use_pca_st_feats = True
use_lda_st_feats = False         # forced to False later whenever use_pca_st_feats is True
min_pct_var = 95
max_pc_to_retain = 100

# ---- PCA flags ----
do_pca_pyr_hl = False
# NOTE(review): do_pca_st and do_pca_bdcn are read by the setup checks and by the
# bdcn model naming, but were never assigned in this notebook (NameError on those
# paths). False is assumed here to match do_pca_pyr_hl -- confirm intended values.
do_pca_st = False
do_pca_bdcn = False

# ---- Gabor / pyramid feature options ----
n_ori = 4
n_sf = 4
nonlin_fn = False
padding_mode = 'circular'
group_all_hl_feats = True

# ---- BDCN feature options ----
map_ind = -1
n_prf_sd_out = 2
mult_patch_by_prf = True
downsample_factor = 1.0
do_nms = False

# ---- Fitting options ----
sample_batch_size = 50
voxel_batch_size = 100
zscore_features = True
ridge = True

# ---- Control / shuffling options ----
shuffle_images = False
random_images = False
random_voxel_data = False
shuff_rnd_seed = 0               # 0 means: build a seed from the clock when training starts

In [38]:
def save_all(fn2save, fitting_type):
    """
    Gather the run settings and fit results into one dictionary and write it to disk.

    Apart from the two arguments, every saved value is read from a notebook-level
    variable, so those variables must already exist when this is called.
    A common set of entries is always stored; further entries are added for each
    model-family substring found in fitting_type.

    fn2save: path the dictionary is written to (via torch.save).
    fitting_type: string describing the fit; tested for the substrings
        'bdcn', 'sketch_tokens', 'pyramid' and 'gabor'.
    """
    # Entries that are stored for every kind of fit
    dict2save = dict(
        subject=subject,
        volume_space=volume_space,
        fitting_type=fitting_type,
        voxel_mask=voxel_mask,
        brain_nii_shape=brain_nii_shape,
        image_order=image_order,
        voxel_index=voxel_index,
        voxel_roi=voxel_roi,
        voxel_ncsnr=voxel_ncsnr,
        aperture=aperture,
        aperture_rf_range=aperture_rf_range,
        models=models,
        n_prf_sd_out=n_prf_sd_out,
        best_losses=best_losses,
        best_lambdas=best_lambdas,
        best_params=best_params,
        lambdas=lambdas,
        val_cc=val_cc,
        val_r2=val_r2,
        partial_masks=partial_masks,
        partial_version_names=partial_version_names,
        stack_result=stack_result,
        stack_result_lo=stack_result_lo,
        partial_models_used_for_stack=partial_models_used_for_stack,
        train_r2=train_r2,
        train_cc=train_cc,
        zscore_features=zscore_features,
        ridge=ridge,
        debug=debug,
        up_to_sess=up_to_sess,
        shuff_rnd_seed=shuff_rnd_seed,
    )

    # Family-specific extras. The globals in each branch are only looked up
    # when that branch runs, so they need not exist for other kinds of fit.
    if 'bdcn' in fitting_type:
        dict2save['pc'] = pc
        dict2save['min_pct_var'] = min_pct_var
        dict2save['max_pc_to_retain'] = max_pc_to_retain
        dict2save['mult_patch_by_prf'] = mult_patch_by_prf
        dict2save['do_nms'] = do_nms
        dict2save['downsample_factor'] = downsample_factor

    if 'sketch_tokens' in fitting_type:
        dict2save['min_pct_var'] = min_pct_var
        dict2save['max_pc_to_retain'] = max_pc_to_retain
        dict2save['use_pca_st_feats'] = use_pca_st_feats
        dict2save['use_lda_st_feats'] = use_lda_st_feats

    if 'pyramid' in fitting_type:
        dict2save['pc'] = pc
        dict2save['min_pct_var'] = min_pct_var
        dict2save['max_pc_to_retain'] = max_pc_to_retain
        dict2save['feature_info'] = feature_info
        dict2save['group_all_hl_feats'] = group_all_hl_feats

    if 'gabor' in fitting_type:
        # Simple-cell extractor
        dict2save['feature_table_simple'] = _gabor_ext_simple.feature_table
        dict2save['filter_pars_simple'] = _gabor_ext_simple.gabor_filter_pars
        dict2save['orient_filters_simple'] = _gabor_ext_simple.filter_stack
        # Complex-cell extractor
        dict2save['feature_table_complex'] = _gabor_ext_complex.feature_table
        dict2save['filter_pars_complex'] = _gabor_ext_complex.gabor_filter_pars
        dict2save['orient_filters_complex'] = _gabor_ext_complex.filter_stack
        # Remaining gabor settings
        dict2save['feature_types_exclude'] = feature_types_exclude
        dict2save['feature_info'] = feature_info
        dict2save['nonlin_fn'] = nonlin_fn
        dict2save['padding_mode'] = padding_mode
        dict2save['autocorr_output_pix'] = autocorr_output_pix
        dict2save['group_all_hl_feats'] = group_all_hl_feats

    print('\nSaving to %s\n'%fn2save)
    torch.save(dict2save, fn2save, pickle_protocol=4)

# A date string of 0 is treated the same as not supplying one
if date_str==0:
    date_str = None

# Check that do_fitting, date_str and the pca flags form a valid combination
if do_fitting==False:
    # Resuming from a saved fit: a date is required and pca cannot be in use
    if date_str is None:
        raise ValueError('if you want to start midway through the process (--do_fitting=False), then specify the date when training result was saved (--date_str).')
    if do_pca_pyr_hl or do_pca_st or do_pca_bdcn:
        raise ValueError('Cannot start midway through the process (--do_fitting=False) when doing pca, because the pca weight matrix is not saved in between trn/val.')

if do_fitting==True:
    # Fitting from scratch: a date must not be given
    if date_str is not None:
        raise ValueError('if you want to do fitting from scratch (--do_fitting=True), specify --date_str=None (rather than entering a date)')

# Pick the primary model family from substrings of fitting_type.
# The first matching branch wins, so the order of these tests matters.
if 'pyramid' in fitting_type:
    name1 = 'pyramid_texture'
    feature_types_exclude = ['pixel']
    model_name = initialize_fitting.get_pyramid_model_name(
        ridge, n_ori, n_sf, do_pca_hl = do_pca_pyr_hl)

elif 'gabor_texture' in fitting_type:
    name1 = 'gabor_texture'
    feature_types_exclude = []
    model_name = initialize_fitting.get_gabor_texture_model_name(ridge, n_ori, n_sf)

elif 'gabor_solo' in fitting_type:
    name1 = 'gabor_solo'
    feature_types_exclude = ['pixel', 'simple_feature_means', 'autocorrs', 'crosscorrs']
    model_name = initialize_fitting.get_gabor_solo_model_name(ridge, n_ori, n_sf)

elif 'bdcn' in fitting_type:
    name1 = 'bdcn'
    model_name = initialize_fitting.get_bdcn_model_name(do_pca_bdcn, map_ind)

elif 'sketch_tokens' in fitting_type:
    name1 = 'sketch_tokens'
    # pca and lda features are mutually exclusive; pca takes priority
    if use_pca_st_feats:
        use_lda_st_feats = False
    model_name = initialize_fitting.get_sketch_tokens_model_name(
        use_pca_st_feats, use_lda_st_feats)

else:
    raise ValueError('your string for fitting_type was not recognized')

# Optionally a second feature set is combined with the primary one;
# its name is appended after '_plus_'
if 'plus_sketch_tokens' in fitting_type:
    model_name2 = initialize_fitting.get_sketch_tokens_model_name(
        use_pca_st_feats, use_lda_st_feats)
    model_name += '_plus_' + model_name2
elif 'plus_bdcn' in fitting_type:
    model_name2 = initialize_fitting.get_bdcn_model_name(do_pca_bdcn, map_ind)
    model_name += '_plus_' + model_name2

if do_stack:
    model_name += '_stacked'

# Get the output folder and the file that results will be saved to
output_dir, fn2save = initialize_fitting.get_save_path(
    subject, volume_space, model_name,
    shuffle_images, random_images, random_voxel_data,
    debug, date_str)


Time Stamp: Oct-04-2021_2223_03

Will save final output file to /user_data/mmhender/imStat/model_fits/S01/sketch_tokens_pca/Oct-04-2021_2223_03_DEBUG/



In [6]:
# Decide which voxels to use
voxel_mask, voxel_index, voxel_roi, voxel_ncsnr, brain_nii_shape = \
    roi_utils.get_voxel_roi_info(subject, volume_space)

sessions = np.arange(up_to_sess)
zscore_betas_within_sess = True

# Load all data and the corresponding images as two splits (trn and val).
# The set that gets left out is always the same fixed one.
(trn_stim_data, trn_voxel_data, val_stim_data, val_voxel_data,
 image_order, image_order_trn, image_order_val) = nsd_utils.get_data_splits(
    subject,
    sessions=sessions,
    voxel_mask=voxel_mask,
    volume_space=volume_space,
    zscore_betas_within_sess=zscore_betas_within_sess,
    shuffle_images=shuffle_images,
    random_images=random_images,
    random_voxel_data=random_voxel_data,
)

if any(family in fitting_type for family in ('gabor', 'sketch_tokens', 'pyramid')):
    # These models use pre-computed features, which are loaded rather than
    # computed from images. So the fitting and val functions are given the image
    # indices (into the 10,000 dim array) instead of the images themselves, and
    # the indices tell which features to load.
    trn_stim_data = image_order_trn
    val_stim_data = image_order_val

# Additional fitting parameters
holdout_size, lambdas = initialize_fitting.get_fitting_pars(
    trn_voxel_data, zscore_features, ridge=ridge)

# Parameters for the spatial part of the model (the candidate pRFs)
aperture_rf_range = 1.1
aperture, models = initialize_fitting.get_prf_models(aperture_rf_range=aperture_rf_range)
    


Volume space: ROI defs are located at: /lab_data/tarrlab/common/datasets/NSD/nsddata/ppdata/subj01/func1pt8mm/roi

3794 voxels of overlap between kastner and prf definitions, using prf defs
unique values in retino labels:
[-1.  0.  1.  2.  3.  4.  5.  6.  7.  8.  9. 10. 11. 12. 13. 14. 15. 16.
 17. 18. 19. 20. 21. 22. 23. 24. 25.]
0 voxels of overlap between face and place definitions, using place defs
unique values in categ labels:
[-1.  0. 26. 27. 28. 30. 31. 32. 33.]
1535 voxels are defined (differently) in both retinotopic areas and category areas

14913 voxels are defined across all areas, and will be used for analysis

Loading numerical label/name mappings for all ROIs:
[1, 2, 3, 4, 5, 6, 7]
['V1v', 'V1d', 'V2v', 'V2d', 'V3v', 'V3d', 'hV4']
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]
['V1v', 'V1d', 'V2v', 'V2d', 'V3v', 'V3d', 'hV4', 'VO1', 'VO2', 'PHC1', 'PHC2', 'TO2', 'TO1', 'LO2', 'LO1', 'V3B', 'V3A', 'IPS0', 'IPS1', 'IPS2', 'IPS

In [39]:
# Build the sketch-token feature extractor from the configuration values
_feature_extractor = sketch_token_features.sketch_token_feature_extractor(
    subject=subject,
    device=device,
    use_pca_feats=use_pca_st_feats,
    min_pct_var=min_pct_var,
    max_pc_to_retain=max_pc_to_retain,
    use_lda_feats=use_lda_st_feats,
)

In [40]:
# Release unused memory (python objects and the CUDA cache) before fitting
gc.collect()
torch.cuda.empty_cache()
print('\nStarting training...\n')

# A seed of 0 is a placeholder: replace it with a number built from the
# current minute, hour and day of month
if shuff_rnd_seed==0:
    shuff_rnd_seed = int(time.strftime('%M%H%d', time.localtime()))

if debug:
    print('flipping the models upside down to start w biggest pRFs')
    # NOTE: this reassigns `models` itself, so running this cell a second time
    # flips the array back to its original order.
    models = np.flipud(models)

# include an intercept term
add_bias=True
# whether to shuffle before splitting off the nested heldout data used for
# lambda and param selection; always True here.
shuffle=True

(best_losses, best_lambdas, best_params,
 best_train_holdout_preds, holdout_trial_order) = fwrf_fit.fit_fwrf_model(
    trn_stim_data,
    trn_voxel_data,
    _feature_extractor,
    models,
    lambdas,
    zscore=zscore_features,
    add_bias=add_bias,
    voxel_batch_size=voxel_batch_size,
    holdout_size=holdout_size,
    shuffle=shuffle,
    shuff_rnd_seed=shuff_rnd_seed,
    device=device,
    dtype=fpX,
    debug=debug,
)
# second name for the held-out predictions returned by the fit
trn_holdout_voxel_data_pred = best_train_holdout_preds



Starting training...

flipping the models upside down to start w biggest pRFs
dtype = <class 'numpy.float32'>
device = cuda:0
trn_size = 6122 (90.0%)
Seeding random number generator: seed is 232204
Initializing for fitting
Clearing features from memory
---------------------------------------


Getting features for prf 0: [x,y,sigma] is [-0.55 -0.55 0.0400]
Loading pre-computed features from /user_data/mmhender/features/sketch_tokens/PCA/S1_PCA.npy
Size of features array for first prf model with this image set is:
(6803, 28)
Final size of feature matrix is:
(6803, 28)

Fitting version 0 of 1: full_model, 
fitting model    0 of 875 , voxels [ 14900:14912 ] of 14913
Getting features for prf 1: [x,y,sigma] is [-0.49 -0.55 0.0400]
Final size of feature matrix is:
(6803, 24)

Fitting version 0 of 1: full_model, 
fitting model    1 of 875 , voxels [ 14900:14912 ] of 14913
---------------------------------------
total time = 8.305824s
total throughput = 0.000557s/voxel
voxel throughput = 0.00

In [41]:
# Collect the pca summary [pct_var_expl, min_pct_var, n_comp_needed] from the
# feature extractor. An extractor without a pct_var_expl attribute gives None.
if 'plus' not in fitting_type:
    # Single feature set: pc is one summary (or None)
    m = _feature_extractor
    pc = [m.pct_var_expl, m.min_pct_var,  m.n_comp_needed] if hasattr(m, 'pct_var_expl') else None
else:
    # Combined feature sets: pc is a list with one entry per module
    pc = []
    for m in _feature_extractor.modules:
        pcm = [m.pct_var_expl, m.min_pct_var,  m.n_comp_needed] if hasattr(m, 'pct_var_expl') else None
        pc.append(pcm)

# Masks and names of the partial versions, as reported by the extractor
partial_masks, partial_version_names = _feature_extractor.get_partial_versions()


In [42]:
sys.stdout.flush()

# These results do not exist yet at this stage; set them to None so that
# save_all can still read every variable it stores.
val_cc = val_r2 = None
train_cc = train_r2 = None
stack_result = stack_result_lo = None
partial_models_used_for_stack = None

# Write out what is available after training
save_all(fn2save, fitting_type)
print('\nSaved training results\n')
sys.stdout.flush()


Saving to /user_data/mmhender/imStat/model_fits/S01/sketch_tokens_pca/Oct-04-2021_2223_03_DEBUG/all_fit_params


Saved training results



In [43]:
# Release unused memory (python objects and the CUDA cache) before validating
gc.collect()
torch.cuda.empty_cache()
print('about to start validation')
sys.stdout.flush()

# Evaluate the fitted parameters on the validation split
val_cc, val_r2, val_voxel_data_pred = fwrf_predict.validate_fwrf_model(
    best_params,
    models,
    val_voxel_data,
    val_stim_data,
    _feature_extractor,
    sample_batch_size=sample_batch_size,
    voxel_batch_size=voxel_batch_size,
    debug=debug,
    dtype=fpX,
)

# Save again, now that val_cc and val_r2 hold the validation results
save_all(fn2save, fitting_type)

about to start validation
Clearing features from memory
Getting features for prf 0: [x,y,sigma] is [-0.55 -0.55 0.0400]
Loading pre-computed features from /user_data/mmhender/features/sketch_tokens/PCA/S1_PCA.npy
Size of features array for first prf model with this image set is:
(697, 28)
Final size of feature matrix is:
(697, 28)
Getting features for prf 1: [x,y,sigma] is [-0.49 -0.55 0.0400]
Final size of feature matrix is:
(697, 24)
Clearing features from memory
Getting predictions for voxels [0-99] of 14913

Evaluating version 0 of 1: full_model
Includes 100 features
number of zeros:
76
size of weights is:
torch.Size([100, 100])
Getting predictions for voxels [100-199] of 14913

Evaluating version 0 of 1: full_model
Includes 100 features
number of zeros:
72
size of weights is:
torch.Size([100, 100])
Getting predictions for voxels [200-299] of 14913

Saving to /user_data/mmhender/imStat/model_fits/S01/sketch_tokens_pca/Oct-04-2021_2223_03_DEBUG/all_fit_params



In [44]:
# Ad-hoc inspection: display element 5 of the fitted parameters returned by training.
# NOTE(review): what this element represents is not shown here -- confirm against
# fwrf_fit.fit_fwrf_model, or remove this cell once it is no longer needed.
best_params[5]

array([[1],
       [1],
       [0],
       ...,
       [1],
       [1],
       [1]])