# import libraries

In [1]:
import matplotlib; matplotlib.use('agg')
import mvpa2.suite as mv
import numpy as np
import pandas as pd
from scipy import stats
from scipy.io import wavfile
import sys, os, time, csv
from sklearn.linear_model import RidgeCV
from nilearn.plotting import plot_surf
import matplotlib.pyplot as plt
from tikreg import models
from tikreg import utils as tikutils

from tikreg import models, utils as tikutils
from tikreg import spatial_priors, temporal_priors


  import h5py.highlevel  # >= 2.8.0, https://github.com/h5py/h5py/issues/1063
  from numpy.testing.decorators import skipif
  @attr.s(cmp=False, hash=False)


In [2]:
%matplotlib inline
import matplotlib
matplotlib.rcParams['figure.figsize'] = [7., 7.]
matplotlib.rcParams['font.size'] = 15


# parameters

In [3]:
participants = ['sub-rid000001']
hemispheres = ['lh']

tr_movie = {1:369, 2:341, 3:372, 4:406}
tr_fmri = {1:374, 2:346, 3:377, 4:412}
tr_length = 2.5
n_samples = 1509
n_vertices = 40962
n_proc = 32     # how many cores do we have?
n_medial = {'lh': 3486, 'rh': 3491}

# mvpa_dir = '/idata/DBIC/cara/life/pymvpa/'
sam_data_dir = '/Users/h/Documents/projects_local/life-encoding-sandbox/data'
# ridge_dir = '/idata/DBIC/cara/life/ridge'
# cara_data_dir = '/idata/DBIC/cara/life/data'
npy_dir = '/Users/h/Documents/projects_local/life-encoding-sandbox/w2v_features'


# functions

In [4]:
def get_visual_stim_for_fold(stimfile, fold_shifted, included):
    cam = np.load(os.path.join(npy_dir, '{0}.npy'.format(stimfile)))
    # motion = np.load('/ihome/cara/global_motion/motion_downsampled_complete.npy')
    #
    # motion_list = []
    # motion_list.append(motion[:369])
    # motion_list.append(motion[369:710])
    # motion_list.append(motion[710:1082])
    # motion_list.append(motion[1082:])

    full_stim = []
    full_stim.append(cam[:369,:])
    full_stim.append(cam[369:710,:])
    full_stim.append(cam[710:1082,:])
    full_stim.append(cam[1082:,:])

    for i in range(len(full_stim)):
        # m = motion_list[i]
    	# m_avg = np.mean(np.vstack((m[3:], m[2:-1], m[1:-2], m[:-3])),axis=0)
    	# m_avg = np.reshape(m_avg,(-1,1))

        this = full_stim[i]
        # full_stim[i] = np.concatenate((m_avg, this[3:,:], this[2:-1,:], this[1:-2,:], this[:-3,:]), axis=1)
        full_stim[i] = np.concatenate((this[3:,:], this[2:-1,:], this[1:-2,:], this[:-3,:]), axis=1)

    train_stim = [full_stim[i] for i in np.subtract(included, 1)]
    test_stim = full_stim[fold_shifted-1]

    return train_stim, test_stim

In [5]:
def get_mel():
    mel_list = [[],[],[],[]]
    directory = os.path.join(cara_data_dir, 'spectral', 'complete')
    for f in os.listdir(directory):
        if 'csv' in f:
            run = int(f[-5])
            s = pd.read_csv(os.path.join(directory, f))
            filter_col = [col for col in s if col.startswith('mel')]
            tr_s = np.array(s[filter_col])
            tr_avg = np.mean(tr_s, axis=1)

            groupby = tr_avg.shape[0] / tr_movie[run]
            remainder = tr_avg.shape[0] % tr_movie[run]
            tr_reshaped = np.reshape(tr_avg[:-remainder], (tr_movie[run], groupby))
            avg = np.mean(tr_reshaped, axis=1)
            mel_list[run-1] = avg
    return mel_list

In [6]:
def get_ws_data(test_p, fold_shifted, included, hemi):
    print('\nLoading fMRI GIFTI data for HA in test subj space and using {0} as test participant...'.format(test_p))
    train_resp = []
    for run in included:
        avg = []
        if run == 4:
            resp = mv.gifti_dataset(os.path.join(sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(test_p, tr_fmri[run], run, hemi))).samples[4:-5,:]
        else:
            resp = mv.gifti_dataset(os.path.join(sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(test_p, tr_fmri[run], run, hemi))).samples[4:-4,:]

        resp = resp[:,cortical_vertices[hemi] == 1]
        mv.zscore(resp, chunks_attr=None)
        print('train', run, resp.shape)

        train_resp.append(resp)

    if fold_shifted == 4:
        test_resp = mv.gifti_dataset(os.path.join(sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(test_p, tr_fmri[fold_shifted], fold_shifted, hemi))).samples[4:-5,:]
    else:
        test_resp = mv.gifti_dataset(os.path.join(sam_data_dir, '{0}_task-life_acq-{1}vol_run-0{2}.{3}.tproject.gii'.format(test_p, tr_fmri[fold_shifted], fold_shifted, hemi))).samples[4:-4,:]

    test_resp = test_resp[:,cortical_vertices[hemi] == 1]
    mv.zscore(test_resp, chunks_attr=None)
    print('test', fold_shifted, test_resp.shape)

    return train_resp, test_resp

# sandbox parameters

In [7]:
model = 'visual'
align = 'ws'
# stimfile = 'all'
fold = 1
fold_shifted = fold+1
hemi = 'lh'
included = [1,2,3,4]
included.remove(fold_shifted)
test_p = 'sub-rid000001'

In [8]:
cortical_vertices = {}
for half in ['lh', 'rh']:
    test_ds = mv.niml.read('/Users/h/Documents/projects_local/life-encoding-sandbox/niml/ws.lh.niml.dset'.format(half))
    cortical_vertices[half] = np.ones((n_vertices))
    cortical_vertices[half][np.sum(test_ds.samples[1:, :] != 0, axis=0) == 0] = 0

  data_1d = np.fromstring(s, dtype=tp)


## features bg & action

In [9]:
stimfile = 'bg'
X1train_stim, X1test_stim = get_visual_stim_for_fold('{0}_{1}'.format(model, stimfile), fold_shifted, included)

stimfile = 'actions'
X2train_stim, X2test_stim = get_visual_stim_for_fold('{0}_{1}'.format(model, stimfile), fold_shifted, included)

Ytrain_uncon, Ytest = get_ws_data(test_p, fold_shifted, included, hemi)



Loading fMRI GIFTI data for HA in test subj space and using sub-rid000001 as test participant...



* deprecated from version: 2.1
* Will raise <class 'nibabel.deprecator.ExpiredDeprecationError'> as of version: 4.0
  samples = giftiio.read(samples)


('train', 1, (366, 37476))
('train', 3, (369, 37476))
('train', 4, (403, 37476))
('test', 2, (338, 37476))


In [56]:
# concatenate 3 runs
X1train = np.concatenate(X1train_stim)
X2train = np.concatenate(X2train_stim)

Ytrain = np.concatenate(Ytrain_uncon)

print(X1train.shape, "X1train" )
print(X2train.shape, "X2train")
print(X1test_stim.shape, "X1test_stim")
print(X2test_stim.shape, "X2test_stim")
print(Ytest.shape, "Ytest")
print(Ytrain.shape, "Ytrain")

((1138, 1200), 'X1train')
((1138, 1200), 'X2train')
((338, 1200), 'X1test_stim')
((338, 1200), 'X2test_stim')
((338, 37476), 'Ytest')
((1138, 37476), 'Ytrain')


# Part where I'm stuck: loro

In [11]:
train_id = np.arange(X1train.shape[0]).tolist()
dur1, dur2, dur3 = tr_movie[included[0]]-3, tr_movie[included[1]]-3,tr_movie[included[2]]-3

In [12]:
X1train.shape[0] #366+369+403

1138

## version 1
* Error: # AttributeError: 'list' object has no attribute 'reshape'


In [180]:

loro1 = [ (train_id[:dur1+dur2], train_id[dur1+dur2:]),                      
       ( list(np.concatenate((train_id[:dur1],train_id[dur1+dur2:]),axis=0)),
         train_id[dur1:dur1+dur2]),                                           
        (train_id[dur1:], train_id[:dur1])] 

In [168]:
print("loro first pair test: %s\nloro first pair val: %s" % (len(loro1[0][0]), len(loro1[0][1])))

loro first pair test: 735
loro first pair val: 403


## version 2
* error: operands could not be broadcast together with shapes (882000,1) (735,1200)


In [184]:

loro2 = [ (X1train[:dur1+dur2], X1train[dur1+dur2:]),                      
       ( list(np.concatenate((X1train[:dur1],X1train[dur1+dur2:]),axis=0)),
         X1train[dur1:dur1+dur2]),                                           
        (X1train[dur1:], X1train[:dur1])] 

In [185]:
print("loro first pair test: %s\nloro first pair val: %s" % (loro2[0][0].shape, loro2[0][1].shape))

loro first pair test: (735, 1200)
loro first pair val: (403, 1200)


In [186]:
loro2[0][0]

array([[ 0.06799316,  0.10040283,  0.07690811, ...,  0.02330017,
         0.04455566, -0.12078857],
       [ 0.06799316,  0.10040283,  0.07690811, ...,  0.02330017,
         0.04455566, -0.12078857],
       [ 0.06799316,  0.10040283,  0.07690811, ...,  0.02330017,
         0.04455566, -0.12078857],
       ...,
       [ 0.037407  ,  0.11371806, -0.00154876, ..., -0.01376162,
         0.11535781,  0.00118444],
       [ 0.037407  ,  0.11371806, -0.00154876, ..., -0.01376162,
         0.11535781,  0.00118444],
       [ 0.037407  ,  0.11371806, -0.00154876, ..., -0.01376162,
         0.11535781,  0.00118444]], dtype=float32)

## version 3
* operands could not be broadcast together with shapes (882000,1) (735,1200) 'safe'


In [170]:

loro3 = [ (np.concatenate((X1train_stim[0],X1train_stim[1]), axis = 0), X1train_stim[2]),
    (np.concatenate((X1train_stim[0],X1train_stim[2]), axis = 0), X1train_stim[1]),
    (np.concatenate((X1train_stim[1],X1train_stim[2]), axis = 0), X1train_stim[0]) ]


In [175]:
print("loro first pair test: %s\nloro first pair val: %s" % (loro3[0][0].shape, loro3[0][1].shape))

loro first pair test: (735, 1200)
loro first pair val: (403, 1200)


## loro check - https://github.com/gallantlab/tikreg/blob/master/tikreg/models.py line 642

In [99]:
Ytrain.shape[-1]

37476

In [126]:
for fdx,fold in enumerate(loro):
    trn,val = fold
    ntrn,nval = len(trn),len(val)
    print(ntrn,nval)
    txt = (fdx+1,3,ntrn,nval)
    print('train ridge fold  %i/%i: ntrain=%i, nval=%i'%txt)
    
#     Ktrn = tikutils.fast_indexing(X1train, trn, trn)
#     Kval = tikutils.fast_indexing(X1train, val, trn)

(735, 403)
train ridge fold  1/3: ntrain=735, nval=403
(769, 369)
train ridge fold  2/3: ntrain=769, nval=369
(772, 366)
train ridge fold  3/3: ntrain=772, nval=366


In [122]:
a.reshape(-1,1).shape

(882000, 1)

# tikreg - one example

In [17]:

# Sampling in terms of ratios and scalings
alphas = np.logspace(0,4,11)
ratios = np.logspace(-2,2,25)

# Solve for one hyperparameter set only
# We will use this solution to test the tikreg implementation
ratio = ratios[16]
alpha = alphas[1]

angle = np.arctan(ratio)
lambda_one = np.cos(angle)*alpha
lambda_two = np.sin(angle)*alpha


bands = np.asarray([lambda_one]*X1train.shape[1] + [lambda_two]*X2train.shape[1])
Cinv = np.diag(bands**-1)

A = np.hstack([X1train/lambda_one, X2train/lambda_two])
U, S, VT = np.linalg.svd(A, full_matrices=False)
V = VT.T
UTY = np.dot(U.T, Ytrain)
D = np.diag(S / (S**2 + alpha**2))

solution_svd_standard = np.linalg.multi_dot([V, D, UTY])*alpha
solution_svd_bandstd2tik = np.dot(Cinv, solution_svd_standard)

print(np.rad2deg(angle), ratio, alpha, lambda_one, lambda_two)


(77.84183577448114, 4.6415888336127775, 2.51188643150958, 0.5290310600635536, 2.455544661025322)


In [18]:
# Use tikreg to find the solution
X1_prior = spatial_priors.SphericalPrior(X1train, hyparams=[lambda_one])
X2_prior = spatial_priors.SphericalPrior(X2train, hyparams=[lambda_two])
# A temporal prior is unnecessary, so we specify no delays
temporal_prior = temporal_priors.SphericalPrior(delays=[0]) # no delays

fit_banded_polar = models.estimate_stem_wmvnp(features_train=[X1train, X2train], 
                                              responses_train=Ytrain,
                                              features_test=[X1test_stim, X2test_stim],
                                              responses_test=Ytest,
                                              feature_priors=[X1_prior, X2_prior],
                                              temporal_prior=temporal_prior,
                                              ridges=[alpha],
                                              folds= (1,5), # 1x 5-fold cross-validation
                                              performance=True,
                                              normalize_hyparams=True,
                                              weights=True)

1/1: temporal 1/1=0.000, features 1/1=(0.2106, 0.9776)
pop.cv.best:  2.512, mean=0.2046, (25,50,75)pctl=(0.1580,0.2001,0.2464),(0.0<r>0.5): (37468,000)
Duration 0.0629[mins]


  unique_optima = np.vstack(set(tuple(row) for row in optima)) # get unique rows


37476 responses: ridge=    2.512, temporal=0.000, spatial=(0.211, 0.978) perf=0.0067
Total duration 0.1392[mins]


In [19]:
voxelwise_optimal_hyperparameters = fit_banded_polar['optima']
print(voxelwise_optimal_hyperparameters.shape)

(37476, 4)


# loro - estimate_stem_wmvnp

In [182]:
# Use tikreg to find the solution
X1_prior = spatial_priors.SphericalPrior(X1train, hyparams=[lambda_one])
X2_prior = spatial_priors.SphericalPrior(X2train, hyparams=[lambda_two])
# A temporal prior is unnecessary, so we specify no delays
temporal_prior = temporal_priors.SphericalPrior(delays=[0]) # no delays

fit_banded_polar = models.estimate_stem_wmvnp(features_train=[X1train, X2train], 
                                              responses_train=Ytrain,
                                              features_test=[X1test_stim, X2test_stim],
                                              responses_test=Ytest,
                                              feature_priors=[X1_prior, X2_prior],
                                              temporal_prior=temporal_prior,
                                              ridges=[alpha],
                                              folds= loro1, # 1x 5-fold cross-validation
                                              performance=True,
                                              normalize_hyparams=True,
                                              weights=True)

1/1: temporal 1/1=0.000, features 1/1=(0.2106, 0.9776)


AttributeError: 'list' object has no attribute 'reshape'

# loro - cvridge

In [188]:
# Use tikreg to find the solution
X1_prior = spatial_priors.SphericalPrior(X1train, hyparams=[lambda_one])
X2_prior = spatial_priors.SphericalPrior(X2train, hyparams=[lambda_two])
# A temporal prior is unnecessary, so we specify no delays
temporal_prior = temporal_priors.SphericalPrior(delays=[0]) # no delays

fit_banded_polar = models.cvridge(X1train, Ytrain,
                                              X1test_stim,Ytest,
#                                               feature_priors=[X1_prior, X2_prior],
#                                               temporal_prior=temporal_prior,
                                              ridges=[alpha],
                                              folds= loro3, # 1x 5-fold cross-validation
                                              performance=True,
                                              weights=True, 
                                  predictions = True
                                  
                                 )

Fitting *1* ridges, across *3* folds, and *1* "linear" kernel parameters
Caching *linear* kernel
Updating *linear* kernel 1/1:None
train ridge fold  1/3: ntrain=735, nval=403


ValueError: operands could not be broadcast together with shapes (882000,1) (735,1200) 