# Constrained and concatenated analysis F3C F4B

This analysis is very time-consuming, so automatic caching is used to keep it tractable. Some of the code is structured in a slightly unusual way so that existing cached results can be re-used and modified, which saves time.

Without caching ahead of time, this notebook should take about a day to run on a typical desktop machine.


## Prepare workspace

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

# Add local scripts to path
import os,sys
sys.path.insert(0,os.path.abspath("./"))
import neurotools

# Set up cache
from neurotools.jobs.initialize_system_cache import initialize_caches,cache_test
# The cache directory is `PPC_cache` inside the current working directory
# (presumably `ensure_dir` creates it if missing — TODO confirm).
PYCACHEDIR = os.path.abspath('./')
CACHENAME  = 'PPC_cache'
from neurotools.tools import ensure_dir
ensure_dir(PYCACHEDIR+os.sep+CACHENAME)
initialize_caches(
    level1  = PYCACHEDIR,
    force   = False,
    verbose = False,
    CACHE_IDENTIFIER = CACHENAME)

# Import libraries
# NOTE(review): the star imports below presumably supply the bare names used
# throughout this notebook (array, cat, mean, amap, memoize, parmap, ...);
# confirm which module each name comes from before refactoring.
from neurotools.nlab import *
import ppc_data_loader

# Set this to the location of the PPC data on your machine
# NOTE(review): this is a machine-specific absolute path and must be edited
# before the notebook can run elsewhere.
ppc_data_loader.path = '/home/mer49/Dropbox (Cambridge University)/Datasets/PPC_data/'
from ppc_data_loader   import *
from ppc_trial         import *

# Turn NumPy floating-point warnings into exceptions
np.seterr(all='raise');
# Seed NumPy's global random number generator
np.random.seed(0)

Data location is /home/mer49/Workspace2/PPC_data/


#### Configure Matplotlib

In [2]:
import warnings
import matplotlib as mpl

# Figure geometry. TEXTWIDTH is presumably the manuscript text width in
# inches (TODO confirm); the default figure is that wide with a 1:sqrt(2)
# height-to-width ratio.
TEXTWIDTH = 5.62708
mpl.rcParams['figure.dpi'] = 200
matplotlib.rcParams['figure.figsize'] = (TEXTWIDTH, TEXTWIDTH/sqrt(2))

# Silence Matplotlib deprecation warnings
from matplotlib import MatplotlibDeprecationWarning
warnings.filterwarnings("ignore",category=MatplotlibDeprecationWarning)

# Font sizes used by the rc settings below
SMALL_SIZE  = 7.5
MEDIUM_SIZE = 8
BIGGER_SIZE = 8.5

# Default text size
matplotlib.rc('font'  , size=SMALL_SIZE)
# Axes title and x/y label sizes
matplotlib.rc('axes'  , titlesize=MEDIUM_SIZE, labelsize=MEDIUM_SIZE)
# Tick label sizes
matplotlib.rc('xtick' , labelsize=SMALL_SIZE)
matplotlib.rc('ytick' , labelsize=SMALL_SIZE)
# Legend and figure-title sizes
matplotlib.rc('legend', fontsize=SMALL_SIZE)
matplotlib.rc('figure', titlesize=BIGGER_SIZE)
# Rounded caps on solid lines
matplotlib.rc('lines' , solid_capstyle='round')
print('Matplotlib configured')

Matplotlib configured


#### Set spans of (mostly) consecutive days with sufficient No. of units in common

In [3]:
# Notebook-level analysis settings.
# NOTE(review): the functions below take NXVAL/REPL/errtype as parameters with
# their own defaults; of these globals, NGRID is read directly by the
# chance-level functions and REPL is reassigned later in the notebook.
NXVAL   = 10
REPL    = 100
NGRID   = 20
errtype = 'L1'

# Use these datasets
# Each entry is (animal, [session ids]).
use = [(1,[1, 4, 5, 6, 7, 10, 14]),
       (3,[ 1,  2,  4, 6, 7,  8,  9, 10, 11, 12]),
       (4,[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
       (5,[6, 7, 8, 9, 10, 11, 12])]

print('Datasets defined...')

Datasets defined...


### Constrained analysis subroutines

These are parallelized, since the multiple cross-validations (and permutation samples) are quite labor-intensive.

 - Instead of just taking one set of 10-fold cross-validations, we randomly select 1/10th of the data for validation, and repeat this sampling 100 times (to better cover the variability in the data).

#### Sweep constraint parameter

In [4]:
from ppc_analysis import kininfo,add_constant

DEBUG_ON = False
def constrained_analysis_sweep(x,y,NGRID=20):
    '''
    Estimate regularized linear decoders with a penalty
    on the change in weights across days. 

    The objective is a convex combination of a rescaled least-squares
    error and a rescaled quadratic penalty on the difference between the
    weights of consecutive sessions. It is minimized once for each of
    `NGRID` mixing coefficients evenly spaced on [0,1], warm-starting
    each fit from the previous solutions.

    Parameters
    ----------
    x: 
        Neural data, one entry per session. Each entry is converted 
        with `np.array`; rows are treated as samples (the covariances
        are normalized by `shape[0]`).
    y: 
        Kinematic data, one entry per session, aligned with `x`.
    NGRID: int, default 20
        Number of mixing coefficients in the sweep.
    
    Returns
    -------
    ww0: 
        List of initial weight vector guesses based on OLS
        on each session separately.
    wwc: 
        Concatenated weight vector guess based on OLS for 
        all sessions concatenated.
    allww: 
        List of constrained weight fits, reshaped to
        `(NGRID,)+ww0.shape`.
    ll: 
        Interpolation weights between the single-day and the
        concatenated scenarios (`linspace(0,1,NGRID)`).
    '''
    x = list(map(np.array,x))
    y = list(map(np.array,y))
    # Same-day fits
    # (only index 0 along the last axis of each `reglstsq` result is kept)
    ww0 = array([reglstsq(xi,yi) for (xi,yi) in zip(x,y)])[:,:,0]
    # Concatenated fit
    wc  = reglstsq(cat(x),cat(y))
    # Repeat the concatenated weights once per session
    wwc = outer(ones(len(x)),wc)
    # Compute covariances and cross covariances
    sYY = np.array([(yi.T @ yi)/yi.shape[0] for  yi     in y       ])
    sXX = np.array([(xi.T @ xi)/xi.shape[0] for  xi     in x       ])
    sXY = np.array([(xi.T @ yi)/yi.shape[0] for (xi,yi) in zip(x,y)])  
    # Baseline values for error and jacobian of OLS penalty
    # (the terms of the squared error and its gradient that do not depend on w)
    e0 = np.sum(sYY)
    j0 = -2*sXY.ravel()
    # Inter-day difference operator,
    # defines quadratic penalty on the weight changes.
    # G is the (D-1)xD first-difference matrix; Q = G'G.
    D = len(x)
    G = (-eye(D)+eye(D,k=1))[:-1,:]
    Q = G.T@G
    if DEBUG_ON:
        print('ww0',ww0.shape)
        print('sXX',sXX.shape)
        print('Q',Q.shape)
    # Error functions for the OLS and Δw penalties
    err1 = lambda w:e0+einsum('is,ist,it',w,sXX,w)-2*np.sum(w*sXY)
    err2 = lambda w:einsum('si,st,ti',w,Q,w)
    # Rescale both OLS and constraint contribution to objective to be similar
    # (improved numeric conditioning, able to cover reasonable range of
    # constraint values with a fixed grid search)
    emin1    = err1(ww0) # Best-case  OLS error: same-day
    emax1    = err1(wwc) # Worst-case OLS error: concatenated
    emin2    = 0         # Best-case  Δw penalty: concatenated (0 Δw)
    emax2    = err2(ww0) # Worst-case Δw penalty: same-day fits
    scale_e1 = 1/(emax1-emin1) 
    scale_e2 = 1/(emax2-emin2) 
    # Grid search over convex combinations
    ll = linspace(0,1,NGRID)
    # Two copies of the same-day fit seed the extrapolation below, so the
    # first initial guess equals ww0; they are dropped again after the loop.
    allww = [ww0.ravel(),ww0.ravel()]
    for l in ll:
        # Initial guess: linear extrapolation from the two previous solutions
        w0 = (2*allww[-1]-allww[-2]).ravel()
        # Objective and Jacobian combining the OLS and Δw penalties
        a,b = (1-l)*scale_e1, l*scale_e2
        def objective(w):
            w  = w.reshape(ww0.shape)
            return a*(err1(w)-emin1) + b*(err2(w)-emin2)
        def jacobian(w):
            w  = w.reshape(ww0.shape) 
            j1 = 2*einsum('ist,it->is',sXX,w).ravel()+j0
            j2 = 2*einsum('si,st->ti' ,w,Q  ).ravel()
            return a*j1 + b*j2
        # ("minimize_retry" wraps scipy.optimize.minimize
        # It tries faster gradient-based optimizers first, 
        # but resorts to the simplex algorithm if that fails)
        allww.append(
            minimize_retry(objective,w0,jacobian,
            tol          =1e-6,
            show_progress=False,
            printerrors  =False))
    # Drop the two seed entries and restore the per-session weight layout
    allww = array(allww[2:]).reshape((NGRID,)+ww0.shape)
    return ww0,wwc,allww,ll

Defined subroutines
Defined LMS algorithm


#### Sweep constraint parameter with crossvalidation 

In [5]:
def constrained_sweep_crossvalidated(X,Y,NXVAL=10,NGRID=20,errmethod='L1',matched=True):
    '''
    Draw one random train/test split per session, fit the constraint 
    sweep on the training data, and score every fit on the test data.

    Trials are drawn without replacement using NumPy's global random
    number generator, so the result depends on the RNG state.

    Parameters
    ----------
    X: list
        Neural trial data, one entry per session (indexed by trial 
        along the first axis).
    Y: list
        Kinematic trial data, one entry per session, aligned with `X`.
    NXVAL: int, default 10
        A fraction 1/NXVAL of the trials is held out for testing.
    NGRID: int, default 20
        Number of constraint values, passed on to
        `constrained_analysis_sweep`.
    errmethod: str, default 'L1'
        Key into `neurotools.stats.error_functions` selecting the error
        function used for the 'MERR' entry.
    matched: bool, default True
        If True, every session contributes the same number of trials:
        `int(M)` for testing and `int(M*(NXVAL-1))` for training, where
        `M = N/D**2/NXVAL`, `N` is the total number of trials and `D`
        the number of sessions. If False, each session holds out
        `int(K/NXVAL)` of its `K` trials and trains on the rest.

    Returns
    -------
    metrics: list of dict
        One dictionary per entry of `results`, with keys 'MAW', 'RMSW'
        (mean absolute / root-mean-square weight), 'MADW', 'RMSDW' (the
        same for the weight difference between consecutive sessions),
        'MAY' (mean absolute deviation of the test kinematics from their
        mean), and 'MAE', 'RMSE', 'MERR' (test errors).
    results: 
        Pairs `(l,w)`: the same-day fits labelled 0, then the `NGRID`
        swept fits with their mixing coefficients, then the 
        concatenated fit labelled 1.
    '''
    efn = neurotools.stats.error_functions[errmethod]
    X   = [array(x) for x in X]
    Y   = [array(y) for y in Y]
    D   = len(X)
    N   = len(cat(X))
    M   = N/D**2/NXVAL
    def partition(x,y):
        # Random split of one session's trials into training and test sets
        K      = len(x)
        trials = arange(K)
        Ntest  = int(M           if matched else K/NXVAL)
        Ntrain = int(M*(NXVAL-1) if matched else K-Ntest)
        train  = int32(np.random.choice(trials,Ntrain,replace=False))
        # Test trials are drawn only from trials not used for training
        test   = int32(np.random.choice(list(set(trials)-set(train)),Ntest,replace=False))
        return x[train],y[train],x[test],y[test]
    # Partition every session, then concatenate the selected trials per session
    trnX,trnY,tstX,tstY = [amap(cat,v) for v in zip(*[partition(x,y) for (x,y) in zip(X,Y)])]
    ww0,wwc,allww,ll = constrained_analysis_sweep(trnX,trnY,NGRID)
    results = cat([[(0,ww0)],list(zip(ll,allww)),[(1,wwc)]])
    # In the comprehensions below, `zip(w,tstX,tstY)` iterates the outer `w`
    # along its first axis, pairing each session's weights with its test data.
    return [{'MAW'  :mean(abs(w)),
             'RMSW' :mean(abs(w)**2)**0.5,
             'MADW' :mean(abs(diff(w,axis=0))),
             'RMSDW':mean(abs(diff(w,axis=0))**2)**0.5,
             'MAY'  :mean([mean(abs(y-mean(y))) for y in tstY]), 
             'MAE'  :mean([mean(abs(y-x@w)) for (w,x,y) in zip(w,tstX,tstY)]),
             'RMSE' :mean([mean((y-x@w)**2) for (w,x,y) in zip(w,tstX,tstY)])**0.5,
             'MERR' :mean([efn(y,x@w)       for (w,x,y) in zip(w,tstX,tstY)])} 
            for (l,w) in results],results

# NOTE(review): this function is wrapped in `@memoize`. It is not visible here
# whether the cache key depends on the function's source or docstring, so the
# docstring is left untouched and additional documentation is kept in comments.
#
# Additions to the docstring:
#  - `permute` also accepts the string 'pair': two sessions are then picked at
#    random and only those two are returned, with unit identities permuted.
#  - With `permute` enabled, a fresh permutation of the unit columns is drawn
#    for every session.
#  - NOTE(review): because results are memoized, repeated calls with the same
#    arguments and permute=True/'pair' may return a cached permutation rather
#    than a new random one — confirm against the `memoize` implementation.
@memoize
def get_data_constrained_analysis_2(animal,sessions,predict,
    permute=False,
    split=1,
    do_add_constant=True):    
    '''
    Get data pre-processed for performing the constrained analyses. 

    We extract good trials, z-score the dF/F calcium signals, and zero-mean
    the kinematic variables, within each trial. 

    Parameters
    ----------
    animal: int
        Which subject to use
    sessions: list of ints
        Which sessions to use
    predict: int
        Which kinematic variable to predict

    Other Parametes
    ---------------
    permute: bool, default False
        Whether to randomly scramble the neuronal identities. 
        Used for shuffle chance level assessment. 
    split: int, default 1
        Split days into `split` pieces. 
    do_add_constant: bool, default True
        Whether to add a constant offset feature to the neural data
    
    Returns
    -------
    X: list
        List of neural trial data for each session
    Y: list
        List of kinematic trial data for each session
    '''
    # Get units in common
    # (`uidxs` is not used below)
    units,uidxs = get_units_in_common(animal,sessions)
    X,Y = [],[]
    if not permute in {True,False,'pair'}:
        raise ValueError('Param `permute` should be True,False, or "pair"')
    if permute=='pair':
        # Pick two distinct sessions at random, keeping their original order
        a,b = sorted(choice(range(len(sessions)),2,False))
        sessions = [sessions[a],sessions[b]]
        permute = True
    for s in sessions:
        # Get trials for this session
        f  = get_dFF(animal,s)[:,units]
        if permute:
            # Shuffle which column belongs to which unit (new draw per session)
            f = f[:,np.random.permutation(len(units))]
        # Kinematic signal selected by `predict`
        k  = kininfo[predict]['get'](animal,s)
        if do_add_constant:
            x  = array([add_constant(x) for x in extract_in_trial(f,animal,s,dozscore=True)])
        else:
            x  = array([x for x in extract_in_trial(f,animal,s,dozscore=True)])
        y  = array(extract_in_trial(k,animal,s,dozeromean=True))
        # Split the session's trials into `split` consecutive chunks of
        # n//split trials; the last chunk also takes any remainder.
        n  = len(x)
        b  = int(n//split)
        for i in range(split):
            X += [x[i*b:] if i==split-1 else x[i*b:(i+1)*b]]
            Y += [y[i*b:] if i==split-1 else y[i*b:(i+1)*b]]
    return X,Y

def compute_constrained_sweep(animal,sessions,predict,
                              NXVAL=10,
                              REPL=1,
                              NGRID=20,
                              errtype='L1',
                              matched=False,
                              permute=False,
                              split=1):
    '''
    Load the data for one subject and kinematic variable and run a
    single cross-validated constraint sweep on it.

    `REPL` is accepted but not used by this function. All other
    arguments are forwarded to `get_data_constrained_analysis_2`
    (animal, sessions, predict, permute, split) and to
    `constrained_sweep_crossvalidated` (NXVAL, NGRID, matched).

    Returns whatever `constrained_sweep_crossvalidated` returns.
    '''
    # Variable 4 (head direction) is scored with the '<errtype>_degrees'
    # error function; every other variable uses `errtype` directly.
    if predict==4:
        errmethod = errtype+'_degrees'
    else:
        errmethod = errtype
    # Pre-processed neural and kinematic trials for the requested sessions
    neural,kinematics = get_data_constrained_analysis_2(
        animal,sessions,predict,permute,split)
    # Fit and score models between the single-day and concatenated extremes
    return constrained_sweep_crossvalidated(
        neural,kinematics,NXVAL,NGRID,errmethod,matched)

# Parallel run of all cross-validation samples
def constrained_analysis_helper(p):
    '''
    Single-argument adapter so the sweep can be dispatched through a
    parallel map (needed for older versions of python).

    `p` is a pair `(index, arguments)`; `arguments` is unpacked into
    `compute_constrained_sweep`. Returns the index together with the
    first element of that function's result.
    '''
    index,arguments = p
    metrics = compute_constrained_sweep(*arguments)[0]
    return index,metrics

# Runs 3*REPL sweeps in parallel and returns them as three lists:
# REPL sweeps on the real data, REPL with unit identities permuted
# (permute=True), and REPL with permuted units on a random pair of sessions
# (permute='pair').
# NOTE(review): memoized; documentation is kept in comments because it is not
# visible here whether the cache key depends on the function's docstring.
@memoize
def do_parallel_constrained_analysis(animal,sessions,predict,
                                     NXVAL=10,
                                     REPL=100,
                                     NGRID=20,
                                     errtype='L1',
                                     matched=False):
    '''
    '''
    animal = animal # weird cache stuff; ignore but don't remove this line
    reset_pool()
    # Each job tuple follows the positional order of compute_constrained_sweep:
    # (animal,sessions,predict,NXVAL,REPL,NGRID,errtype,matched,permute).
    # The REPL slot is fixed at 1 because each job is a single replicate.
    jobs = [
        (animal,sessions,predict,NXVAL,1,NGRID,errtype,matched,False) 
        for i in range(REPL)
        ]+[
        (animal,sessions,predict,NXVAL,1,NGRID,errtype,matched,True) 
        for i in range(REPL)
        ]+[
        (animal,sessions,predict,NXVAL,1,NGRID,errtype,matched,'pair') 
        for i in range(REPL)
        ]
    results = parmap(constrained_analysis_helper,enumerate(jobs),debug=DEBUG_ON)
    print('done')
    # Split the results back into the three groups
    # (assumes `parmap` returns results in job order — TODO confirm)
    rawresults  = results[REPL*0:REPL*1]
    shuffle     = results[REPL*1:REPL*2]
    pairshuffle = results[REPL*2:REPL*3]
    return rawresults,shuffle,pairshuffle

# Export preprocessed data for independent validation

In [6]:
# Export the pre-processed data (without the constant feature) to one .mat
# file per subject and kinematic variable, keeping an in-memory copy in
# `cached` for the consistency checks that follow.
savedr = './datafiles/concatenated_analyses_processed_extracted_features/'
ensure_dir(savedr)
cached = {}

for animal, sessions in use:
    for predict in kininfo.keys():
        print('M',animal, kininfo[predict]['name'])
        X,Y    = get_data_constrained_analysis_2(animal,sessions,predict,
                                                 permute=False,
                                                 split=1,
                                                 do_add_constant=False)
        
        cached[animal,tuple(sessions),predict] = (X,Y)
        # Build the file name from the variable name, session ids and days
        kname  = kininfo[predict]['name'].replace(' ','_')
        ss     = '_'.join(map(str,sessions))
        daymap = dict(zip(get_session_ids(animal),get_days(animal)))
        days   = array([daymap[s] for s in sessions])
        dd     = '_'.join(map(str,days))
        saveas = 'concatenated_analyses_data_M%d_sessions_%s_days_%s_variable_%s.mat'%(animal,ss,dd,kname)
        print('Saving to','\n'+saveas,'\nin folder',savedr)
        
        # MAT-files are weird; this is a hack patch
        # TODO: do this properly later
        # The data is written, read back, unwrapped from the nesting that the
        # round trip introduces, and then written again over the same file.
        savemat(savedr+saveas,{'X':X,'Y':Y})
        data   = loadmat(savedr+saveas)
        x      = data['X'].squeeze()
        y      = data['Y'].squeeze()
        x      = array([xi[0] for xi in x])
        y      = array([array([yii.T for yii in yi[0]]) for yi in y])
        savemat(savedr+saveas,{'X':x,'Y':y})
        
        # print a summary for sanity checks
        # (`nsamples` is only used by the commented-out prints below)
        nsamples = [cat(yi).shape[0] for yi in y]
        #print('Samples per session',nsamples)
        #print('Trials per sessions',[yi.shape[0] for yi in y])
        #print('%d neurons'%x[0][0].shape[1])
        #print('Average samples per session',int(.5+mean(nsamples)))


M 1 X position
Saving to 
concatenated_analyses_data_M1_sessions_1_4_5_6_7_10_14_days_1_4_5_6_7_11_15_variable_X_position.mat 
in folder ./datafiles/concatenated_analyses_processed_extracted_features/
M 1 Y position
Saving to 
concatenated_analyses_data_M1_sessions_1_4_5_6_7_10_14_days_1_4_5_6_7_11_15_variable_Y_position.mat 
in folder ./datafiles/concatenated_analyses_processed_extracted_features/
M 1 X velocity
Saving to 
concatenated_analyses_data_M1_sessions_1_4_5_6_7_10_14_days_1_4_5_6_7_11_15_variable_X_velocity.mat 
in folder ./datafiles/concatenated_analyses_processed_extracted_features/
M 1 Y velocity
Saving to 
concatenated_analyses_data_M1_sessions_1_4_5_6_7_10_14_days_1_4_5_6_7_11_15_variable_Y_velocity.mat 
in folder ./datafiles/concatenated_analyses_processed_extracted_features/
M 1 Head direction
Saving to 
concatenated_analyses_data_M1_sessions_1_4_5_6_7_10_14_days_1_4_5_6_7_11_15_variable_Head_direction.mat 
in folder ./datafiles/concatenated_analyses_processed_extract

In [7]:
# Verify that X is the same for all kinematic variables
# (in-memory copies stored in `cached` by the export cell).
for animal, sessions in use:
    # Neural data extracted for each kinematic variable
    XX = []
    for predict in kininfo.keys():
        XX.append(cached[animal,tuple(sessions),predict][0])
    ns = len(XX[0])
    # Compare every pair of variables. The number of variables is taken from
    # the data rather than hard-coded, so the check covers all of `kininfo`.
    for i in range(len(XX)):
        for j in range(len(XX)):
            for n in range(ns):
                ntr = len(XX[0][n])
                for r in range(ntr):
                    assert mean(abs(XX[i][n][r]-XX[j][n][r])) < 1e-9, \
                        'X differs between kinematic variables %d and %d'%(i,j)
    print('Passed:',animal,sessions)

Passed: 1 [1, 4, 5, 6, 7, 10, 14]
Passed: 3 [1, 2, 4, 6, 7, 8, 9, 10, 11, 12]
Passed: 4 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Passed: 5 [6, 7, 8, 9, 10, 11, 12]


In [8]:
# Verify that X stored in the exported .mat files is the same for all
# kinematic variables (reads the files written by the export cell).
cached2 = {}
for animal, sessions in use:
    # Neural data loaded from the file of each kinematic variable
    XX = []
    for predict in kininfo.keys():
        # Rebuild the file name used by the export cell
        kname  = kininfo[predict]['name'].replace(' ','_')
        ss     = '_'.join(map(str,sessions))
        daymap = dict(zip(get_session_ids(animal),get_days(animal)))
        days   = array([daymap[s] for s in sessions])
        dd     = '_'.join(map(str,days))
        saveas = './concatenated_analyses_data_M%d_sessions_%s_days_%s_variable_%s.mat'%(animal,ss,dd,kname)
        data   = loadmat(savedr+saveas)
        x      = data['X'].squeeze()
        y      = data['Y'].squeeze()
        XX.append(x)
    ns = len(XX[0])
    # Compare every pair of variables. The number of variables is taken from
    # the data rather than hard-coded, so the check covers all of `kininfo`.
    for i in range(len(XX)):
        for j in range(len(XX)):
            for n in range(ns):
                ntr = XX[0][n].shape[1]
                for r in range(ntr):
                    assert mean(abs(XX[i][n][0,r]-XX[j][n][0,r])) < 1e-9, \
                        'Saved X differs between kinematic variables %d and %d'%(i,j)
    print('Passed:',animal,sessions)

Passed: 1 [1, 4, 5, 6, 7, 10, 14]
Passed: 3 [1, 2, 4, 6, 7, 8, 9, 10, 11, 12]
Passed: 4 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Passed: 5 [6, 7, 8, 9, 10, 11, 12]


### Pair shuffle tests

In [9]:
# Just pair shuffle
# Runs REPL sweeps in parallel with permute='pair', i.e. on a random pair of
# sessions with unit identities permuted (see get_data_constrained_analysis_2).
# NOTE(review): the docstring says "two trials", but 'pair' selects two
# sessions. The docstring is left unchanged because this function is memoized
# and it is not visible here whether the cache key depends on it.
@memoize
def do_pairshuffle(animal,sessions,predict,
                                     NXVAL=10,
                                     REPL=100,
                                     NGRID=20,
                                     errtype='L1',
                                     matched=False):
    '''
    Shuffle tests with just two trials
    '''
    animal = animal # weird cache stuff; ignore but don't remove this line
    reset_pool()
    # Job tuples follow the positional order of compute_constrained_sweep;
    # the REPL slot is fixed at 1 because each job is a single replicate.
    jobs = [
        (animal,sessions,predict,NXVAL,1,NGRID,errtype,matched,'pair') 
        for i in range(REPL)
        ]
    pairshuffle = parmap(constrained_analysis_helper,enumerate(jobs),debug=DEBUG_ON)
    print('done')
    return pairshuffle

# Just pair shuffle
# Same body as `do_pairshuffle` under a different name: REPL parallel sweeps
# with permute='pair'.
# NOTE(review): presumably kept as a separate name to obtain a separate
# memoization cache entry — confirm before merging the two.
@memoize
def do_pairshuffle_v2(animal,sessions,predict,
                                     NXVAL=10,
                                     REPL=100,
                                     NGRID=20,
                                     errtype='L1',
                                     matched=False):
    '''
    '''
    animal = animal # weird cache stuff; ignore but don't remove this line
    reset_pool()
    # Job tuples follow the positional order of compute_constrained_sweep;
    # the REPL slot is fixed at 1 because each job is a single replicate.
    jobs = [
        (animal,sessions,predict,NXVAL,1,NGRID,errtype,matched,'pair') 
        for i in range(REPL)
        ]
    pairshuffle = parmap(constrained_analysis_helper,enumerate(jobs),debug=DEBUG_ON)
    print('done')
    return pairshuffle

def check_basic_same_day_models(x,y,NGRID=20):
    '''
    Fit one `reglstsq` model per session and return the stacked weights.

    `x` and `y` hold one entry per session. `NGRID` is accepted but not
    used. Only index 0 along the last axis of each fit is returned.
    '''
    sessions_x = [np.array(xs) for xs in x]
    sessions_y = [np.array(ys) for ys in y]
    fits = [reglstsq(xs,ys) for xs,ys in zip(sessions_x,sessions_y)]
    return array(fits)[:,:,0]

### Get decoding chance level

In [10]:
# Serial chance-level estimate. For each of REPL iterations: draw a random
# train/test split per session, shuffle the kinematics of both sets with
# `block_shuffle`, fit same-day models on the shuffled training data and score
# them on the shuffled test data. Returns a list with one
# (metrics, results) pair per iteration, where `results` is [(0,ww0)].
# NOTE(review): memoized; documentation is kept in comments because it is not
# visible here whether the cache key depends on the function's docstring.
# NOTE(review): reads the notebook-level global `NGRID`.
@memoize
def get_chance_level(animal,sessions,predict,
    REPL     = 100,
    NXVAL    = 10,
    errtype  = 'L1',
    matched  = True):

    all_results = []
    print('Computing chance level')
    for iteration in progress_bar(range(REPL)):
        # This will get the neural and kinematic data for all sessions
        X,Y = get_data_constrained_analysis_2(animal,sessions,predict,False,1)

        # Now split into 10-fold training/testing sets
        # (variable 4 uses the '<errtype>_degrees' error function)
        errmethod = (errtype+'_degrees') if predict==4 else errtype
        efn = neurotools.stats.error_functions[errmethod]
        X   = [array(x) for x in X]
        Y   = [array(y) for y in Y]
        D   = len(X)
        N   = len(cat(X))
        M   = N/D**2/NXVAL
        def partition(x,y):
            # Random split of one session's trials into training and test sets
            K      = len(x)
            trials = arange(K)
            Ntest  = int(M           if matched else K/NXVAL)
            Ntrain = int(M*(NXVAL-1) if matched else K-Ntest)
            train  = int32(np.random.choice(trials,Ntrain,replace=False))
            # Test trials are drawn only from trials not used for training
            test   = int32(np.random.choice(list(set(trials)-set(train)),Ntest,replace=False))
            return x[train],y[train],x[test],y[test]
        trnX,trnY,tstX,tstY = [amap(cat,v) for v in zip(*[partition(x,y) for (x,y) in zip(X,Y)])]

        # Shuffle Kinematics
        # (presumably in blocks of 50 samples — TODO confirm `block_shuffle`)
        trnY = [block_shuffle(trny,50) for trny in trnY]
        tstY = [block_shuffle(tsty,50) for tsty in tstY]

        # Now we build models on each training set
        ww0 = check_basic_same_day_models(trnX,trnY,NGRID)

        # Check model performance
        results = [(0,ww0)]
        all_results.append(([{'MAW'  :mean(abs(w)),
                 'RMSW' :mean(abs(w)**2)**0.5,
                 'MADW' :mean(abs(diff(w,axis=0))),
                 'RMSDW':mean(abs(diff(w,axis=0))**2)**0.5,
                 'MAY'  :mean([mean(abs(y-mean(y))) for y in tstY]), 
                 'MAE'  :mean([mean(abs(y-x@w)) for (w,x,y) in zip(w,tstX,tstY)]),
                 'RMSE' :mean([mean((y-x@w)**2) for (w,x,y) in zip(w,tstX,tstY)])**0.5,
                 'MERR' :mean([efn(y,x@w)       for (w,x,y) in zip(w,tstX,tstY)])} 
                for (l,w) in results],results))
    return all_results

def get_chance_level_v2(animal,sessions,predict,
    NXVAL    = 10,
    errtype  = 'L1',
    matched  = True):
    '''
    Compute a single chance-level replicate.

    Draws one random train/test split per session, shuffles the 
    kinematics of both sets with `block_shuffle`, fits same-day models
    on the shuffled training data and scores them on the shuffled test
    data. Uses NumPy's global random number generator and reads the
    notebook-level global `NGRID`.

    Parameters
    ----------
    animal, sessions, predict:
        Passed to `get_data_constrained_analysis_2`.
    NXVAL: int, default 10
        A fraction 1/NXVAL of the trials is held out for testing.
    errtype: str, default 'L1'
        Name of the error function; '_degrees' is appended when 
        `predict` is 4.
    matched: bool, default True
        If True, every session contributes the same number of trials
        (see `partition` below).

    Returns
    -------
    metrics: list of dict
        A single dictionary of weight and error summaries.
    results: list
        `[(0,ww0)]`, the same-day weights that were scored.
    '''

    # This will get the neural and kinematic data for all sessions
    X,Y = get_data_constrained_analysis_2(animal,sessions,predict,False,1)

    # Now split into 10-fold training/testing sets
    errmethod = (errtype+'_degrees') if predict==4 else errtype
    efn = neurotools.stats.error_functions[errmethod]
    X   = [array(x) for x in X]
    Y   = [array(y) for y in Y]
    D   = len(X)
    N   = len(cat(X))
    M   = N/D**2/NXVAL
    def partition(x,y):
        # Random split of one session's trials into training and test sets
        K      = len(x)
        trials = arange(K)
        Ntest  = int(M           if matched else K/NXVAL)
        Ntrain = int(M*(NXVAL-1) if matched else K-Ntest)
        train  = int32(np.random.choice(trials,Ntrain,replace=False))
        # Test trials are drawn only from trials not used for training
        test   = int32(np.random.choice(list(set(trials)-set(train)),Ntest,replace=False))
        return x[train],y[train],x[test],y[test]
    trnX,trnY,tstX,tstY = [amap(cat,v) for v in zip(*[partition(x,y) for (x,y) in zip(X,Y)])]

    # Shuffle Kinematics
    # (presumably in blocks of 50 samples — TODO confirm `block_shuffle`)
    trnY = [block_shuffle(trny,50) for trny in trnY]
    tstY = [block_shuffle(tsty,50) for tsty in tstY]

    # Now we build models on each training set
    ww0 = check_basic_same_day_models(trnX,trnY,NGRID)

    # Check model performance
    results = [(0,ww0)]
    return ([{'MAW'  :mean(abs(w)),
             'RMSW' :mean(abs(w)**2)**0.5,
             'MADW' :mean(abs(diff(w,axis=0))),
             'RMSDW':mean(abs(diff(w,axis=0))**2)**0.5,
             'MAY'  :mean([mean(abs(y-mean(y))) for y in tstY]), 
             'MAE'  :mean([mean(abs(y-x@w)) for (w,x,y) in zip(w,tstX,tstY)]),
             'RMSE' :mean([mean((y-x@w)**2) for (w,x,y) in zip(w,tstX,tstY)])**0.5,
             'MERR' :mean([efn(y,x@w)       for (w,x,y) in zip(w,tstX,tstY)])} 
            for (l,w) in results],results)

def get_chance_level_helper(p):
    '''
    Single-argument adapter so `get_chance_level_v2` can be dispatched
    through a parallel map.

    `p` is a pair `(index, job)` where `job` is the tuple
    `(animal,sessions,predict,NXVAL,errtype,matched)`. Returns the index
    together with the result of `get_chance_level_v2`.
    '''
    index,job = p
    animal,sessions,predict,NXVAL,errtype,matched = job
    outcome = get_chance_level_v2(animal,sessions,predict,NXVAL,errtype,matched)
    return index,outcome
reset_pool()

# Parallel chance-level estimate: dispatches REPL independent calls of
# `get_chance_level_v2` through `parmap` and returns the list of results.
# NOTE(review): memoized; documentation is kept in comments because it is not
# visible here whether the cache key depends on the function's docstring.
@memoize
def get_chance_level_parallel(animal,sessions,predict,
    REPL     = 100,
    NXVAL    = 10,
    errtype  = 'L1',
    matched  = True):
    print('Computing chance level (parallel)')
    # One identical job per replicate, in the argument order that
    # `get_chance_level_helper` unpacks
    jobs = [(animal,sessions,predict,NXVAL,errtype,matched) for i in range(REPL)]
    results = parmap(get_chance_level_helper,enumerate(jobs),debug=False)
    return results

# Precompute and preload results

In [None]:
# Precompute the results
# One run for subject 4, variable 1, using the function's default REPL
animal,sessions,predict = 4,(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),1
results,shuffle,pairshuffle = do_parallel_constrained_analysis(animal,sessions,predict)
#pairshuffle = do_pairshuffle(animal,sessions,predict)

# NOTE(review): `oldset` is not referenced anywhere in the visible part of the
# notebook; entries are (animal, sessions, predict).
oldset = {(1,(1, 4, 5, 6, 7, 10, 14),1),
    (1,(1, 4, 5, 6, 7, 10, 14),3),
    (1,(1, 4, 5, 6, 7, 10, 14),4),
    (3,(1, 2, 4, 6, 7, 8, 9, 10, 11, 12),1),
    (3,(1, 2, 4, 6, 7, 8, 9, 10, 11, 12),3),
    (3,(1, 2, 4, 6, 7, 8, 9, 10, 11, 12),4)}

# Number of replicates used by `get_all_data` below; this overrides the
# REPL = 100 assigned in the dataset-definition cell.
REPL = 500

def get_all_data(animal,sessions,predict):
    '''
    Collect every result set for one subject and kinematic variable.

    Calls the memoized `do_parallel_constrained_analysis` and
    `get_chance_level_parallel` with the notebook-level `REPL`, passing
    the sessions as a tuple.

    Returns
    -------
    results, shuffle, pairshuffle:
        As returned by `do_parallel_constrained_analysis`.
    chance: list
        One entry per chance-level replicate, unwrapped from the
        structure returned by `get_chance_level_parallel`.
    '''
    session_key = tuple(sessions)
    raw,shuffled,pair = do_parallel_constrained_analysis(
        animal,session_key,predict,REPL=REPL)
    chance_raw = get_chance_level_parallel(animal,session_key,predict,REPL=REPL)
    # A length-2 result is unwrapped differently
    # (presumably an older cached format — TODO confirm)
    if len(chance_raw)==2:
        chance = [entry[0] for entry in chance_raw[1]]
    else:
        chance = [entry[0][0] for entry in chance_raw]
    return raw,shuffled,pair,chance

# Compute (or load from the cache) the results for every subject and for
# kinematic variables 1, 3 and 4; the return values are discarded here.
for animal, sessions in use:
    for predict in [1,3,4]:
        print('Subject',animal,'Kinematics',predict,'Sessions',sessions)
        get_all_data(animal,sessions,predict)

Subject 1 Kinematics 1 Sessions [1, 4, 5, 6, 7, 10, 14]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 



Stopping.

Subject 1 Kinematics 3 Sessions [1, 4, 5, 6, 7, 10, 14]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 
Subject 1 Kinematics 4 Sessions [1, 4, 5, 6, 7, 10, 14]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 



Stopping.

Subject 3 Kinematics 1 Sessions [1, 2, 4, 6, 7, 8, 9, 10, 11, 12]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 
Subject 3 Kinematics 3 Sessions [1, 2, 4, 6, 7, 8, 9, 10, 11, 12]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 
Subject 3 Kinematics 4 Sessions [1, 2, 4, 6, 7, 8, 9, 10, 11, 12]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 
Subject 4 Kinematics 1 Sessions [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[##################################################]100.0% 
done
Computing chance level (parallel)
[##################################################]100.0% 
Subject 4 Kinematics 3 Sessions [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[#####################################

### Sanity checks on loaded results

In [None]:
# Print the number of replicates in each result set for every subject and
# kinematic variable.
for animal, sessions in use:
    for kinematics_predict in [1,3,4]:
        print('Subject',animal,'Kinematics',kinematics_predict,'Sessions',sessions)
        # Pass the loop variable; the previous code passed the stale global
        # `predict`, so every iteration checked the same kinematic variable.
        r,s,p,c = get_all_data(animal,sessions,kinematics_predict)
        print(amap(len,(r,s,p,c)))

# Fig 4B

In [None]:
%matplotlib inline
matplotlib.rcParams['figure.dpi']=120

def constrained_plot_percentage_v2(animal,sessions,predict):
    '''
    Plot test error against weight change for the constraint sweep,
    drawing into the current axes.

    The x axis is the median absolute weight change between sessions as
    a percentage of the median absolute weight; the y axis is the median
    'MERR' test error after division by `chance_percent_normalize`. Box
    plots of the error at sweep index -2 and index 1 are drawn left and
    right of the curve.

    Parameters
    ----------
    animal, sessions, predict:
        Passed to `get_all_data`.
    '''
    results,shuffle,pairshuffle,ch = get_all_data(animal,sessions,predict)

    # Get prediction errors
    # (transposed so that rows index sweep entries and columns replicates)
    r2    = array(list(zip(*results)))
    mae   = array([[r['MERR'] for r in rr] for rr in r2])
    maw   = array([[r['MAW' ] for r in rr] for rr in r2])
    madw  = array([[r['MADW'] for r in rr] for rr in r2])
    dwpct = median(madw,axis=1)/median(maw,axis=1)*100
    mmae  = median(mae,axis=1)
    
    # Get shuffle control results
    # (only `smae` is used further down)
    s2     = array(list(zip(*shuffle)))
    smae   = array([[r['MERR'] for r in rr] for rr in s2])
    smaw   = array([[r['MAW' ] for r in rr] for rr in s2])
    smadw  = array([[r['MADW'] for r in rr] for rr in s2])
    sdwpct = median(smadw,axis=1)/median(smaw,axis=1)*100
    smmae  = median(smae,axis=1)
    shuffled = smae[-2,:]
    
    # Get pair-only shuffle control results
    # (none of these values are used in the plot)
    ps2     = array(list(zip(*pairshuffle)))
    psmae   = array([[r['MERR'] for r in rr] for rr in ps2])
    psmaw   = array([[r['MAW' ] for r in rr] for rr in ps2])
    psmadw  = array([[r['MADW'] for r in rr] for rr in ps2])
    psdwpct = median(psmadw,axis=1)/median(psmaw,axis=1)*100
    psmmae  = median(psmae,axis=1)
    pshuffled = psmae[-2,:]
    
    # Get chance level 
    chae   = array([r['MERR'] for r in ch])
    # NOTE(review): the extra factor of 2 means an error equal to the mean
    # chance error plots as 50, not 100 — confirm this is intended.
    chance_percent_normalize = mean(chae)/100*2
    
    # Convert units to percent of chance error
    mae  = mae /chance_percent_normalize
    smae = smae/chance_percent_normalize
    mmae = mmae/chance_percent_normalize
    
    # Draw |e| vs Δw plot, with endpoints and dashed lines to axis
    lw=2
    plot(dwpct,mmae,color=BLACK,lw=lw)
    scatter([dwpct[0],dwpct[-1]],[mmae[0],mmae[-1]],marker='o',s=8,color=BLACK)

    # Horizontal sizes are expressed relative to the largest x value
    scale = np.max(dwpct)/100
    widthscale = 10
    bw = widthscale*scale
    blw = 1.5
    
    # Draw concatenated error box
    colored_boxplot([mae[-2,:]],[-15*scale],TURQUOISE,widths=bw,zorder=100,linewidth=blw)

    # Draw same-day error box
    x2 = np.max(dwpct)+15*scale
    colored_boxplot([mae[1,:]],[x2],OCHRE,widths=bw,zorder=100,linewidth=blw)
    
    simpleraxis()
    xlabel(r'$\left<|\Delta w|\right>$/session (%)')
    xlim(-27*scale,x2+10*scale)
    
    # (`span1` and `scenter` are computed but not used)
    shuffled = smae[-2,:]
    span1   = np.max(shuffled) -np.min(shuffled)
    scenter = (np.max(shuffled)+np.min(shuffled))*0.5
    
    # Keep the lower y limit fixed while ticks and dotted guides are added
    yl = ylim()
    more_yticks()
    ylim(yl[0],ylim()[1])
    # Dotted vertical guides from the x axis up to the curve's two endpoints
    plot([dwpct[0]]*2 ,[ylim()[0],mmae[0]] ,color=BLACK,linestyle=':',lw=1)
    plot([dwpct[-1]]*2,[ylim()[0],mmae[-1]],color=BLACK,linestyle=':',lw=1)
    ylim(yl[0],ylim()[1])
    xt = [0,10,12,15,25,30,50,60,np.max(dwpct)]
    xticks(xt,['%d'%i for i in xt])

In [None]:
# Assemble a 2x3 figure: one row per subject, one column per kinematic
# variable (1, 3, 4).
# NOTE(review): `constrained_plot` is not defined in the visible part of this
# notebook; the plotting function defined above is named
# `constrained_plot_percentage_v2` — confirm which one is intended.
# NOTE(review): the session lists below differ from those in `use`
# (subject 3 omits 11 and 12; subject 4 adds 11 and 12), so they presumably
# do not match the precomputed cache entries — confirm.
# NOTE(review): the file is saved as 'constrained_sweep_F3B' although the
# section heading reads "Fig 4B" — confirm the figure label.
figure(figsize=(4.1,2.2))

animal = 3
sessions = [1,  2,  4,  6,  7,  8,  9, 10]
ax1 = subplot2grid((2,3),(0,0))
ax2 = subplot2grid((2,3),(0,1))
ax3 = subplot2grid((2,3),(0,2))
# Top row: x labels removed
sca(ax1); constrained_plot(animal,sessions,1); xlabel('')
sca(ax2); constrained_plot(animal,sessions,3); xlabel('')
sca(ax3); constrained_plot(animal,sessions,4); xlabel('')

animal = 4
sessions = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
ax4 = subplot2grid((2,3),(1,0))
ax5 = subplot2grid((2,3),(1,1))
ax6 = subplot2grid((2,3),(1,2))
# Bottom row: y labels removed on the second and third panels
sca(ax4); constrained_plot(animal,sessions,1); 
sca(ax5); constrained_plot(animal,sessions,3); ylabel('')
sca(ax6); constrained_plot(animal,sessions,4); ylabel('')

subplots_adjust(hspace=0.45,wspace=0.5)
# Title each top-row panel with the name of its kinematic variable
for i,predict in enumerate([1,3,4]):
    sca([ax1,ax2,ax3][i])
    title('%s'%kininfo[predict]['name'])

savefigure('constrained_sweep_F3B',stamp=False)

# U tests

Tests whether the concatenated decoders are statistically significantly worse than the same-day decoders. 
Also tests whether the shuffled predictions are significantly better than chance.

In [None]:
def bootstrap_pvalue(a,b,effective_replicas=1,nsample=1000):
    '''
    Test if a is larger than b.

    Returns the fraction of all pairs (one value from `a`, one from `b`)
    in which the value from `a` is smaller than the value from `b`.

    If `effective_replicas` is not 1, `a` and `b` are first replaced by
    `nsample` bootstrap means each, every mean taken over 
    `effective_replicas` values drawn with `choice`.
    '''
    a,b = amap(ravel,[a,b])
    if effective_replicas!=1:
        # this isn't quite right but it's pretty close
        # ... will revisit later
        resampled_a = [mean(choice(a,effective_replicas)) for _ in range(nsample)]
        resampled_b = [mean(choice(b,effective_replicas)) for _ in range(nsample)]
        a,b = amap(ravel,[resampled_a,resampled_b])
    # All pairwise differences a_j - b_i
    pairwise = a[None,:]-b[:,None]
    return mean(pairwise<0)

pvs = {}
for animal,sessions in use:
    for predict in [1,3,4]:
        print('Subject',animal,'Kinematics',kinematics_predict,'Sessions',sessions)
        results,shuffle,pairshuffle,ch = get_all_data(animal,sessions,predict)
        # Get prediction errors
        r2    = array(list(zip(*results)))
        mae   = array([[r['MERR'] for r in rr] for rr in r2])
        maw   = array([[r['MAW' ] for r in rr] for rr in r2])
        madw  = array([[r['MADW'] for r in rr] for rr in r2])
        dwpct = median(madw,axis=1)/median(maw,axis=1)*100
        mmae  = median(mae,axis=1)
        # Get shuffle control results
        s2     = array(list(zip(*shuffle)))
        smae   = array([[r['MERR'] for r in rr] for rr in s2])
        smaw   = array([[r['MAW' ] for r in rr] for rr in s2])
        smadw  = array([[r['MADW'] for r in rr] for rr in s2])
        sdwpct = median(smadw,axis=1)/median(smaw,axis=1)*100
        smmae  = median(smae,axis=1)
        shuffled = smae[-2,:]
        # Get pair-only shuffle control results
        ps2     = array(list(zip(*pairshuffle)))
        psmae   = array([[r['MERR'] for r in rr] for rr in ps2])
        psmaw   = array([[r['MAW' ] for r in rr] for rr in ps2])
        psmadw  = array([[r['MADW'] for r in rr] for rr in ps2])
        psdwpct = median(psmadw,axis=1)/median(psmaw,axis=1)*100
        psmmae  = median(psmae,axis=1)
        pshuffled = psmae[-2,:]
        # Get chance level 
        chae   = array([r['MERR'] for r in ch])
        # Compare single-day and concatenated
        concat    = mae[-2,:]
        singleday = mae[1,:]
        pvs[animal,tuple(sessions),predict,'cat_1day']=bootstrap_pvalue(concat,singleday,effective_replicas=len(sessions))
        # Get shuffle control results
        shuffled = smae[-2,:]
        chance   = chae
        pvs[animal,tuple(sessions),predict,'shuf_ch']=bootstrap_pvalue(chance,shuffled)
        # Get pair shuffle null shuffle control results
        paireshuff = psmae[-2,:]
        pvs[animal,tuple(sessions),predict,'cat_pair']=bootstrap_pvalue(paireshuff,concat)
        
# Import the `pvalues` name from `neurotools.stats`; the call below reaches it
# through the fully qualified `neurotools.stats.pvalues` path, so this import
# is what guarantees that attribute is loaded.
from neurotools.stats import pvalues
# Rebind `pvs` (filled in above, keyed by (animal, sessions, predict, label))
# to whatever `correct_pvalues` returns. Presumably this is a
# multiple-comparisons correction and the second argument is a verbosity
# flag -- TODO confirm against neurotools.stats.pvalues.
# NOTE: `pvs` is overwritten in place, so this statement should only be run
# once per freshly computed `pvs`.
pvs = neurotools.stats.pvalues.correct_pvalues(pvs,True);


# F3C

In [None]:
%matplotlib inline
matplotlib.rcParams['figure.dpi']=120

def baselegend(*args,**kwargs):
    '''
    Put a legend underneath the plot, outside the axes, with no visible
    frame outline.

    By default the legend is created with `loc='upper center'` and
    `bbox_to_anchor=(0.5,-0.2)`; either can be overridden by the caller.

    Parameters
    ----------
    *args
        Positional arguments, forwarded unchanged to `legend`.
    **kwargs
        Keyword arguments forwarded to `legend`. These take precedence
        over the default `loc` and `bbox_to_anchor`.

    Returns
    -------
    The legend object returned by `legend`.
    '''
    # Start from the below-the-axes placement, then let the caller override.
    options = dict(loc='upper center',bbox_to_anchor=(0.5,-0.2))
    for key,value in kwargs.items():
        options[key] = value
    box = legend(*args,**options)
    # Zero line width hides the outline of the legend frame.
    frame = box.get_frame()
    frame.set_linewidth(0.0)
    return box

def colored_ballplot(datasets,positions,color,size=6,widths=0.8,zorder=100,linewidth=0.8,clip_on=False,**kwargs):
    '''
    Draw a compact whisker-and-dot summary for each dataset.

    For every (dataset, position) pair this draws a vertical line from the
    5th to the 95th percentile, a horizontal cap at each end, and a dot at
    the median. All whiskers are drawn first, then all median dots.

    Parameters
    ----------
    datasets : iterable of array-like
        One sample per summary to draw.
    positions : iterable of numbers
        x coordinate for each dataset; paired with `datasets` via `zip`,
        so any unpaired trailing entries are ignored.
    color
        Colour used for the whiskers, caps and median dot.
    size : number
        Marker size, passed to `scatter` as `s`.
    widths : number
        Full width of the horizontal caps, in x-axis units.
    zorder : number
        Drawing order, forwarded to every `plot` and `scatter` call.
        (Previously this argument was accepted but silently ignored.)
    linewidth : number
        Line width of the whisker and caps.
    clip_on : bool
        Forwarded to `plot` and `scatter`.
    **kwargs
        Extra keyword arguments forwarded to both `plot` and `scatter`.
    '''
    half = widths/2
    line_style = dict(color=color,linewidth=linewidth,zorder=zorder,clip_on=clip_on,**kwargs)
    for d,x in zip(datasets,positions):
        p5  = percentile(d,5)
        p95 = percentile(d,95)
        # Vertical whisker spanning the 5th-95th percentile range
        plot([x,x],[p5,p95],**line_style)
        # Horizontal caps at both ends of the whisker
        x0 = x-half
        x1 = x+half
        plot([x0,x1],[p5,p5],**line_style)
        plot([x0,x1],[p95,p95],**line_style)
    # Median dots are added in a second pass, after every whisker
    for d,x in zip(datasets,positions):
        scatter([x],[median(d)],color=color,s=size,zorder=zorder,clip_on=clip_on,**kwargs)

def _merr_by_condition(runs):
    '''
    Gather the 'MERR' entry of every result record into a 2D array.

    `runs` is a sequence of equal-length sequences of result dicts. The
    nesting is transposed, so `out[i,j]` is `runs[j][i]['MERR']`.
    '''
    return array([[record['MERR'] for record in row] for row in zip(*runs)])

def boxplot_summary(animal,sessions,predict,offset=0,lw=0.8):
    '''
    Draw one subject's group of four error summaries on the current axes.

    Errors are expressed as a percentage of the mean chance-level error
    and drawn with `colored_ballplot` at x positions 1..4 (+ `offset`):

        1  shuffle control            (RUST)
        2  pair-only shuffle control  (MAUVE)
        3  concatenated               (TURQUOISE)
        4  single/same-day            (OCHRE)

    Parameters
    ----------
    animal, sessions
        Passed straight through to `get_all_data`.
    predict
        Code of the predicted kinematic variable. Used for
        `get_all_data`, for the `kininfo` axis label, and to select the
        y-limits (codes 1, 3 and 4 have preset limits; any other code
        leaves the limits untouched).
    offset : number
        Added to every x position, so several subjects can share one axes.
    lw : number
        Line width passed on to `colored_ballplot`.

    Notes
    -----
    Only the 'MERR' entries of the results are used. Row -2 of each error
    table is taken as the concatenated condition and row 1 as the
    same-day condition.
    '''
    results,shuffle,pairshuffle,ch = get_all_data(animal,sessions,predict)

    # Prediction errors, shuffle control and pair-only shuffle control
    mae   = _merr_by_condition(results)
    smae  = _merr_by_condition(shuffle)
    psmae = _merr_by_condition(pairshuffle)

    # Chance level; dividing by baseline turns an error into % of chance
    chae     = array([r['MERR'] for r in ch])
    baseline = mean(chae)/100

    width = 0.75
    def draw(errors,slot,color):
        colored_ballplot([errors/baseline],[slot+offset],color,widths=width,zorder=100,linewidth=lw)

    # Draw concatenated error box
    draw(mae[-2,:],3,TURQUOISE)
    # Draw same-day error box
    draw(mae[1,:],4,OCHRE)
    simpleraxis()
    # Label from the `predict` argument (not from a notebook-level global).
    # NOTE(review): the plotted values are normalised to % of chance, so the
    # units in this label do not describe them; callers may override it.
    ylabel(kininfo[predict]['name']+' error (%s)'%kininfo[predict]['units'])
    # Shuffle concatenated error box
    draw(smae[-2,:],1,RUST)
    # Pair-shuffle concatenated error box
    draw(psmae[-2,:],2,MAUVE)

    # Preset y-limits per predicted variable
    limits = {1:(25,80),3:(30,90),4:(40,100)}
    if predict in limits:
        ylim(*limits[predict])
    xticks([])


# Build the figure: one panel per predicted kinematic variable, and within
# each panel one group of four summaries per (animal, sessions) entry in `use`.
figure(figsize=(TEXTWIDTH,TEXTWIDTH/3))
# Horizontal distance between successive subjects' groups. boxplot_summary
# draws at x = 1..4 plus the offset, so a spacing of 5 leaves a gap.
SPACING = 5
# Small x shift applied to the tick positions, background bands and x-limits.
fudge = 0.15
# NOTE: `predicted_variable` is a notebook-level name; other code in this
# notebook may read it as a global -- verify before renaming it.
for iplot,predicted_variable in enumerate([1,3,4]):
    subplot(1,3,iplot+1)
    for iu,(a,ss) in enumerate(use):
        boxplot_summary(a,ss,predicted_variable,iu*SPACING)
    # Only the left-most panel carries the y label; this replaces whatever
    # label boxplot_summary set.
    ylabel('% chance-level error' if iplot==0 else '')
    title(kininfo[predicted_variable]['name'],fontsize=10)
    # (This x-limit is superseded by the xlim call at the end of the loop body.)
    xlim(-0.2,xlim()[1])
    # One tick per subject group, centred on the group.
    # NOTE(review): hard-codes four groups labelled M1, M3, M4, M5; assumes
    # `use` has exactly those four subjects in that order -- TODO confirm.
    xticks(arange(4)*SPACING+2.5+fudge,['M%d'%i for i in [1,3,4,5]])
    gca().tick_params(axis='x',length=0)
    # Background bands behind the 2nd and 4th groups, drawn beneath everything.
    axvspan(SPACING*1+fudge,SPACING*2+fudge,color=WHITE,zorder=-inf)
    axvspan(SPACING*3+fudge,SPACING*4+fudge,color=WHITE,zorder=-inf)
    xlim(0+fudge,SPACING*4+fudge)

tight_layout()
# Attach the shared legend to the middle panel. The square markers below are
# plotted far outside the visible x range purely to create legend entries;
# the axis limits are saved first and restored afterwards.
subplot(132)
xl = xlim()
yl = ylim()
scatter(-1000,10,s=60,marker='s',color=RUST     ,label='Permute (all)')
scatter(-1000,10,s=60,marker='s',color=MAUVE    ,label='Permute (pair)')
scatter(-1000,10,s=60,marker='s',color=TURQUOISE,label='Concatenated')
scatter(-1000,10,s=60,marker='s',color=OCHRE    ,label='Single-day')
xlim(*xl)
ylim(*yl)
baselegend(handletextpad=0,ncol=4,fontsize=7.5)
# Leave room at the bottom of the figure for the legend below the axes.
subplots_adjust(bottom=0.23)
# Project helper; presumably writes the current figure to disk under this
# name -- see its definition for the output location and format.
savefigure('all_mice_decode_concatenated',stamp=False)