# Betaseries correlation

This script uses beta-series extracted in the previous step to calculate correlation matrices. Executed steps:
1. Restrict the atlas to ROIs that reside within the brain mask for all participants. To do this, the script finds all ROIs with null beta-series by comparing their standard deviation to zero. Only subjects included in the analysis matter for this step. Then, any ROI with a null beta-series in at least one participant / task condition is removed from the atlas.
2. Split beta-series according to the sign of the prediction error. This produces `n_subjects` x `n_conditions` x `n_perr_sign` matrices of size roughly `n_included_roi` x `n_trials/2`.
3. Finally, beta-series are correlated using Pearson correlation and "internally" z-scored. Note that at this point correlation matrices are calculated for all subjects (even those with failed acquisition), so this can lead to biased matrices for subjects that should be excluded. This is done for consistency, since subjects will be excluded at the very end of the analysis (before calculating statistical tests).

This step outputs three files: 

> `corrmats/<atlas_name>/corrmats_aggregated.npy`

Aggregated correlation matrices of shape `n_subjects` x `n_conditions` x `n_perr_sign` x `n_included_roi` x `n_included_roi`.

> `corrmats/<atlas_name>/corrmats_aggregated.json`

Corresponding metadata describing each dimension of the array.

> `corrmats/<atlas_name>/roi_table_filtered.csv`

Parcellation information table without the excluded ROIs.

**Note:** To repeat the analysis for normalized beta-series, change the input path for `betaseries_aggregated` and the output paths for `corrmats_aggregated.npy`, `corrmats_aggregated.json` and `roi_table_filtered.csv` by adding the `_norm` suffix.

In [1]:
import json
from os.path import join
from pathlib import Path

import numpy as np
import pandas as pd
from dn_utils.behavioral_models import load_behavioral_data
from dn_utils.networks import zscore_matrix
from dn_utils.path import path

### Select brain parcellation

In [2]:
# Name of the parcellation; it is also used as the directory name when the
# parcellation, betaseries and corrmats paths are built further down
atlas = "combined_roi_4and5"
# File name of the ROI table (CSV) read from the parcellation directory
roi_table_fname = "combined_roi_4and5_table.csv"

In [3]:
# Output directory for correlation matrices (created when missing)
path_corrmats = join(path["bsc"], "corrmats")
Path(path_corrmats).mkdir(exist_ok=True)

# Behavioral data; the first three axes give the number of subjects,
# task conditions and trials
beh, meta = load_behavioral_data(path["behavioral"], verbose=False)
n_subjects, n_conditions, n_trials = beh.shape[:3]
n_perr_sign = 2  # positive / negative prediction error

# Aggregated beta-series for the selected atlas
path_betaseries = join(
    path["bsc"], f"betaseries/{atlas}/betaseries_aggregated.npy")
betaseries_aggregated = np.load(path_betaseries)

# Parcellation table; its length is taken as the number of ROIs
path_roi_table = join(path["parcellations"], atlas, roi_table_fname)
df_roi = pd.read_csv(path_roi_table)
n_roi = len(df_roi)

# Subject exclusion table
path_exclusion = join(path["nistats"], "exclusion/exclusion.csv")
df_exclusion = pd.read_csv(path_exclusion, index_col=0)

# Preview of the ROI table (notebook cell output)
df_roi.head()

Unnamed: 0,x,y,z,netCode,netColor,netName,radius(mm),origNetName,strategy,origName
0,42,-66,-8,7.0,Blue,visual,5,visual,,
1,6,-72,24,7.0,Blue,visual,5,visual,,
2,43,-78,-12,7.0,Blue,visual,5,visual,,
3,-47,-76,-10,7.0,Blue,visual,5,visual,,
4,-14,-91,31,7.0,Blue,visual,5,visual,,


### Exclude ROIs without signal

In [4]:
# Flag, for every subject / condition / ROI, whether the beta-series has a
# non-zero standard deviation across trials (True = ROI carries signal).
# The std is computed once over axis 2, leaving (subject, condition, ROI).
roi_has_signal = np.std(betaseries_aggregated, axis=2).astype(bool)

# Stack the conditions on top of each other: all subjects for the first
# condition, then all subjects for the next one, and so on. This works for
# any number of conditions instead of assuming exactly two.
# Dataframe shape is (n_subjects * n_conditions, n_rois)
df_roi_coverage = pd.DataFrame(
    np.swapaxes(roi_has_signal, 0, 1).reshape(-1, roi_has_signal.shape[-1])
)

# Remember the ROI columns before label columns are appended, so that the
# reduction below never touches the labels
roi_columns = list(df_roi_coverage.columns)

# Add columns corresponding to subjects and conditions; the label order has
# to follow the condition-major row order created above
df_roi_coverage["sub"] = list(meta["dim1"]) * n_conditions
df_roi_coverage["con"] = np.repeat(meta["dim2"], n_subjects)

# Remove rows corresponding to subjects excluded from the analysis
included_subjects = list(df_exclusion.loc[df_exclusion["ok_all"], "sub"])
included_series = df_roi_coverage["sub"].isin(included_subjects)
df_roi_coverage = df_roi_coverage.loc[included_series, :]

# An ROI is kept only if it has signal in every remaining row, i.e. it is
# excluded as soon as one included subject / condition has empty signal
included_roi = df_roi_coverage[roi_columns].all(axis=0)
excluded_roi = ~included_roi

excluded_roi_list = list(excluded_roi[excluded_roi].index)

n_included_roi = int(included_roi.sum())
n_excluded_roi = int(excluded_roi.sum())

# Show excluded ROIs
print(f"✅ Remaining: {n_included_roi} ROIs")
print(f"❌ Excluded: {n_excluded_roi} ROIs\n")
df_roi.loc[excluded_roi_list]

✅ Remaining: 268 ROIs
❌ Excluded: 4 ROIs



Unnamed: 0,x,y,z,netCode,netColor,netName,radius(mm),origNetName,strategy,origName
48,-31,-10,-36,-1.0,White,uncertain,5,uncertain,,
53,-56,-45,-24,-1.0,White,uncertain,5,uncertain,,
54,8,41,-24,-1.0,White,uncertain,5,uncertain,,
59,52,-34,-27,-1.0,White,uncertain,5,uncertain,,


### Calculate correlation matrices

In [5]:
# Preallocate the output array; axes are
# (subject, condition, prediction-error sign, ROI, ROI)
corrmats_aggregated = np.zeros(
    (n_subjects, n_conditions, n_perr_sign, n_included_roi, n_included_roi))

# Position of the "won_bool" variable along the last axis of `beh`.
# It does not depend on subject or condition, so it is looked up once.
won_bool_idx = meta["dim4"].index("won_bool")

for sub_idx in range(len(meta["dim1"])):
    for con_idx in range(len(meta["dim2"])):

        # Per-trial outcome flag used to split trials into +PE and -PE
        # NOTE(review): astype(bool) maps NaN to True - confirm that
        # "won_bool" has no missing values
        won_bool = beh[sub_idx, con_idx, :, won_bool_idx].astype(bool)

        # Index 0 holds +PE trials (won_bool True), index 1 holds -PE trials
        for perr_idx, trial_mask in enumerate((won_bool, ~won_bool)):

            # Select trials first, then keep only the included ROIs
            betaseries_perr = betaseries_aggregated[
                sub_idx, con_idx, trial_mask][:, included_roi]

            # Correlate ROIs (rows after transposition) & z-score the matrix
            corrmats_aggregated[sub_idx, con_idx, perr_idx] = zscore_matrix(
                np.corrcoef(betaseries_perr.T))

print("corrmats_aggregated.shape: ", corrmats_aggregated.shape)

  new_arr = .5 * np.log((1 + arr) / (1 - arr))


corrmats_aggregated.shape:  (32, 2, 2, 268, 268)


  c /= stddev[:, None]
  c /= stddev[None, :]


### Store correlation matrices and associated metadata

In [6]:
# Atlas-specific output directory (created when missing)
path_corrmats_atlas = join(path_corrmats, atlas)
Path(path_corrmats_atlas).mkdir(exist_ok=True)

# ROI table restricted to the ROIs kept in the correlation matrices
df_roi_filtered = df_roi[included_roi]
network_names = list(df_roi_filtered["netName"])

# Save aggregated correlation matrices
path_npy = join(path_corrmats_atlas, "corrmats_aggregated.npy")
np.save(path_npy, corrmats_aggregated)

# Save associated metadata: one entry per array dimension; both ROI
# dimensions are labelled with the network name of each ROI
metadata = {
    "dim1": meta["dim1"],
    "dim2": meta["dim2"],
    "dim3": ["perr_pos", "perr_neg"],
    "dim4": network_names,
    "dim5": list(network_names),
}
path_json = join(path_corrmats_atlas, "corrmats_aggregated.json")
with open(path_json, "w") as f:
    json.dump(metadata, f, indent=4)

# Save filtered ROIs
path_csv = join(path_corrmats_atlas, "roi_table_filtered.csv")
df_roi_filtered.to_csv(path_csv)