**Outline**

The purpose of this script is to load "long" epoch TFRs for both the button-press (BP) and resting-state tasks, and concatenate data dfrom each task into:
1. A channels x epochs x frequencies x timepoints array
2. A channels x epochs x timepoints array, averaged over the beta frequency range

**Import packages**

In [1]:
import os
import mne 
import pandas as pd
import numpy as np
import tqdm
from tqdm import tqdm

mne.set_log_level('ERROR')

**Define data directories and filenames**

In [2]:
# Data path (from which we will read files)
data_path =  os.path.join("/media/NAS/lbailey/PMBR_timecourse/output/proc_data")

# Output path (where we will save files)
out_path = os.path.join("/media/NAS/lbailey/PMBR_timecourse/output/1BP15")

if not os.path.exists(out_path):
    os.makedirs(out_path)

# Define generic filenames
rest_suffix = '_epoch_tfrs_no_baseline_1s-pre_15s-post_3-min_rest-tfr.h5'
bp_suffix = '_epoch_tfrs_no_baseline_1s-pre_15s-post_trial-tfr.h5'

**Import subject list**

In [3]:
# Get list of subjects from the demographics csv
df_demo_allsubjects = pd.read_csv("/home/timb/camcan/proc_data/demographics_allSubjects.csv")
subject_list = list(df_demo_allsubjects.loc[(df_demo_allsubjects['RawExists'] == 1)]['SubjectID'])

# Remove the following subjects from subject_list. These subjects either had missing rest data, or ICA failed to converge on their BP data
missing_subjects = ['CC620685', 'CC620444', 'CC120208', 'CC621118', 'CC410097', 
                    'CC620557', 'CC723197', 'CC221733', 'CC711244', 'CC720330', 
                    'CC620567', 'CC122016', 'CC512003', 'CC610462', 'CC510480']

for i in missing_subjects:
    subject_list.remove(i)

# Also note that CC621080 has BP data but no rest data, because ICA failed for the latter. 
# We'll keep this subject because we are primarily concerned with BP data; but it's worth 
# noting that the rest data is missing from one subject.



**Define a function to read tfr data (rest or BP) from individual subjects**

In [4]:
def load_tfr(subject, task):
   
    # Define path to tfr file for this subject and task
    tfr_fname =  os.path.join(data_path, subject, subject + eval(f'{task}_suffix'))
    
    if not os.path.exists(tfr_fname):
        print(f'The {task} tfr file for {subject} does not exist. Skipping...')
        return

    # Load the tfr. Note that tfr has the form (n_epochs, n_channels, n_freqs, n_times)
    tfr = mne.time_frequency.read_tfrs(tfr_fname)

    # Append subjectID
    # tfr.subject = subject

    return tfr

**Define empty lists to store imported TFRs, and keep track of participant and epoch counts for each task**

In [5]:
# Make an empty list to store tfr data from each task
tfrs_bp = []
tfrs_rest = []

# Keep track of any skipped subjects (e.g., due to missing data)
skipped_subjects_bp = []
skipped_subjects_rest = []

# Count epochs per participant
n_epochs_bp = []
n_epochs_rest = []


**Load the tfrs for each task and append to respective numpy arrays**

In [8]:
# Loop through all subjects and load their TFR data
for i, subject in enumerate(tqdm(subject_list[:1])):

    for task in ['bp', 'rest']:

        # Determine which containers we will store data in, depending on the task
        tfrs = eval(f'tfrs_{task}')
        skipped_subjects = eval(f'skipped_subjects_{task}')
        n_epochs = eval(f'n_epochs_{task}')

        # Load the tfr data. We'll use a try statement here because some subjects return None
        try:
            orig_tfr = load_tfr(subject, task)

            tfrs.append(orig_tfr.get_data())
            n_epochs.append(orig_tfr.get_data().shape[0])

            # Also get info, times and freqs from the first subject
            if i == 0:                
                info = orig_tfr.info
                times = orig_tfr.times
                freqs = orig_tfr.freqs

                # Save to disk
                mne.io.write_info(os.path.join(out_path, f'{task}-info.fif'), info)
                np.save(os.path.join(out_path, f'{task}_times.npy'), times)
                np.save(os.path.join(out_path, f'{task}_freqs.npy'), freqs)

        except:
            skipped_subjects.append(subject)
            continue
        



100%|██████████| 1/1 [00:00<00:00,  1.69it/s]


**Print out participant & epoch counts for each task**

In [None]:
# Print total number of epochs
print(f'Total number of epochs in BP task: {sum(n_epochs_bp)}')
print(f'Total number of epochs in rest task: {sum(n_epochs_rest)}')

# Print total number of subjects
n_subjects_bp = len(subject_list) - len(skipped_subjects_bp)
n_subjects_rest = len(subject_list) - len(skipped_subjects_rest)

print(f'Number of subjects in BP task: {n_subjects_bp}')
print(f'Number of subjects in rest task: {n_subjects_rest}')

# Print total number of subjects with only one epoch
print(f'Number of subjects with only one epoch in BP task: {len([i for i in n_epochs_bp if i == 1])}')

# Print out any skipped subjects
print(f'Skipped subjects in BP task: {len(skipped_subjects_bp)}')
print(f'Skipped subjects in rest task: {len(skipped_subjects_rest)}')

**Concatenate lists of TFRs into one array per task**

In [None]:
tfrs_bp_concat = np.concatenate(tfrs_bp, axis=0)
tfrs_rest_concat = np.concatenate(tfrs_rest, axis=0)

# Print shape of the new arrays
print(f'Shape of concatenated BP data: {tfrs_bp_concat.shape}')
print(f'Shape of concatenated rest data: {tfrs_rest_concat.shape}')

In [15]:
# Save the concatenated arrays to disk
np.save(os.path.join(out_path, f'tfr_data_BP_{n_subjects_bp}_subjects.npy'), tfrs_bp_concat)
np.save(os.path.join(out_path, f'tfr_data_rest_{n_subjects_rest}_subjects.npy'), tfrs_rest_concat)


In [3]:
# # Optional - load from disk
# tfrs_BP_concat = np.load(os.path.join(out_path, f'tfr_data_BP_617_subjects.npy'))
# tfrs_rest_concat = np.load(os.path.join(out_path, f'tfr_data_rest_612_subjects.npy'))

**Average TFR arrays over beta range**

In [16]:
# Define beta frequency range
beta_freqs = np.argwhere((15 < freqs) & (freqs < 30))

# Average over frequency. Uee axis=2 because frequency is the third axis in this array
tfrs_rest_beta = np.mean(tfrs_rest_concat[:, :, beta_freqs, :], axis=2).squeeze()
tfrs_bp_beta = np.mean(tfrs_bp_concat[:, :, beta_freqs, :], axis=2).squeeze() 

In [17]:
# Save to disk
np.save(os.path.join(out_path, f'tfr_data_BP_beta_{n_subjects_bp}_subjects.npy'), tfrs_bp_beta)
np.save(os.path.join(out_path, f'tfr_data_rest_beta_{n_subjects_rest}_subjects.npy'), tfrs_rest_beta)

In [4]:
# Load from disk
tfrs_BP_beta = np.load(os.path.join(out_path, f'tfr_data_BP_beta_617_subjects.npy'))
tfrs_rest_beta = np.load(os.path.join(out_path, f'tfr_data_rest_beta_612_subjects.npy'))