**Outline**

The purpose of this notebook is to compute "long trial" time-frequency representations (TFRs) from the resting-state data. For each subject, a random selection of rest epochs is taken, matching in number and duration the long epochs selected for the button-press (BP) analysis.

**Import packages**

In [16]:
import os
import mne

import pandas as pd
import numpy as np
from tqdm import tqdm

# Only show MNE log messages at WARNING level or above, to keep cell output short
mne.set_log_level('WARNING')

**Define data directories and filenames**

In [17]:
# Directory holding the raw recordings (one sub-folder per subject)
raw_path = "/media/WDEasyStore/timb/camcan/download/20170824/cc700/meg/pipeline/release004/data_movecomp/aamod_meg_maxfilt_00002"

# Project data directory: some inputs are read from here and all output is written here
data_path = "/media/NAS/lbailey/PMBR_timecourse/output"

# Lindsey's proc data folder, which contains the ICA files for the resting state data
lpower_proc_data_path = "/media/NAS/lpower/camcan/spectralEvents/rest/proc_data/"

# Generic filenames (identical for every subject)
raw_fif_fname = 'transdef_mf2pt2_rest_raw.fif'
ica_fname = 'transdef_mf2pt2_rest_raw-ica.fif'

**Import subjects list and BP trial timing information**

In [18]:
# Load the button-press (BP) trial timing data.
# Important: drop exact duplicate rows so that a repeated row is not counted as an extra trial
df_trial_timings_allsubjects = (
    pd.read_csv(os.path.join(data_path, "trial_timings.csv"))
    .drop_duplicates(ignore_index=True)
)

# Get the candidate subjects from the demographics csv (only rows flagged RawExists == 1)
df_demo_allsubjects = pd.read_csv("/home/timb/camcan/proc_data/demographics_allSubjects.csv")
has_raw = df_demo_allsubjects['RawExists'] == 1
candidate_subjects = df_demo_allsubjects.loc[has_raw, 'SubjectID'].tolist()

# Subjects to exclude. These subjects either had missing raw resting state data,
# or ICA failed to converge on their BP data
missing_subjects = ['CC620685', 'CC620444', 'CC120208', 'CC621118', 'CC410097',
                    'CC620557', 'CC723197', 'CC221733', 'CC711244', 'CC720330',
                    'CC620567', 'CC122016', 'CC512003', 'CC610462', 'CC510480',
                    'CC621080']

# Filter instead of calling list.remove(): remove() raises ValueError as soon as one
# excluded ID is not present in the candidate list, which would abort this cell
excluded_subjects = set(missing_subjects)
subject_list = [s for s in candidate_subjects if s not in excluded_subjects]

**Define timing parameters for selected events**

In [19]:
# Trial-selection constraints, in seconds: minimum time since the previous trial (pre)
# and minimum time until the next trial (post).
# These are the same constraints that were applied in the BP analysis.
pre_trial_time, post_trial_time = 1, 15

# Suffix of the output TFR filename; it encodes the two time limits above
tfr_suffix = f'_epoch_tfrs_no_baseline_{pre_trial_time}s-pre_{post_trial_time}s-post_3-min_rest-tfr.h5'

**Define function to do the work**

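The function below loads a subject's raw resting-state data, filters it, cuts it into fixed-length epochs, randomly selects as many epochs as the subject has BP trials meeting our trial selection criteria, applies ICA, and then computes the TFRs and saves them to disk.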

In [20]:
def compute_long_rest_tfrs(subject, rng=None):
    """Compute and save single-epoch TFRs from one subject's resting-state recording.

    The rest recording is filtered and cut into fixed-length epochs. A random
    subset of those epochs is kept, with as many epochs as the subject has BP
    trials satisfying ``pre_trial_time`` / ``post_trial_time``. ICA is applied,
    Morlet TFRs are computed for two sensors, and the result is written to
    ``<data_path>/proc_data/<subject>/``.

    Parameters
    ----------
    subject : str
        Subject ID, used to build the input and output paths.
    rng : numpy.random.Generator | numpy.random.RandomState | None
        Source of randomness for the epoch selection. If None (default), the
        global ``np.random`` state is used. Pass a seeded generator to make the
        selection reproducible.

    Returns
    -------
    None
        The function works through side effects: it prints the output path,
        creates the output directory if needed, and saves the TFR file. It
        returns early (saving nothing) if the raw file or ICA file is missing,
        if the TFR file already exists, or if there are no epochs to select.
    """
    # Define input files
    raw_fif_path = os.path.join(raw_path, subject, 'rest', raw_fif_fname)
    ica_path = os.path.join(lpower_proc_data_path, subject, ica_fname)

    # Skip if the raw data file does not exist
    if not os.path.exists(raw_fif_path):
        print(f"Skipping {subject} as raw data file does not exist")
        return

    # Define output files
    out_path = os.path.join(data_path, 'proc_data', subject)
    os.makedirs(out_path, exist_ok=True)

    tfr_path = os.path.join(out_path, f'{subject}_{tfr_suffix}')
    print(tfr_path)

    # Skip this subject if the TFR file already exists. This is checked before
    # anything is read from disk so that a re-run does no unnecessary work.
    if os.path.exists(tfr_path):
        return

    # We want the same number of rest epochs as we have long BP epochs (per participant),
    # so use the BP timing data to determine how many rest epochs to extract.

    # Pull the rows of df_trial_timings_allsubjects for this subject
    df_trial_timings = df_trial_timings_allsubjects[df_trial_timings_allsubjects['subject'] == subject]

    # Keep only the trials fitting our pre- and post-trial times
    fits_criteria = ((df_trial_timings['t_since_prev_trial'] >= pre_trial_time)
                     & (df_trial_timings['t_until_next_trial'] >= post_trial_time))

    # The number of qualifying trials is the number of epochs we want from the rest data
    n_epochs = int(fits_criteria.sum())

    # Nothing to select for this subject, so do not load or filter the recording
    if n_epochs == 0:
        return

    # Check Lindsey's proc_data directory for the ICA file. If it does not exist,
    # check the output folder for this subject
    if not os.path.exists(ica_path):
        ica_path = os.path.join(out_path, ica_fname)

    # Skip (rather than crash the whole subject loop) if no ICA file is available
    if not os.path.exists(ica_path):
        print(f"Skipping {subject} as ICA file does not exist")
        return

    ica = mne.preprocessing.read_ica(ica_path)

    # Define the length of our desired epochs
    epoch_duration = (pre_trial_time + post_trial_time)

    # Load the raw data
    raw = mne.io.read_raw_fif(raw_fif_path, preload=True)

    # Apply filtering. This is done in place: the unfiltered data is not needed again,
    # so there is no reason to hold a second copy of the recording in memory
    raw_filt = raw.filter(0, 40)

    # Epoch the raw data using events of fixed length. We want epochs of length
    # epoch_duration, but add an extra 1s to the start and end of each epoch to
    # avoid edge effects when it comes to computing the TFRs.
    # Events are generated between 170s and 350s (a 3-minute section). With the padding,
    # each epoch is epoch_duration+2 seconds long, so at most 180 / 18 = 10 events fit.
    events = mne.make_fixed_length_events(raw_filt,
                                          start = 170,  # start of 3-minute middle section
                                          stop = 350,   # end of 3-minute middle section
                                          duration = epoch_duration+2, overlap = 0)

    # Skip if there are no events
    if len(events) == 0:
        return

    # Perform epoching (each epoch runs from pre_trial_time+1 seconds before its event
    # to post_trial_time+1 seconds after it)
    epochs = mne.Epochs(raw_filt, events, tmin=-(pre_trial_time+1), tmax=(post_trial_time+1), baseline = None, preload=True)

    # Select a random N epochs, where N == the number of long BP trials for this subject.
    # Sampling without replacement raises ValueError if more items are requested than
    # exist, so cap the request at the number of epochs actually available.
    n_available = len(epochs)
    n_select = min(n_epochs, n_available)
    if n_select == 0:
        return
    if n_select < n_epochs:
        print(f"{subject}: only {n_available} rest epochs available for {n_epochs} BP trials; using {n_select}")

    # Both the np.random module and Generator/RandomState objects provide .choice()
    sampler = np.random if rng is None else rng
    selected_idx = sampler.choice(np.arange(n_available), n_select, replace=False)
    epochs_subset = epochs[selected_idx]

    if len(epochs_subset) == 0:
        return

    # Apply ICA to the epochs (data is already in memory because of preload=True)
    epochs_icad = ica.apply(epochs_subset)

    # Pick sensors of interest. This must come after ICA, which needs the full channel set
    epochs_icad_picks = epochs_icad.pick(['MEG0211', 'MEG1311'])

    # Set parameters for TFR
    freqs = np.arange(1, 40, 1)
    n_cycles = freqs / 2.0

    # Compute TFR for each trial
    tfr = epochs_icad_picks.compute_tfr(method='morlet', freqs=freqs, n_cycles=n_cycles, average=False, decim=10)

    # Crop time back to our desired window, which removes the 1s padding at each end
    tfr_cropped = tfr.crop(tmin=-pre_trial_time, tmax=post_trial_time)

    # Save the TFR to disk
    tfr_cropped.save(tfr_path, overwrite=True)

**Do the work**

In [None]:
# Number of subjects to process, counted from the start of subject_list.
# It is currently 1, so only the first subject is run; set to None to process every subject.
n_subjects_to_process = 1

# Loop through the selected subjects and compute trial TFR(s) for each
for subject in tqdm(subject_list[:n_subjects_to_process]):
    compute_long_rest_tfrs(subject)