<h1><center>NIH MEG Workshop 2024 Preprocessing </center></h1> 

## General Import and settings

In [None]:
import mne
import mne_bids
import os, os.path as op
import numpy as np
import warnings
import copy
import matplotlib.pyplot as plt

# Setup Paths

## Set some generic BIDS information
BIDS is a "new" standard for formatting neuroimaging/neurophysiology data <br>
The bids_root is the top level, derivatives holds the projects, project level has the outputs for each subject

| Folder | Data Type | 
| --- | --- | 
| bids_root | raw data |
| deriv_root | folders for all possible derivatives | 
| project_root | [Derivatives]: output folder for this processing | 
| subjects_dir | [Derivatives]: freesurfer anatomical information | 

In [None]:
# --- BIDS folder layout used by the rest of the notebook ---
# Raw data: BIDS root under /data/<USER>/meg_workshop_data
bids_root = op.join('/data/', os.environ['USER'], 'meg_workshop_data')
# Fail early with a readable message if the workshop data is not in place
if not op.isdir(bids_root):
    raise FileNotFoundError(f'BIDS root not found: {bids_root}')
#Processed Data Folder: contains (freesurfer / Day1 / Day2)
deriv_root = op.join(bids_root, 'derivatives')
# exist_ok=True replaces the check-then-create pattern and keeps the cell re-runnable
os.makedirs(deriv_root, exist_ok=True)
#This course data - bids derivatives outputs for Day1
project_root = op.join(deriv_root, 'Day1')
#Freesurfer brain surface reconstruction
fs_subjects_dir = op.join(deriv_root, 'freesurfer','subjects')
subjects_dir = fs_subjects_dir

### Find data for a subject

In [None]:
subject = 'ON02811'
# BIDS entities that locate this subject's airpuff MEG recording under bids_root
bids_path = mne_bids.BIDSPath(root=bids_root, subject=subject, session='01',
                              task='airpuff', run='01', datatype='meg')
# Same entities, re-rooted at this project's derivatives output folder
project_path = bids_path.copy().update(root=project_root, check=False)
# exist_ok=True replaces the check-then-create pattern and keeps the cell re-runnable
os.makedirs(project_path.root, exist_ok=True)

# T1-weighted anatomical image entities for the same subject and session
t1_bids_path = bids_path.copy().update(datatype='anat', session='01', task=None, run=None,
                                       suffix='T1w', acquisition='MPRAGE', extension='.nii.gz')
print(f'Here is the meg data file: {bids_path.fpath}')
print(f'Here is the t1 MRI file: {t1_bids_path.fpath}')

## Load the MEG file

In [None]:
# Using the BIDS reader -- same as below
# (the commented-out read_raw_ctf call in the next cell is the non-BIDS alternative)
raw = mne_bids.read_raw_bids(bids_path)

In [None]:
# Load the data with the traditional reader -- same as above
# raw = mne.io.read_raw_ctf(bids_path.fpath, clean_names=True, system_clock='ignore')

In [None]:
# Display the measurement info attached to the recording
raw.info        #Note that this data has been anonymized (dates and names are scrubbed)

| Channel Types | Function | |
| --- | --- | --- |
| MEG  |  MEG   | Collect Brain Signal |
| Ref   | Reference Channels | Used for third Order Gradient to reject artifact | 
| UPPT | Digital Trigger  - Parallel Port | designates trigger value - Up to 256| 
| UADC | Analog Triggers |  designates timing - eg Projector Trigger |  


#### 3rd Order Gradiometers applied - In our case 3rd order compensation is set by default during acquisition

In [None]:
# Display the gradient compensation grade currently set on the recording
raw.compensation_grade

## Plot **without** 3rd Order Gradient Compensation

In [None]:
%matplotlib inline
_=raw.copy().apply_gradient_compensation(1).plot(n_channels=10)

## Plot **with** 3rd Order Gradient

In [None]:
# Plot the recording as loaded (no change to its compensation grade), 10 channels per page
print('3rd Order Gradient applied')
_=raw.plot(n_channels=10)

In [None]:
# Read the samples into memory, then report the size of the data matrix
raw.load_data()
n_channels = len(raw.ch_names)
n_samples = raw.n_times
duration_sec = n_samples/raw.info['sfreq']
print(f'The data consists of a matrix: {n_channels} channels by {n_samples} samples')
print(f"This is samples / sampling frequency: {duration_sec} seconds")

## Channel information

![SensorLayout](images/sens_layout.png)

In [None]:
# Show every 5th channel name from position 50 up to (not including) 110
print('Some examples of channel names')
raw.ch_names[50:110:5]

### Selecting channels

In [None]:
# Also look at raw.pick_channels and raw.pick
# raw.pick_types?

In [None]:
# Display the measurement info again, before the channel selection in the next cell
raw.info

In [None]:
# NOTE(review): unlike the next cell there is no .copy() here, so this call
# appears to trim `raw` itself to the selected channel types for every later
# cell (the hard-coded trigger row numbers further down presumably rely on
# that) -- TODO confirm the in-place selection is intended.
raw.pick_types(meg=True, eeg=False, misc=True)._data.shape

In [None]:
# Count channels from the measurement info instead of copying the whole
# recording twice just to read the shape of a private data array.
n_meg = len(mne.pick_types(raw.info, meg=True, ref_meg=False))
n_ref = len(mne.pick_types(raw.info, meg=False, ref_meg=True))
print(f'There are {n_meg} meg channels and {n_ref} ref channels')

## Processing Triggers

### The data is collected continuously while the subject is presented with stimuli (e.g. auditory, visual, somatosensory).  The triggers are then used to pull information from the continuous dataset

In [None]:
# Report the row of each trigger channel within raw.ch_names
for trig_name in ('UADC001', 'UPPT001'):
    trig_row = raw.ch_names.index(trig_name)
    print(f"{trig_name} index: {trig_row}")

In [None]:
#General plotting using python plotting
%matplotlib inline
samp_start = 1000
samp_end = 10000
fig, ax = plt.subplots(2,2)
#Note the indexing from previous cell (301 is UADC, 302 is UPPT)
ax[0,0].plot(raw._data[301, samp_start:samp_end])   
ax[0,0].set_title('UADC001')
ax[0,1].plot(raw._data[302, samp_start:samp_end])
ax[0,1].set_title('UPPT001 - multiple vals')
ax[1,0].plot(raw._data[301, samp_start:int(samp_end/7)])
ax[1,0].set_title('UADC001 (analog) - Zoomed')
ax[1,1].plot(raw._data[302, samp_start:int(samp_end/7)])
ax[1,1].set_title('UPPT001 (digital) - Zoomed')
fig.suptitle('Notice the difference between the Trigger Types')
fig.tight_layout()

### Somatosensory task - Airpuff Stimulus (Stim) vs. Missing Stim 
### UPPT001 codes stimuli; UADC001 communicates to mechanical airpuff delivery system

### Sensor level preprocessing

In [None]:
# Make sure the samples are in memory (load_data() was already called in an earlier cell)
raw = raw.load_data() 

# Creating Events and Epochs from the data

In [None]:
# Display the annotations attached to the recording
raw.annotations  #These have been read from the MarkerFile.mrk upon reading 

In [None]:
# Convert the annotations into an events array plus a name -> code dictionary
events, event_ids = mne.events_from_annotations(raw) 

In [None]:
# Dictionary that maps event name to event code
# (returned by mne.events_from_annotations in the previous cell)
event_ids

In [None]:
# One row per event; the middle column is the previous trigger value, not a duration
events    # [Sample , previous trigger value, event code]

In [None]:
%matplotlib inline
# Plot the events over time; sfreq is passed so sample numbers can be related to seconds
_ = mne.viz.plot_events(events, sfreq=raw.info['sfreq'], event_id=event_ids)

## Epochs: Time windows of data around each event 

In [None]:
#At time 0ms the stimulus is delivered
tmin=-0.1  #100ms before event
tmax=0.2   #200ms after event

#Calculate all epochs with no rejection
#baseline=(tmin, 0) passes the pre-stimulus interval as the baseline window
epochs = mne.Epochs(raw, events, tmin=tmin, tmax=tmax, reject_tmax=0,
                preload=True, baseline=(tmin, 0))


In [None]:
# Print the axis order first, then the matching shape of the epochs array
print('Number of Epochs X Number of Channels X Number of samples')
print(epochs._data.shape)

In [None]:
# Switch to the qt backend for this plot (the following plot cells switch back to inline)
%matplotlib qt
_=epochs.plot(n_channels=10, events=False)

### Rejecting bad epochs using amplitude thresholding

In [None]:
# Amplitude threshold for the 'mag' channel type
reject_dict = dict(mag=2500e-15)
# Apply the rejection to a copy so `epochs` keeps every trial for comparison
cleaned = epochs.copy()
cleaned = cleaned.drop_bad(reject=reject_dict)

### Epochs data size: Epochs X Channels X TimeSteps

In [None]:
# Compute the number of rejected epochs instead of hard-coding it, so the
# message stays correct if the threshold or the dataset changes
n_dropped = len(epochs) - len(cleaned)
print(f'Original Data Size:{epochs._data.shape}')
print(f'Cleaned  Data Size:{cleaned._data.shape}')
print(f'The matrix is now {n_dropped} epochs less')

### Evoked data averages over epochs: Channels X TimeSteps

In [None]:
# Average the epochs selected by the key '2' into a single evoked response
evoked = epochs['2'].average()
print('The evoked data averages all of the epochs')
# Channels X TimeSteps -- kept as the cell's last expression so the shape is
# actually displayed (it was silently discarded when followed by the print)
evoked.data.shape  #Also removes 4 response/stim/clock channels

### Butterfly plot of evoked brain activity

In [None]:
%matplotlib inline
# Plot the averaged response; the result is assigned to _ so it is not echoed as cell output
_=evoked.plot()

### Plot evoked dataset over time

In [None]:
# Topographic maps every 10 ms from 0 s up to (not including) 0.1 s
_=evoked.plot_topomap(times=np.arange(0,.1, 0.01))

### Effect of number of samples on data average

In [None]:
stim_epochs=epochs['2']
stim_epochs.pick_types(meg=True, ref_meg=False)
bg_noise={}

%matplotlib inline
fig,axes = plt.subplots(3,2)
idx=0
reduce = [1,2,4,8,16,32]
for row in axes: 
    for ax in row: 
        evoked = stim_epochs[0::reduce[idx]].average(method='mean')
        ax.plot(evoked.times, evoked._data.T)
        ax.set_ylim(-200e-15, 200e-15)
        ax.set_title(f'# of Epochs in Average: {evoked.nave}')
        bg_noise[evoked.nave]=(evoked._data[:,np.where(evoked.times<0)[0]]**2).sum()
        idx+=1
fig.suptitle('Effects of epoch number on average')
plt.tight_layout()
plt.show()

In [None]:
# One point per average: x = number of epochs averaged, y = value stored in bg_noise
n_averaged = list(bg_noise.keys())
prestim_noise = list(bg_noise.values())
plt.scatter(n_averaged, prestim_noise)
plt.title('Background Prestim Noise versus Number of Epochs')
plt.xlabel('Epochs')

In [None]:
from matplotlib import pyplot as plt

stim_epochs=epochs['2']

%matplotlib qt
fig, axes = plt.subplots(3,2)
idx=0
reduce = [1,2,4,8,16,32]
for row in axes: 
    for ax in row:
        evoked = stim_epochs[0::reduce[idx]].average(method='mean')
        evoked.plot_topomap(times=[0.040], colorbar=False, axes=ax, scalings={'mag':1e15})
        ax.set_title(f'40ms - Number Ave:{evoked.nave}')
        idx+=1
plt.tight_layout()
plt.show()

## Adjusting event timing and creating new Events

In [None]:
from nih2mne.utilities.markerfile_write import main as write_markerfile
from nih2mne.utilities.trigger_utilities import threshold_detect, parse_marks, detect_digital, append_conditions
#detect_digital --- for digital triggers (UPPT001)
#threshold_detect --- for analog triggers (UADC__)
#parse_marks --- do some fancy fancy trigger stuff

In [None]:
#Changing task to the ODDball task
bids_path.update(task='oddball')
# Delete any MarkerFile left by a previous run BEFORE reading the data, so the
# loaded recording cannot carry annotations from a file that no longer exists
marker_fname = op.join(bids_path.fpath, 'MarkerFile.mrk')
if op.exists(marker_fname):
    os.remove(marker_fname)
raw = mne_bids.read_raw_bids(bids_path)

In [None]:
#Get the onsets from the parallel port
ppt=detect_digital(bids_path.fpath)
ppt #Output is a pandas tabular dataframe

In [None]:
# Collect every channel whose name begins with 'UADC'
uadc_names = [ch_name for ch_name in raw.ch_names if ch_name.startswith('UADC')]
print(uadc_names, 'These are the UADC channels in the data')
print('UADC005 is the subject response code')

In [None]:
%matplotlib inline
uadcs = raw.copy().pick(['UPPT001','UADC005'])
uadcs.plot(scalings=dict(misc=3))

## After reviewing the data (and also knowing the stim) - the PPT value 3 is the response target

In [None]:
# Replace the numeric trigger codes in the condition column with readable names
condition_names = {'1': 'standard', '2': 'distractor', '3': 'target'}
for code, label in condition_names.items():
    ppt.loc[ppt.condition==code, 'condition'] = label

In [None]:
#Get the onsets of the subject_response device
# UADC005 is the channel this notebook names and plots as the subject response,
# so detect the response onsets on that channel (previously UADC003)
response=threshold_detect(str(bids_path.fpath), channel='UADC005', mark='response')
response

In [None]:
# Combine the response onsets and the parallel-port onsets into one table
dframe= append_conditions([response, ppt])

In [None]:
# Display the combined table
dframe

In [None]:
# Derive a 'CorrectResp' mark from 'target' (lead) / 'response' (lag) pairs.
# marker_on='lag' presumably places the new mark at the response -- confirm in nih2mne.
# NOTE(review): dframe is overwritten with its own output, so re-running only
# this cell feeds the already-parsed table back in -- re-run the
# append_conditions cell first.
dframe=parse_marks(dframe, lead_condition='target', lag_condition='response', marker_on='lag', marker_name='CorrectResp')
dframe

In [None]:
# Count how many rows there are for each condition name
dframe.condition.value_counts()

### Annotations / events are not in the original file

In [None]:
# Display the annotations of the recording as loaded, before a new MarkerFile is written
raw.annotations

In [None]:
# Write the marks in dframe out as a marker file for the dataset at bids_path.fpath
write_markerfile(dframe, bids_path.fpath)

In [None]:
# Re-read the dataset now that the marker file has been written (verbose=False silences the reader)
raw=mne_bids.read_raw_bids(bids_path, verbose=False)

### The annotations have now been written into the CTF file

In [None]:
# Display the annotations of the re-read recording
raw.annotations

#### Note that the vertical event designators are now present in the data when plotting

In [None]:
%matplotlib inline
uadcs = raw.copy().pick(['UPPT001','UADC005'])
uadcs.plot(scalings=dict(misc=3))