In [None]:
#%% Imports:
import matplotlib
matplotlib.use('MACOSX')

from math import sqrt
import os
import numpy as np
import mne
#import matplotlib #doesnt it import through mne already?
from copy import deepcopy

# --- Load the recording ---
# Path of the raw MEG recording (.fif) that the rest of the notebook analyses.
# Alternative data set that was tried before:
#   '/Users/jenya/Local Storage/Job Uni Rieger lab/MEG QC code/data/ds004276/sub-001/meg/sub-001_task-words_meg.fif'
kath_raw_file='/Users/jenya/Local Storage/Job Uni Rieger lab/MEG QC code/data/sub_HT05ND16/210811/mikado-1.fif'

# Open the file. load_data() is called explicitly later, right before filtering.
raw = mne.io.read_raw_fif(kath_raw_file)

# Print a summary of the recording and of its measurement info.
print(raw)
print(raw.info)
raw

# Keep only the first 300 s (5 min) in a separate copy so that the later
# calculations run faster; `raw` itself stays uncropped.
raw_cropped = raw.copy().crop(0, 300)

In [None]:
#%% How to look up info and other useful things.
# This whole cell is optional: nothing further down depends on it.

# Measurement-info entry of channel number 15 (index 14, counting from 0).
channel_15 = raw.info['chs'][14]

# Name and unit of that channel:
channel_15['ch_name']
channel_15['unit']

# All channel names:
print(raw.info['ch_names'])

# All available info keys:
raw.info.keys()

# Plot 5 s of the first 30 channels.
raw.plot(block=True, duration=5, n_channels=30)
# Open question: how to plot a particular range of channels?


In [None]:
# Channel names per sensor type as reported by MNE (None when the recording
# contains no channel of that type).
mag_ch_names = raw.copy().pick_types(meg='mag').ch_names if 'mag' in raw else None
grad_ch_names = raw.copy().pick_types(meg='grad').ch_names if 'grad' in raw else None

channels = {'mags': mag_ch_names, 'grads': grad_ch_names}

# Separate mags and grads by hand, using the unit stored with every channel.
# Each entry is a (channel name, index in raw.info['chs']) pair.
mags = [(chs['ch_name'], i) for i, chs in enumerate(raw.info['chs']) if str(chs['unit']).endswith('UNIT_T)')]
grads = [(chs['ch_name'], i) for i, chs in enumerate(raw.info['chs']) if str(chs['unit']).endswith('UNIT_T_M)')]

# Magnetometer data selected through the hand-made index list ...
selected_channels = [ch[1] for ch in mags]
data_channels, _ = raw[selected_channels, :]

# ... and the same data selected through the MNE channel names.
data_channels2 = raw.get_data(picks=channels['mags'])

# Print one True/False answer. The previous element-wise `==` printed a huge
# boolean array that had to be inspected by eye; array_equal also reports
# False (instead of misbehaving) when the two shapes differ.
print(np.array_equal(data_channels, data_channels2))

In [None]:
# Band-pass filter the data to 1-100 Hz, because otherwise the calculated
# STDs are too high (although they are still high after filtering).
#   * Upper edge: no useful brain signal is expected above 100 Hz.
#   * Lower edge: 0.5 Hz might be better, because the delta band is 0.5-4 Hz
#     (needed for the frequency spectrum) and part of it is cut here.
#     1-100 Hz is used for now.
# Open question: does the filtering change the magnitude of the STDs
# (from 9 to 12-13)?
#
# Filter choice: an IIR (Butterworth) filter similar to filtfilt in Matlab,
# as used in the EEG course. It filters forward and backward and therefore
# creates no time shift. A different filter might turn out to be better.

# The data have to be in memory before they can be filtered.
raw_cropped.load_data(verbose=True)

bandpass_settings = dict(l_freq=1, h_freq=100, picks='meg', method='iir', iir_params=None, verbose=True)
raw_bandpass = raw_cropped.copy().filter(**bandpass_settings)

# The documentation says: "if method='iir', 4th order Butterworth will be used",
# yet the log reports filter order 16.
# NOTE(review): presumably 16 is the effective order after the band-pass
# design and the forward-backward pass - confirm against the MNE filter docs.

# Plot the first 60 seconds of the filtered data.
# remove_dc=False is copied from the tutorial.
# NOTE(review): presumably it controls whether the DC component is removed
# for display only - confirm in the Raw.plot documentation.
fig = raw_bandpass.plot(
    duration=60,
    proj=False,
    n_channels=len(raw.ch_names),
    remove_dc=False,
    verbose=True,
)


In [None]:
# Display the summary of the band-pass filtered copy of the cropped recording:

raw_bandpass

In [None]:
#%% Find magnetometers and gradiometers:
# Channels are told apart by their unit: a unit ending in UNIT_T marks a
# magnetometer, a unit ending in UNIT_T_M marks a gradiometer.
# (In this data set magnetometer names end with 1, gradiometer names with 2 and 3.)
# Every entry collected below is a (channel name, channel index) pair.

mags = []
grads = []

for position, channel in enumerate(raw_bandpass.info['chs']):
    unit_text = str(channel['unit'])
    entry = (channel['ch_name'], position)

    if unit_text.endswith('UNIT_T)'):
        mags.append(entry)
    elif unit_text.endswith('UNIT_T_M)'):
        grads.append(entry)

print('Magnetometers: ', mags)
print('Gradiometers: ', grads)

In [None]:
#%% Other (shorter) way to build the same lists:
def _channels_with_unit_suffix(info, unit_suffix):
    """Return (channel name, index) pairs of channels whose unit text ends with unit_suffix."""
    return [
        (channel['ch_name'], position)
        for position, channel in enumerate(info['chs'])
        if str(channel['unit']).endswith(unit_suffix)
    ]

mags = _channels_with_unit_suffix(raw_bandpass.info, 'UNIT_T)')
print('Magnetometers: ', mags)

grads = _channels_with_unit_suffix(raw_bandpass.info, 'UNIT_T_M)')
print('Gradiometers: ', grads)

In [None]:
#%% One more way, using MNE itself (to cross-check the hand-made lists).
# eog=False: only the MEG sensors of the requested type are wanted here. With
# eog=True any EOG channels would be added to the picks, so the result would
# no longer match the hand-made `mags` / `grads` lists. This also agrees with
# the picks used later for epoching.
picks_grad = mne.pick_types(raw_bandpass.info, meg='grad', eeg=False, eog=False, stim=False)
picks_mag = mne.pick_types(raw_bandpass.info, meg='mag', eeg=False, eog=False, stim=False)

# The result holds the indexes of mags and grads (starting from 0), but no channel names.

In [None]:
# Put the data of magnetometers and gradiometers into 2 separate arrays.
# `mags` / `grads` hold (name, index) pairs; only the index is needed here.
selected_mags = [channel_index for _name, channel_index in mags]
selected_grads = [channel_index for _name, channel_index in grads]

# Indexing the raw object returns the data together with the time axis.
data_mags, times = raw_bandpass[selected_mags, :]
data_grads, times = raw_bandpass[selected_grads, :]

# May be useful later: the get_data command
# https://mne.tools/stable/generated/mne.io.Raw.html#mne.io.Raw.get_data

In [None]:
# %% Calculate STD or RMSE of each channel

# The STD calculation is timed so it can be compared with the RMSE cell.
import time

t0_std = time.time()

# STD of every channel along the second dimension of the data array.
std_mags = data_mags.std(axis=1)    # all magnetometers
std_grads = data_grads.std(axis=1)  # all gradiometers

t1_std = time.time()
total_time_std = t1_std - t0_std

# Average STD per sensor type.
mean_std_magn = std_mags.mean()
mean_std_grad = std_grads.mean()

# Summary: average, highest and lowest STD for each sensor type.
print('Mean std of magnetometers data: ', mean_std_magn)
print('Max std of magnetometers data: ', max(std_mags))
print('Min std of magnetometers data: ', min(std_mags))
print('Mean std of gradiometers data: ', mean_std_grad)
print('Max std of gradiometers data: ', max(std_grads))
print('Min std of gradiometers data: ', min(std_grads))

In [None]:
#%% RMSE:
# https://stackoverflow.com/questions/17197492/is-there-a-library-function-for-root-mean-square-error-rmse-in-python
#
# RMSE of every channel around its own mean, computed directly with numpy.
# The earlier version looped over the channels and called scikit-learn's
# mean_squared_error(..., squared=False); that keyword is deprecated or
# removed in recent scikit-learn releases, and the loop is not needed.

t0_rmse = time.time()

# Magnetometers: "actual" is the signal, "predicted" is the channel mean.
# (The renaming is not required, it just keeps the RMSE concept visible.)
y_actual_mags = data_mags
y_predicted_mags = data_mags.mean(axis=1)
# The new axis lets every channel mean broadcast along that channel's samples.
rmse_mags = np.sqrt(np.mean((y_actual_mags - y_predicted_mags[:, np.newaxis]) ** 2, axis=1))

# Gradiometers: same calculation.
y_actual_grads = data_grads
y_predicted_grads = data_grads.mean(axis=1)
rmse_grads = np.sqrt(np.mean((y_actual_grads - y_predicted_grads[:, np.newaxis]) ** 2, axis=1))

# Stop the clock before printing, so that only the numerical work is timed
# (the STD cell is timed the same way).
t1_rmse = time.time()
total_time_rmse = t1_rmse - t0_rmse

print('Mean RMSE of magnetometers data: ', np.mean(rmse_mags))  # average RMSE
print('Max RMSE of magnetometers data: ', max(rmse_mags))       # highest RMSE
print('Min RMSE of magnetometers data: ', min(rmse_mags))       # lowest RMSE
print('Mean RMSE of gradiometers data: ', np.mean(rmse_grads))  # average RMSE
print('Max RMSE of gradiometers data: ', max(rmse_grads))       # highest RMSE
print('Min RMSE of gradiometers data: ', min(rmse_grads))       # lowest RMSE

print('Time to calculate std: ', total_time_std)
print('Time to calculate rmse: ', total_time_rmse)

# Earlier observation (with the looped scikit-learn version): the STD
# calculation is much less code but took longer than the RMSE one.
# Re-check with the two timings printed above.


In [None]:
# DON'T RUN.
# It was decided not to pick the channel with the largest STD, but to look
# for channels outside 1 STD over all channels (next cell).
# This cell is kept in case the approach changes.

# %% Pick the channel with the largest STD (RMSE)?
# Open question: how do we decide which channels to pick? Is there any
# particular limit? If not, just show the channels with the largest STD
# and let the user decide.
largest_std_mags = np.nonzero(std_mags == max(std_mags))
largest_std_grads = np.nonzero(std_grads == max(std_grads))

# The first position at which the maximum occurs selects the (name, index) pair.
first_mag_position = largest_std_mags[0][0]
first_grad_position = largest_std_grads[0][0]
mag_channel_largest_std = mags[first_mag_position]
grad_channel_largest_std = grads[first_grad_position]

print('Magnetometer with largest STD: ', mag_channel_largest_std[0])
print('Gradiometer with largest STD: ', grad_channel_largest_std[0])

In [None]:
# Check whether the STD of each channel lies within 1 STD over all channels.
# The range could be -3 to +3 STD (or another number) instead of -1/+1; this
# can be adjusted later - find our own best value, or make it a user input.
# 1 STD is too narrow and gives far too many bad channels.

# Centre of the range: mean of the channel STDs.
mean_std_mags = np.mean(std_mags)
mean_std_grads = np.mean(std_grads)

# Width of the range: STD of the channel STDs.
std_std_mags = np.std(std_mags)
std_std_grads = np.std(std_grads)

upper_limit_mags = mean_std_mags + std_std_mags
upper_limit_grads = mean_std_grads + std_std_grads
lower_limit_mags = mean_std_mags - std_std_mags
lower_limit_grads = mean_std_grads - std_std_grads

# Positions of the channels above / below the range.
ch_large_std_mags = np.where(std_mags > upper_limit_mags)
ch_large_std_grads = np.where(std_grads > upper_limit_grads)

ch_small_std_mags = np.where(std_mags < lower_limit_mags)
ch_small_std_grads = np.where(std_grads < lower_limit_grads)

# Turn the positions back into rows of the (name, index) lists.
mags_table = np.array(mags)
grads_table = np.array(grads)

magn_channel_big_std = mags_table[ch_large_std_mags[0]]
grad_channel_big_std = grads_table[ch_large_std_grads[0]]

magn_channel_small_std = mags_table[ch_small_std_mags[0]]
grad_channel_small_std = grads_table[ch_small_std_grads[0]]

print('Magnetometers with high STD: ', magn_channel_big_std)
print('Gradiometers with high STD: ',grad_channel_big_std)

print('Magnetometers with low STD: ', magn_channel_small_std)
print('Gradiometers with low STD: ',grad_channel_small_std)


# Open question: why is MEG0223 not detected as noisy?

In [None]:
#%% Look at two example channels with a high STD:
# the first flagged magnetometer and the first flagged gradiometer
# (written out by hand this would be e.g. ['MEG2311', 'MEG1542']).
noisy_chans = [magn_channel_big_std[0, 0], grad_channel_big_std[0, 0]]

# Translate the channel names into positions within the filtered recording.
chan_idxs = []
for noisy_name in noisy_chans:
    chan_idxs.append(raw_bandpass.ch_names.index(noisy_name))

# Plot only a part of each channel.
raw_bandpass.plot(order=chan_idxs, start=12, duration=4)

In [None]:
# "Within 1 STD over all channels" means:
#   * take the mean over the STDs of all channels,
#   * go 1 STD (of those STDs) below the mean,
#   * go 1 STD (of those STDs) above the mean.
# The figure below visualises this idea; the middle line is the mean of the STDs.

from matplotlib import pyplot as plt
#%matplotlib qt
#%matplotlib inline

fig, (ax1, ax2) = plt.subplots(2)
fig.suptitle('STDs')

# One panel per sensor type: (axes, channel STDs, mean, spread, x label).
panels = (
    (ax1, std_mags, mean_std_mags, std_std_mags, 'Magnetometer'),
    (ax2, std_grads, mean_std_grads, std_std_grads, 'Gradiometer'),
)

for axis, channel_stds, centre, spread, x_label in panels:
    n_channels = len(channel_stds)
    channel_numbers = list(range(1, n_channels + 1))

    # STD of every channel, then three horizontal lines: mean, mean-1STD, mean+1STD.
    axis.plot(channel_numbers, channel_stds)
    for level in (centre, centre - spread, centre + spread):
        axis.plot(channel_numbers, [level] * n_channels)

    axis.set(xlabel=x_label, ylabel='STD')

plt.show()

# Author's observation: BUT NO! They are still at ^-9 and ^-7
# (presumably powers of ten of the plotted values).


In [None]:
# DON'T RUN.
# Adds channels to the bads - only if we want to later!

# Keep a copy of the original list so that it can be restored.
original_bads = deepcopy(raw_bandpass.info['bads'])
# Other ways to edit the list:
#raw_bandpass.info['bads'].append('EEG 050')               # add a single channel
#raw_bandpass.info['bads'].extend(['EEG 051', 'EEG 052'])  # add a list of channels
#bad_chan = raw_bandpass.info['bads'].pop(-1)  # remove the last entry in the list
#raw_bandpass.info['bads'] = original_bads     # change the whole list at once

# Add only the channels with a big STD (both mags and grads) to the bads.
# Every flagged row is a (name, index) pair; the name is converted to a plain
# str because the rows come from a numpy array. Names that are already listed
# are skipped, so re-running the cell does not create duplicates.
for flagged_group in (magn_channel_big_std, grad_channel_big_std):
    for flagged in flagged_group:
        bad_name = str(flagged[0])
        if bad_name not in raw_bandpass.info['bads']:
            raw_bandpass.info['bads'].append(bad_name)

print(raw_bandpass.info['bads'])

In [None]:
#%% Next step: STDs also have to be calculated for each separate epoch,
# because over the whole recording a noisy channel can even out and go unnoticed.
#
# This cell extracts the events that are then used to epoch the data.
#
# Background (quoted from the MNE tutorial):
# "STIM channels record voltages (usually short, rectangular DC pulses of fixed
# magnitudes sent from the experiment-controlling computer) that are time-locked
# to experimental events, such as the onset of a stimulus or a button-press
# response by the subject (those pulses are sometimes called TTL pulses, event
# pulses, trigger signals, or just "triggers"). In other cases, these pulses may
# not be strictly time-locked to an experimental event, but instead may occur in
# between trials to indicate the type of stimulus (or experimental condition)
# that is about to occur on the upcoming trial."

# Look at the stimulus channels only. The view can be limited, for example with
# .plot(start=3, duration=6).
stim_only = raw_bandpass.copy().pick_types(meg=False, stim=True)
stim_only.plot()

# min_duration: the minimum duration (in seconds) of a change in the events
# channel required to consider it an event.
# Open question: what is the optimal duration? The number of events found
# depends on it. TODO: make it a user-picked variable.
#
# STI101 holds the stim data in this file; the name may differ in other files
# (it can be STI 014 in older systems, for example). There can also be several
# stim channels, in which case the main one that sums all the others is needed.
# TODO: detect the main stim channel automatically, or let MNE find it:
# "If you don't provide the name of a STIM channel, find_events will first look
# for MNE-Python config variables for variables MNE_STIM_CHANNEL,
# MNE_STIM_CHANNEL_1, etc. If those are not found, channels STI 014 and STI101
# are tried, followed by the first channel with type "STIM" present in
# raw.ch_names. If you regularly work with data from several different MEG
# systems with different STIM channel names, setting the MNE_STIM_CHANNEL config
# variable may not be very useful, but for researchers whose data is all from a
# single system it can be a time-saver to configure that variable once and then
# forget about it."
#
# find_events description:
# https://mne.tools/stable/generated/mne.find_events.html#mne.find_events
# https://mne.tools/stable/auto_tutorials/intro/20_events_from_raw.html
events = mne.find_events(raw_bandpass, stim_channel='STI101', min_duration=1.2)

# Show the first 5 events.
# "The resulting events array is an ordinary 3-column NumPy array, with sample
# number in the first column and integer event ID in the last column; the middle
# column is usually ignored. Rather than keeping track of integer event IDs, we
# can provide an event dictionary that maps the integer IDs to experimental
# conditions or events."
print(events[:5])

# Plot the events (an event_id=event_dict argument could be added here).
fig = mne.viz.plot_events(
    events,
    sfreq=raw_bandpass.info['sfreq'],
    first_samp=raw_bandpass.first_samp,
)
#fig.subplots_adjust(right=0.7)  # make room for legend

# Plot the events together with the data; each event ID gets its own colour.
event_colors = {9: 'r', 19: 'g', 20: 'b', 21: 'm', 22: 'y'}
raw_bandpass.plot(events=events, start=5, duration=300, color='gray',
                  event_color=event_colors)

In [None]:
# DON'T RUN - just useful for future work.

# Continue working with events - extra features.

# How to detect events in order to epoch the data afterwards:
# https://mne.tools/stable/auto_tutorials/intro/10_overview.html#sphx-glr-auto-tutorials-intro-10-overview-py


# Event dictionary (condition name -> integer event ID), if needed:
condition_names = ('auditory/left', 'auditory/right', 'visual/left',
                   'visual/right', 'smiley', 'buttonpress')
condition_ids = (1, 2, 3, 4, 5, 32)
event_dict = dict(zip(condition_names, condition_ids))

# Rejection thresholds that would allow particular epochs to be rejected.
# The values are copied from the tutorial. We would first need to epoch our
# own data, calculate the STD over different epochs and channels, and then
# decide which values to put here.
reject_criteria = {
    'mag': 4000e-15,   # 4000 fT
    'grad': 4000e-13,  # 4000 fT/cm
    'eeg': 150e-6,     # 150 µV
    'eog': 250e-6,     # 250 µV
}


# For some experiments (such as those analysing resting-state activity) there
# may not be any experimental events in the raw recording. In such cases an
# events array of equally spaced events can be generated with
# mne.make_fixed_length_events():
new_events = mne.make_fixed_length_events(
    raw_bandpass,
    start=5,
    stop=50,
    duration=2.,
)

# By default all events get the integer event ID 1; this can be changed with
# the id parameter. An overlap duration can be specified as well: for epochs
# that are 2.5 seconds long and overlap by 0.5 seconds, pass duration=2.5,
# overlap=0.5 to make_fixed_length_events() (this yields the same spacing of
# events as duration=2, overlap=0).

In [None]:
# Epoching the data:

# tmin, tmax: start and end time of the epochs in seconds, relative to the
# time-locked event (MNE defaults: -0.2 and 0.5).
# TODO: make these user-defined variables. Usually -1 and 3 s are used; the
# choice strongly influences the speed of the further calculations.
epoch_tmin, epoch_tmax = -0.2, 1

# event_id=event_dict and reject=reject_criteria could be added to this call.
epochs = mne.Epochs(
    raw_bandpass,
    events,
    tmin=epoch_tmin,
    tmax=epoch_tmax,
    baseline=None,
    preload=True,
)

print(epochs)

# Just for visual inspection: the epochs and the locations of the sensors.

# By default the channels are plotted grouped by type: first grads, then mags.
# Black vertical lines separate the epochs.
epochs.plot()

# Position of each channel.
epochs.plot_sensors(kind='3d', ch_type='all')

In [None]:
# Check not only the STD of the whole channel, but also the epochs of each channel:
# 1. Loop over the epochs of each channel and calculate the STD for every
#    separate mag and grad.
# 2. Check which epochs of which channel are over the STD of this epoch over
#    all channels.

# By default channel values are scaled when exported: EEG data are converted
# to µV, magnetometer data to fT and gradiometer data to fT/cm. The scalings
# parameter customises this; passing 1 for a channel type suppresses the
# scaling for that type (for all types: scalings=dict(eeg=1, mag=1, grad=1)).
unit_scalings = {'mag': 1, 'grad': 1}

# Present the epochs as a data frame:
df = epochs.to_data_frame(time_format=None, scalings=unit_scalings)
df


In [None]:

# 1) Loop over the epochs of each channel and calculate the STD for every
#    separate mag and grad.

import pandas as pd
eps = list(range(0, len(events)))  # list of epoch numbers
mags_names = [mag[0] for mag in mags]
grads_names = [grad[0] for grad in grads]

combined_names = {"mags": mags_names, "grads": grads_names}

dict_mags = {}
dict_grads = {}

# Split the frame into its epochs ONCE. The earlier version walked through
# every row of the frame again for every epoch, which made the cell extremely
# slow for long recordings.
rows_by_epoch = {epoch_number: epoch_rows for epoch_number, epoch_rows in df.groupby('epoch')}

for ep in eps:  # loop over each epoch
    rows_for_ep = rows_by_epoch.get(ep)  # all rows of 1 epoch, all channels; None if the epoch is absent

    std_epoch = {"mags": [], "grads": []}  # STDs of this epoch per sensor type

    for key_of_list in combined_names:  # loop over mags, then grads

        for ch_name in combined_names[key_of_list]:  # loop over channel names

            if rows_for_ep is None:
                # The epoch number is not present in the frame: mark it as
                # missing, as before, but without calling np.std on empty data.
                std_ch_ep = np.nan
            else:
                std_ch_ep = np.std(rows_for_ep[ch_name].to_numpy())

            std_epoch[key_of_list].append(std_ch_ep)

    dict_mags[ep] = std_epoch["mags"]
    dict_grads[ep] = std_epoch["grads"]

# Rows: channels, columns: epoch numbers.
df_std_mags = pd.DataFrame(dict_mags, index=mags_names)
df_std_grads = pd.DataFrame(dict_grads, index=grads_names)

print('Mags std df: ')
df_std_mags
#print('Grads std df: ')
#df_std_grads


# Idea: try to turn the RMSE calculation into a function and use it instead of the STD.

In [None]:
# 1*) Check whether the calculations above were right.
# Important checking step. Noticed: with an epoch time of -1 to 3 s the whole
# epoch 0 might be dropped; the check then gives False. The reason for this
# is unknown.

# Find all indices of a particular epoch in the data frame.
epoch0_ind = df.index[df['epoch'] == 0].tolist()

# All data of epoch 0 for MEG0111. The index of this channel in raw is 11; it
# is shifted by 3 because the data frame has 3 additional leading columns.
ch0111 = df.iloc[epoch0_ind, 3+11]
print('data for MEG0111 of epoch 0: ', ch0111)
ch0111_std = np.std(ch0111)
print('Should be: ', ch0111_std)
print('Calculated: ', df_std_mags.iloc[0,0])

# Compare with a relative tolerance instead of exact equality: the two numbers
# are computed along different code paths and may differ in the last bits.
# atol=0 is essential, because the default absolute tolerance of np.isclose
# (1e-8) is far larger than the values themselves.
print(np.isclose(ch0111_std, df_std_mags.iloc[0,0], rtol=1e-9, atol=0))

In [None]:
# NEW: STD over epochs. Same as before, but with separate data frames for
# mags and grads.
# Part 1) Creation of the separate data frames.

# Channel picks per sensor type; no EEG, EOG or stimulus channels.
meg_only = dict(eeg=False, eog=False, stim=False)
picks_grad = mne.pick_types(raw_bandpass.info, meg='grad', **meg_only)
picks_magn = mne.pick_types(raw_bandpass.info, meg='mag', **meg_only)

# The events found earlier are reused:
#events = mne.find_events(raw_bandpass, stim_channel='STI101', min_duration=1.2)
#n_events=len(events)

# Same epoch settings as for the combined epochs, applied per sensor type.
epoch_settings = dict(tmin=-0.2, tmax=1, preload=True, baseline=None)
epochs_mags = mne.Epochs(raw_bandpass, events, picks=picks_magn, **epoch_settings)
epochs_grads = mne.Epochs(raw_bandpass, events, picks=picks_grad, **epoch_settings)


# Present the epochs as data frames - separately for mags and grads.
# A scaling of 1 keeps the values unscaled.
frame_settings = dict(time_format=None, scalings=dict(mag=1, grad=1))

df_epochs_mags = epochs_mags.to_data_frame(**frame_settings)

df_epochs_grads = epochs_grads.to_data_frame(**frame_settings)

In [None]:

# 2) Part 2. Function that loops over the epochs; it is applied to mags and grads below.

import pandas as pd

# Epoch numbers: one per detected event.
eps = list(range(len(events)))

# Channel names taken from the (name, index) pairs.
mags_names = [name for name, _index in mags]
grads_names = [name for name, _index in grads]

def std_mg(mg_names, df_mg, epoch_numbers=None):
    """Calculate the STD of every channel within every epoch.

    Parameters
    ----------
    mg_names : list of str
        Names of the channel columns of ``df_mg`` to process.
    df_mg : pandas.DataFrame
        Frame with one row per sample, an ``'epoch'`` column holding the epoch
        number of each row and one column per channel named in ``mg_names``.
    epoch_numbers : iterable of int, optional
        Epoch numbers that become the columns of the result. If omitted, the
        epoch numbers present in ``df_mg`` are used (in ascending order).
        Requested epochs that do not occur in ``df_mg`` get NaN for all
        channels.

    Returns
    -------
    pandas.DataFrame
        Rows are the channels (index ``mg_names``), columns are the epoch
        numbers, values are population STDs (``np.std``, i.e. ddof=0).
    """
    # Split the frame into its epochs once, instead of scanning every row of
    # the frame again for every epoch.
    rows_by_epoch = {number: rows for number, rows in df_mg.groupby('epoch', sort=True)}

    if epoch_numbers is None:
        # No dependence on a notebook-level list of epoch numbers: use what is in the data.
        epoch_numbers = list(rows_by_epoch)

    dict_mg = {}

    for ep in epoch_numbers:  # loop over each epoch
        rows_for_ep = rows_by_epoch.get(ep)

        if rows_for_ep is None:
            # Requested epoch is absent from the data (for example dropped).
            dict_mg[ep] = [np.nan] * len(mg_names)
        else:
            # STD of the samples of 1 epoch, separately for every channel.
            dict_mg[ep] = [np.std(rows_for_ep[ch_name].to_numpy()) for ch_name in mg_names]

    df_std_mg = pd.DataFrame(dict_mg, index=mg_names)

    return(df_std_mg)
   
# Apply the function to the magnetometer and to the gradiometer frames:
df_std_mags_2 = std_mg(mags_names, df_epochs_mags)
df_std_grads_2 = std_mg(grads_names, df_epochs_grads)

# Look at the magnetometer result:
df_std_mags_2


In [None]:
# 2! Check whether the calculations above were right:
# calculate by hand the STD over epoch 0 for channel MEG0111, taking the data
# from df_epochs_mags.

# Important checking step.

# Find all indices of a particular epoch in the data frame.
epoch0_ind = df_epochs_mags.index[df_epochs_mags['epoch'] == 0].tolist()

# All data of epoch 0 for MEG0111. The index of this channel among the
# magnetometers is 0; it is shifted by 3 because the data frame has 3
# additional leading columns.
ch0111 = df_epochs_mags.iloc[epoch0_ind, 0+3]
print('data for MEG0111 of epoch 0: ', ch0111)
ch0111_std = np.std(ch0111)
print('Should be: ', ch0111_std)
print('Calculated: ', df_std_mags_2.iloc[0,0])

# Compare with a relative tolerance instead of exact equality: the two numbers
# are computed along different code paths and may differ in the last bits.
# atol=0 is essential, because the default absolute tolerance of np.isclose
# (1e-8) is far larger than the values themselves.
print(np.isclose(ch0111_std, df_std_mags_2.iloc[0,0], rtol=1e-9, atol=0))

In [None]:
# 3! BUT: apparently the data points produced by epoching differ depending on
# whether mags and grads are epoched separately or together!
# The difference is at about the 8th-10th digit after the comma and further.
# Some scaling issue?
# This cell shows that they are different.

# Rows of epoch 0 in the combined frame, and the MEG0111 column.
# The index of this channel in raw is 11; it is shifted by 3 because the data
# frame has 3 additional leading columns.
epoch0_ind = df.index[df['epoch'] == 0].tolist()
ch0111 = df.iloc[epoch0_ind, 3+11]

# Rows of epoch 0 in the magnetometer-only frame, and the MEG0111 column.
# The rows are selected with the index list of THIS frame; the list of the
# combined frame was used here before, which left epoch0_ind_mag unused.
epoch0_ind_mag = df_epochs_mags.index[df_epochs_mags['epoch'] == 0].tolist()
ch0111_mag = df_epochs_mags.iloc[epoch0_ind_mag, 3]

if len(ch0111) != len(ch0111_mag):
    print('Different number of samples in epoch 0:', len(ch0111), 'vs', len(ch0111_mag))

print('Values that are not equal between df (mags+grads togetehr) and df_epochs_mags (separate): ')
# Compare sample by sample, by position, so that the result does not depend on
# the index labels of the two frames.
for index, (value, value_mag) in enumerate(zip(ch0111, ch0111_mag)):
    if value != value_mag:
        print("Not equal:", index, "\n", value, "\n", value_mag, "\n")

In [None]:
# 2) Check (which epochs for which channel) are over 1 STD for (this epoch over all channels)

std_lvl = 1  # width of the accepted range, in STDs


def _flag_outlier_channels(df_std, n_std):
    """Flag, per epoch, the channels whose STD lies outside mean +/- n_std * STD.

    df_std holds one row per channel and one column per epoch. Returns the
    per-epoch mean and STD of the channel STDs (as lists) and two boolean
    frames of the same shape as df_std: "too large" and "too small".
    """
    mean_per_epoch = df_std.mean(axis=0)
    std_per_epoch = df_std.std(axis=0, ddof=0)  # ddof=0 matches np.std

    # axis='columns' lines the per-epoch limits up with the epoch columns.
    too_large = df_std.gt(mean_per_epoch + n_std * std_per_epoch, axis='columns')
    too_small = df_std.lt(mean_per_epoch - n_std * std_per_epoch, axis='columns')

    return mean_per_epoch.tolist(), std_per_epoch.tolist(), too_large, too_small


# Mean and STD of the STDs of all channels for every single epoch, and the
# channels that are over +std_lvl STD or under -std_lvl STD in each epoch.
# The flags are built as new boolean frames; writing booleans into the float
# columns of a copy (as done before) is an incompatible-dtype assignment that
# recent pandas versions warn about.
(mean_std_mags_per_epoch, std_std_mags_per_epoch,
 df_ch_ep_large_std_mags, df_ch_ep_small_std_mags) = _flag_outlier_channels(df_std_mags, std_lvl)

(mean_std_grads_per_epoch, std_std_grads_per_epoch,
 df_ch_ep_large_std_grads, df_ch_ep_small_std_grads) = _flag_outlier_channels(df_std_grads, std_lvl)


# Look at the data frame:
print('Magnetometers: (which epoch in which channels) are over 1STD in (this epoch over all channels). True=over: ')
df_ch_ep_large_std_mags

In [None]:
# Write the "STD too large" tables to csv files for the user:
# one file for the magnetometers and one for the gradiometers.
csv_exports = (
    (df_ch_ep_large_std_mags, '/Users/jenya/Local Storage/Job Uni Rieger lab/MEG QC code/large_std_mags.csv'),
    (df_ch_ep_large_std_grads, '/Users/jenya/Local Storage/Job Uni Rieger lab/MEG QC code/large_std_grads.csv'),
)

for flagged_table, csv_path in csv_exports:
    flagged_table.to_csv(csv_path)


