In [15]:
import os
import pandas as pd 
import numpy as np
import mne
from library import *

# Load the recording table; 'pid' is forced to str so pandas does not
# reinterpret the identifiers as numbers.
df = pd.read_csv("study_p_5_5_raw_bp_notch_avgref.csv", dtype={'pid': str})

# One Morlet power table per (pid, block_id) group.
all_psd = [
    mne_psd_morlet(block_df)
    for _, block_df in df.groupby(['pid', 'block_id'])
]

# Stack the per-block tables under a fresh 0..N-1 index and write them out.
psd_df = pd.concat(all_psd, ignore_index=True)
psd_df.to_csv(
    DBFNAME.split('.csv')[0] + '_psd_morlet' + FILTNAME,
    index=False,
)

Creating RawArray with float64 data, n_channels=4, n_times=1194
    Range : 0 ... 1193 =      0.000 ...     4.660 secs
Ready.
Not setting metadata
4 matching events found
No baseline correction applied
0 projection items activated
Using data from preloaded Raw for 4 events and 257 original time points ...
0 bad epochs dropped


  raw.set_annotations(annotations)
[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.


Not setting metadata


[Parallel(n_jobs=-1)]: Done   2 out of   4 | elapsed:    0.8s remaining:    0.8s
[Parallel(n_jobs=-1)]: Done   4 out of   4 | elapsed:    0.8s finished


NameError: name 'df_power' is not defined

In [14]:
def _tfr_power_to_long_frame(log_power, freqs):
    """Flatten a 4-D power array into one row per (epoch, channel, time sample).

    Parameters
    ----------
    log_power : np.ndarray
        Array of shape (n_epochs, n_channels, n_freqs, n_times).
    freqs : sequence
        The analysed frequencies, one per entry of the frequency axis; used
        to name the ``freq_<f>Hz`` columns.

    Returns
    -------
    pd.DataFrame
        One ``freq_<f>Hz`` column per frequency followed by integer
        ``epoch``, ``channel`` and ``time`` index columns.

    Raises
    ------
    ValueError
        If ``len(freqs)`` does not match the frequency axis of ``log_power``.
    """
    num_epochs, num_channels, num_freqs, num_times = log_power.shape
    if len(freqs) != num_freqs:
        raise ValueError(
            f"expected {num_freqs} frequencies to label the columns, got {len(freqs)}"
        )

    # The frequency axis has to be LAST before flattening; reshaping the
    # (epoch, channel, freq, time) array directly would interleave time
    # samples into the frequency columns.
    power_2d = np.moveaxis(log_power, 2, -1).reshape(-1, num_freqs)

    # Label columns with the real frequencies, not their positional index.
    df_power = pd.DataFrame(power_2d, columns=[f'freq_{freq}Hz' for freq in freqs])

    # Row order after the flatten is epoch (slowest), channel, time (fastest).
    df_power['epoch']   = np.repeat(np.arange(num_epochs), num_channels * num_times)
    df_power['channel'] = np.tile(np.repeat(np.arange(num_channels), num_times), num_epochs)
    df_power['time']    = np.tile(np.arange(num_times), num_epochs * num_channels)
    return df_power


def mne_psd_morlet(df):
    """
    Compute single-epoch Morlet wavelet power (in dB) for one block of EEG.

    df is one puzzle's worth of data.
    https://mne.tools/stable/auto_tutorials/time-freq/20_sensors_time_frequency.html#time-frequency-analysis-power-and-inter-trial-coherence

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the CH_NAMES columns, a 'timestamp' column and the
        'pid', 'elo', 'elo_bin', 'block_id' and 'solved' columns.
        NOTE(review): 'timestamp' is treated as milliseconds because the
        original code divides its differences by 1e3 "in seconds" - confirm.

    Returns
    -------
    pd.DataFrame
        One row per (epoch, channel, time sample) with a ``freq_<f>Hz``
        column for each of the 1..50 Hz wavelets, integer 'epoch',
        'channel' and 'time' index columns, and the block's metadata
        columns repeated on every row.
    """
    data = df[CH_NAMES].to_numpy().T

    # Subtract into a new array: an in-place `-=` on the column's buffer can
    # silently modify the caller's frame, or fail on a read-only array.
    timestamps = df['timestamp'].to_numpy()
    rel_timestamps = timestamps - timestamps[0]
    time_diffs = np.diff(rel_timestamps) / 1e3  # in seconds

    # mne.Annotations expects onsets in seconds, the same unit as duration.
    annotations = mne.Annotations(onset=rel_timestamps[:-1] / 1e3,
                                  duration=time_diffs,
                                  description=['EDGE'] * len(time_diffs))

    info = mne.create_info(ch_names=CH_NAMES, sfreq=SAMPLING_RATE, ch_types='eeg')
    raw = mne.io.RawArray(data, info)
    raw.set_annotations(annotations)

    # Non-overlapping 1 s epochs; tmax stops just short of the next event.
    events = mne.make_fixed_length_events(raw, duration=1)
    epochs = mne.Epochs(raw, events, tmin=0, tmax=0.999, baseline=None)

    freqs = np.arange(1, 51)  # Frequencies from 1 to 50 Hz
    n_cycles = freqs / 2.

    tfr = mne.time_frequency.tfr_morlet(epochs, freqs=freqs, n_cycles=n_cycles,
                                        average=False, return_itc=False, n_jobs=-1,
                                        use_fft=True, output='power')

    # With output='power' the TFR data is already power, so it must not be
    # squared again before the log transform.
    power = tfr.data

    # convert to log scale (decibels)
    log_power = 10 * np.log10(power)

    # shape is (n_epochs, n_channels, n_freqs, n_times)
    df_power = _tfr_power_to_long_frame(log_power, freqs)

    # These are constant within a block. Broadcast the scalar: assigning the
    # Series itself would align on index labels and leave most rows NaN.
    for column in ('pid', 'elo', 'elo_bin', 'block_id', 'solved'):
        df_power[column] = df[column].iloc[0]
    return df_power
 

    # # Create frequency labels for the columns
    # freq_labels = [f'freq_{freq}Hz' for freq in freqs]

    # # Create a DataFrame from the power array
    # df_power = pd.DataFrame(power_2d, columns=freq_labels)

    # # Create arrays representing the epoch, time, and channel for each row
    # epochs_array   = np.repeat(np.arange(log_power.shape[0]), log_power.shape[1]*log_power.shape[2]*log_power.shape[3])
    # ch_names_array = np.tile(np.repeat(CH_NAMES, log_power.shape[2]*log_power.shape[3]), log_power.shape[0]*log_power.shape[1])
    # times_array    = np.tile(tfr.times, log_power.shape[0]*log_power.shape[1]*len(CH_NAMES))
    
    # # Add these as columns to the DataFrame
    # df_power['epoch']   = epochs_array
    # df_power['time']    = times_array
    # df_power['channel'] = ch_names_array

    # # Reorder the columns so the epoch, time, and channel are first
    # df_power = df_power[['epoch', 'time', 'channel'] + freq_labels]


    # # interpolate timestamps to the psd transformed data    
    # start_time = df['timestamp'].iloc[0]
    # end_time = df['timestamp'].iloc[-1]
    # num_datapoints = psd_df.shape[0]
    # psd_df['timestamp'] = np.linspace(start_time, end_time, num_datapoints)
    
    # these are all the same for each puzzle, so all good.
    # psd_df['pid']      = df['pid']
    # psd_df['elo']      = df['elo']
    # psd_df['elo_bin']  = df['elo_bin']
    # psd_df['block_id'] = df['block_id']
    # psd_df['solved']   = df['solved']
    
    # return psd_df

In [6]:
# https://neuraldatascience.io/7-eeg/erp_artifacts.html
def mne_ica(df):
    """
    Fit an ICA decomposition to one recording and plot its diagnostics.

    The CH_NAMES columns of ``df`` are wrapped in an MNE RawArray and given a
    hand-specified electrode montage. A copy is band-pass filtered (1-30 Hz),
    cut into fixed-length 1 s epochs and used to fit ``mne.preprocessing.ICA``
    keeping 99 % of the variance. Components flagged by ``find_bads_eog`` are
    stored in ``ica.exclude``.

    Side effects: prints the standard 10-20 montage and draws the component
    property and EOG score figures. Nothing is returned, so the fitted ICA
    object is not available to the caller.

    NOTE(review): ``get_rejection_threshold`` and ``RANDOM_STATE`` are not
    defined in this cell - presumably they come from ``library``; confirm.
    """
    samples = df[CH_NAMES].to_numpy().T
    eeg_info = mne.create_info(ch_names=CH_NAMES, sfreq=SAMPLING_RATE, ch_types='eeg')
    raw = mne.io.RawArray(samples, eeg_info)

    print(mne.channels.make_standard_montage('standard_1020', head_size='auto'))

    # One xyz row per entry of CH_NAMES (channel labels as given in the
    # original comments).
    # NOTE(review): these look like unit-sphere coordinates while the montage
    # is declared in the 'head' frame - confirm the intended scale.
    electrode_xyz = np.array([
        [-0.2852,  0.8777, -0.3826],   # TP9
        [ 0.8090,  0.5878,  0.0000],   # AF7
        [ 0.8090, -0.5878,  0.0000],   # AF8
        [-0.2853, -0.8777, -0.3826],   # TP10
    ])

    # Create a DigMontage object and attach it to the recording
    montage = mne.channels.make_dig_montage(
        ch_pos={ch: xyz for ch, xyz in zip(CH_NAMES, electrode_xyz)},
        coord_frame='head',
    )
    raw.set_montage(montage)

    # For ICA, we filter out more low-frequency power
    low_cut_hz = 1.0
    high_cut_hz = 30
    filtered = raw.copy().filter(low_cut_hz, high_cut_hz)

    # Fixed-length epochs of the filtered copy, loaded into memory.
    epoch_len_s = 1.0
    epoch_events = mne.make_fixed_length_events(filtered, duration=epoch_len_s)
    ica_epochs = mne.Epochs(
        filtered,
        epoch_events,
        tmin=0.0,
        tmax=epoch_len_s,
        baseline=None,
        preload=True,
    )

    rejection = get_rejection_threshold(ica_epochs)

    # A fractional n_components selects by % explained variance
    ica = mne.preprocessing.ICA(n_components=.99, random_state=RANDOM_STATE)
    ica.fit(ica_epochs, reject=rejection, tstep=epoch_len_s)

    ica.plot_properties(
        ica_epochs,
        picks=range(ica.n_components_),
        psd_args={'fmax': high_cut_hz},
    )

    # NOTE(review): `filtered` only carries the CH_NAMES channels; if
    # 'probe-1' / 'probe-2' are not among them this lookup cannot succeed -
    # confirm the channel names.
    flagged, scores = ica.find_bads_eog(
        filtered,
        ch_name=['probe-1', 'probe-2'],
        threshold=1.96,
    )
    ica.exclude = flagged

    ica.plot_scores(scores)
    

    # # Get the ICA component sources
    # sources = ica.get_sources(raw)

    # # Compute the kurtosis scores for each component
    # kurt_scores = kurtosis(sources.get_data(), axis=1)    
    # eog_indices = np.where(kurt_scores > 7)[0]
        
    # raw_clean = ica.apply(raw.copy(), exclude=eog_indices)

    # data_clean = raw_clean.get_data()
    # data_clean.plot(title="cleaned raw data after ICA", scalings="auto") #,start=0, duration=10)
