In [1]:
import pandas as pd
import mne
import numpy as np
import os

In [3]:
#functions
def create_demographics(file_path, beh_df_path, number_of_trials):
    '''Build the per-trial demographic columns (group, participant_id, city, condition) for one spectral file.

    Relies strongly on the naming used in the resting-state eeg psychedelic project, e.g. "101_epo_ec-psd.hdf5":
    - characters 0-2 of the file name are the participant number (matched against 'sub-<number>' in the behavioural data)
    - the first character encodes the city: '1' -> 'krk', '0' -> 'wwa'
    - character 9 encodes the eyes condition: 'c' -> 'ec', 'o' -> 'eo'

    file_path - file name in the format above; a leading directory part is ignored
    beh_df_path - path to the ';'-separated behavioural csv, it has to contain the columns 'participant_id' and 'group'
    number_of_trials - how many times the demographic row is repeated, so the result has one row per trial

    Returns a dataframe with the columns 'group', 'participant_id', 'city' and 'condition' and a fresh 0..n-1 index.
    When the participant is not found in the behavioural data a message is printed and the returned dataframe has no rows.
    When the first character is neither '1' nor '0' the 'city' column is not added; the same holds for 'condition'
    when character 9 is neither 'c' nor 'o'.
    Raises IndexError when the file name is shorter than 10 characters.

    This function is used inside trials_by_bands()'''

    #load
    beh_df = pd.read_csv(beh_df_path, sep=';')
    #decode the bare file name so a directory prefix does not shift the character positions
    file_name = os.path.basename(file_path)
    participant_id = file_name[:3]

    #filter rows and columns in one step
    #.copy() gives an independent frame, so adding columns below does not raise SettingWithCopyWarning
    is_participant = beh_df['participant_id'] == 'sub-' + participant_id
    participants_dem = beh_df.loc[is_participant, ['group', 'participant_id']].copy()

    #Check for missing behavioral data
    if participants_dem.empty:
        print(f'Participant {participant_id} lacks behavioural data')

    #add city information
    if participant_id[0] == '1':
        participants_dem['city'] = 'krk'
    elif participant_id[0] == '0':
        participants_dem['city'] = 'wwa'
    #add eyes condition
    if file_name[9] == 'c':
        participants_dem['condition'] = 'ec'
    elif file_name[9] == 'o':
        participants_dem['condition'] = 'eo'

    #pd.concat raises on an empty list, so a file without trials gets an empty frame with the same columns
    if number_of_trials < 1:
        return participants_dem.iloc[0:0].reset_index(drop=True)

    #make ntrials rows so dimensions will match
    participants_dem = pd.concat([participants_dem] * number_of_trials, ignore_index=True)

    #ntrials x demographic columns
    return participants_dem

    
def band_x_mask_of_that_band_dict(frequency_vector_path):

    '''Build one boolean mask per frequency band from a frequency vector stored in .npy format.

    frequency_vector_path - path to the .npy file with the frequency vector

    Returns a dictionary band name -> mask, ordered from the lowest band to the highest.
    A True in a mask marks a position of the frequency vector that belongs to that band.
    Band limits: delta [1, 4), theta [4, 9), alpha [9, 14), beta [14, 31), gamma [31, 45].'''

    freqs = np.load(frequency_vector_path)

    #bands with an inclusive lower and an exclusive upper limit
    half_open_bands = (('delta', 1, 4),
                       ('theta', 4, 9),
                       ('alpha', 9, 14),
                       ('beta', 14, 31))

    masks = {}
    for name, low, high in half_open_bands:
        masks[name] = (freqs >= low) & (freqs < high)

    #gamma is the only band that includes its upper limit
    masks['gamma'] = (freqs >= 31) & (freqs <= 45)

    return masks


def trials_by_bands(file_path, freq_mask_dict, chan_select_list='all', drop_chans_list='none', convert_units=True,
                    beh_df_path=None, band_names=None, dfs_out=None):

    '''
    Compute the mean spectral power per trial and frequency band for one spectrum file and
    append one dataframe per band (demographics + trial number + band power) to dfs_out.

    You have to be in a folder with spectrum objects. os.chdir(folder_path)

    file_path - name of a file readable by mne.time_frequency.read_spectrum(), in the format "101_epo_ec-psd.hdf5"
    freq_mask_dict - dictionary band name -> boolean mask over the frequency axis, e.g. created with band_x_mask_of_that_band_dict()
    chan_select_list - for the electrode selection. Requires a list with channel names. All channels ('all') by default.
    drop_chans_list - for the electrode selection. Requires a list with channel names. No channel is dropped ('none') by default.
    beh_df_path - path to the behavioural csv passed to create_demographics(). The project file is used by default.
    band_names - band names to process, every name has to be a key of freq_mask_dict. The notebook-level list `bands` by default.
    dfs_out - list of lists, the dataframe of the band at position i is appended to dfs_out[i]. The notebook-level `dfs_all` by default.

    Arguments specific to rs psychedelic project:
    convert_units - multiply the power by 10**12 (V^2 -> uV^2) and express it in dB (10 * log10). True by default.

    Returns the list with the dataframes that were appended, one per band, in the order of band_names.
    Raises ValueError when file_path does not follow the naming format or when a channel argument is not a list.'''


    #check the path
    if len(file_path) < 10 or file_path[0] not in ('0', '1') or file_path[9] not in ('o', 'c'):
        raise ValueError('Path is not in the desired format like: "101_epo_ec-psd.hdf5"')

    #check the channel arguments before anything is read from disk
    select_channels = chan_select_list != 'all'
    if select_channels and not isinstance(chan_select_list, list):
        raise ValueError('Channel selection argument should be a list')

    drop_channels = drop_chans_list != 'none'
    if drop_channels and not isinstance(drop_chans_list, list):
        raise ValueError('Channel drop argument should be a list')

    #defaults that keep the original two-argument call working
    if beh_df_path is None:
        beh_df_path = r"C:\Users\stasi\OneDrive\Pulpit\proj_mgr\all_channels\output\dataframes\data_beh_main_krk_wwa.csv"
    if band_names is None:
        #notebook-level list, it has to be defined before the first call
        band_names = bands
    if dfs_out is None:
        #notebook-level accumulator, it has to be defined before the first call
        dfs_out = dfs_all

    #upload spectrum object (trials, channels, npoints)
    spectrum_object = mne.time_frequency.read_spectrum(file_path)

    #optional electrode selection
    if select_channels:
        spectrum_object.pick(chan_select_list)
    if drop_channels:
        spectrum_object.drop_channels(drop_chans_list)

    #extract data and average over channels
    #trials x npoints
    spectral_data = np.mean(spectrum_object.get_data(), axis=1)

    #get the information about the number of trials
    #note that ntrials differs between participants but it doesn't matter since we concatenate them vertically and the ncolumns remains the same
    ntrials = spectral_data.shape[0]

    #convert units
    #assumes the stored power is in V^2/Hz (MNE works in volts) - adjust this in projects that store other units
    #10**12 = (10**6)**2, so V^2/Hz becomes uV^2/Hz, then the log puts it on a dB scale
    if convert_units:
        spectral_data = spectral_data * (10**12)
        spectral_data = 10 * np.log10(spectral_data)

    #spectral power per trial ACROSS ALL BANDS for a single subject
    #number of trials x bands
    #each mask keeps the frequency points of one band, the mean over them gives one value per trial
    band_means = {band_name: spectral_data[:, freq_mask_dict[band_name]].mean(axis=1)
                  for band_name in band_names}
    all_bands_df = pd.DataFrame(band_means, columns=list(band_names))
    all_bands_df['trial_number'] = list(range(ntrials))

    #create df with demographics
    #note that they are the same for a given file so only column with PSD differs
    dem_df = create_demographics(file_path, beh_df_path, ntrials)

    #Copy dem df and add trial number and PSD value
    #do it separately for each band
    per_band_dfs = []
    for band_idx, band_name in enumerate(band_names):
        #create a copy of dem df
        participant_df = dem_df.copy()
        #add PSD value from a certain band and trial number
        participant_df['trial_number'] = all_bands_df['trial_number']
        participant_df[band_name] = all_bands_df[band_name]
        #add a df to a proper list of dfs for a future concat
        dfs_out[band_idx].append(participant_df)
        per_band_dfs.append(participant_df)

    return per_band_dfs

In [4]:
#set up the parameters
#band order is really important so start with the lowest frequency
#trials_by_bands() appends the df of the band at position i to dfs_all[i]
bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']

#one list of per-participant dfs for every band, in the same order as `bands`
dfs_all = [[] for _ in bands]
delta_dfs, theta_dfs, alpha_dfs, beta_dfs, gamma_dfs = dfs_all

#folders with spectral files
folders = [r"D:\mgr_disc\Final\Krk_spectral_tensors",
          r"D:\mgr_disc\Final\Wwa_spectral_tensors"]

freq_vecs_paths = [r"D:\mgr_disc\freq_vectors\freqs_krk.npy",
                  r"D:\mgr_disc\freq_vectors\freqs_wwa.npy"]

save_folder_path = r"C:\Users\stasi\OneDrive\Pulpit\proj_mgr\all_channels\output\dataframes\PSD_trials_sep"
#second output folder so the csv files appear immediately next to the mixed models scripts
mixed_models_folder_path = r"C:\Users\stasi\OneDrive\Pulpit\proj_mgr\all_channels\scripts\mixed_models"

#only HDF5 files are passed on, anything else in the folder would make trials_by_bands() raise
spectrum_extensions = ('.h5', '.hdf5')

#separately for both cities
#krk is always first
for folder_path, freq_vec in zip(folders, freq_vecs_paths):
    #trials_by_bands() decodes the bare file name, so the working directory is moved to the data folder
    #note that the working directory is not restored afterwards
    os.chdir(folder_path)
    #sorted so the participants are always processed in the same order
    folder_content = sorted(os.listdir())
    files = [name for name in folder_content if name.lower().endswith(spectrum_extensions)]
    skipped = [name for name in folder_content if name not in files]
    if skipped:
        print(f'Skipped non-spectrum entries in {folder_path}: {skipped}')
    #create frequency masks here because cities differ in srate and epoch len
    bands_masks = band_x_mask_of_that_band_dict(freq_vec)

    #loop over participants
    for participant in files:
        trials_by_bands(participant, bands_masks)

#concatenate vertically all participant dfs and save them
#loop over bands
for band_idx, band_name in enumerate(bands):
    monster_df = pd.concat(dfs_all[band_idx], axis=0, ignore_index=False)
    #drop rows with any missing value
    #presumably this is what removes the participants reported as lacking behavioural data - TODO confirm
    monster_df = monster_df.dropna()
    csv_name = f"_{band_name}_PSD_trials_separately.csv"
    monster_df.to_csv(os.path.join(save_folder_path, csv_name))
    #save it a second time so it will appear immediately in the mixed models path
    monster_df.to_csv(os.path.join(mixed_models_folder_path, csv_name))

Participant 121 lacks behavioural data
Participant 121 lacks behavioural data
