In [None]:
# Import necessary libraries
import getpass
import glob
import os
import pandas as pd
import numpy as np
import h5py


In [None]:

# Identify who is running the notebook; the Dropbox folder is located under that user's profile
user = getpass.getuser()
print("Hello", user)

# Root of the CPLab Dropbox folder for this user
base = 'C:\\Users\\{}\\Dropbox\\CPLab'.format(user)
print('base = ', base)

# Output folders derived from the root: one for filtered files, one for results
files_savepath = ''.join((base, '\\all_data_filtered_mat\\'))
savepath = ''.join((base, '\\results\\'))
print('filtered files will be stored at', files_savepath)
print('plots and stuff will be at', savepath)


In [None]:

"""Load and clean filter data from an Excel file"""

# Load the manual per-channel annotations (made by Christiane) from filecheck.xlsx
filter_data = pd.read_excel("C:\\Users\\{}\\Dropbox\\CPLab\\filecheck.xlsx".format(user))
filter_data = filter_data.dropna(axis=1, how='all')  # remove columns in which every value is NA
filter_data = filter_data.dropna(axis=0, how='all')  # remove rows in which every value is NA
filter_data = filter_data.drop(filter_data.columns[-1], axis=1)  # drop the last column, which only holds comments
filter_data = filter_data.reset_index(drop=True)  # renumber the rows because some were dropped
# Set the column names manually: file name, six channel annotations, date
filter_data.columns = ['file', 'LFP1_AON', 'LFP2_AON', 'LFP3_AON', 'LFP4_AON', 'LFP1_vHp', 'LFP2_vHp', 'date']

# Reduce every entry of the 'file' column to its base name (no directory, no extension)
filter_data_files = list(filter_data['file'])
filter_base_names_without_ext = [os.path.splitext(os.path.basename(file))[0] for file in filter_data_files]
print(filter_base_names_without_ext)

# Store the cleaned names back so that later lookups can match on the bare file name
filter_data['file'] = filter_base_names_without_ext

# Collect the annotations of all six channels. The columns are selected by name so that
# none is missed: a positional slice 1:6 stops after 'LFP1_vHp' and leaves out 'LFP2_vHp'.
annotation_columns = ['LFP1_AON', 'LFP2_AON', 'LFP3_AON', 'LFP4_AON', 'LFP1_vHp', 'LFP2_vHp']
filter_data_annotations = np.array(filter_data[annotation_columns])

# Print the distinct annotations to confirm that only "no" and "ok" occur.
# pd.unique does not sort, so it also works when empty (NaN) cells are mixed with strings.
print(pd.unique(filter_data_annotations.ravel()))


In [None]:

# Find all .mat files in the data directory.
# The pattern is a normal (non-raw) string so that every '\\' is a single path separator;
# in a raw string the doubled backslashes would be kept literally in every returned path.
# glob returns files in arbitrary order, so the result is sorted to make runs reproducible.
files = sorted(glob.glob('C:\\Users\\{}\\Dropbox\\CPLab\\all_data_mat\\*.mat'.format(user)))
print(files)

# Print the number of files and the shape of the filter data for a quick consistency check
print(len(files))
print(filter_data.shape)

# Base names (no directory, no extension) of the data files, in the same order as `files`
files_list = [os.path.splitext(os.path.basename(file))[0] for file in files]


In [None]:

# Compare the data files on disk with the files listed in filecheck.xlsx
set1 = set(files_list)           # base names of the .mat files found on disk
set2 = set(filter_data['file'])  # base names listed in filecheck.xlsx
uncommon_elements = set1.symmetric_difference(set2)

# Sets have no stable order, so sort the names to get the same output on every run
uncommon_elements_list = sorted(uncommon_elements)
print(uncommon_elements_list)

# Report each direction separately so it is clear on which side a name is missing
print('on disk but not in filecheck.xlsx:', sorted(set1 - set2))
print('in filecheck.xlsx but not on disk:', sorted(set2 - set1))

# These two files don't exist and are a typo in the filecheck.xlsx file
# ['20230609_dk1_BW_nocontext_day2', '20230822_dk1_BW_nocontext_os2_day2']


In [None]:
"""Here we will drop the data file columns that are "NO" in the manual annotation file(filecheck.xlsx)"""

# Channel columns of filecheck.xlsx; only these are inspected for "no"/"ok" annotations
channel_names = ['LFP1_AON', 'LFP2_AON', 'LFP3_AON', 'LFP4_AON', 'LFP1_vHp', 'LFP2_vHp']

# Data files that have no row in filecheck.xlsx
not_here_list = []

# NOTE(review): the files are modified in place ('r+'), so a deleted channel cannot be
# restored from these files afterwards - confirm that a backup of the data exists.
for file in files:
    base_name = os.path.basename(file)
    base_name_without_ext = os.path.splitext(base_name)[0]

    # Files without an annotation row are only recorded; they are not opened for writing
    if base_name_without_ext not in filter_base_names_without_ext:
        print(base_name_without_ext, 'this file not here')
        not_here_list.append(base_name_without_ext)
        continue

    print(base_name_without_ext, 'yes')
    row = filter_data.loc[filter_data['file'] == base_name_without_ext]  # annotation row of this file

    # The context manager closes the file (and thereby saves the deletions) even if an error occurs
    with h5py.File(file, 'r+') as f:
        for column in channel_names:
            value = row[column].values[0]  # first matching row is used if the file is listed more than once
            annotation = str(value).strip().lower()  # accept "no", "NO", "No", " ok " and similar spellings
            if annotation == 'no':
                print(f"Column '{column}' has value 'no'")
                if column in f:
                    del f[column]  # remove the rejected channel from the data file
                    print(f"Deleted key '{column}' from file '{file}'")
            elif annotation == 'ok':
                print(f"Column '{column}' has value 'ok'")  # accepted channel: nothing to do
            else:
                # Empty cells or typos must not pass unnoticed; the channel is kept as it is
                print(f"Column '{column}' has unexpected annotation {value!r}; channel left untouched")

print(not_here_list)


In [None]:

# List the channels present in each data file
for file in files:
    # The context manager closes the file even if reading the keys or printing fails
    with h5py.File(file, 'r') as f:
        channels = list(f.keys())  # materialise the names so a plain list is printed
        print(file, channels)


==================================================== End of Main Code ==============================================================

### The code below checks for keyboard annotations in the data files.

In [None]:

# Define a function to convert decimal numbers to Unicode characters
def decimal_to_unicode(decimal_list):
    """
    Map each decimal code point to the Unicode character it denotes.

    Parameters:
    decimal_list (list): Decimal code points, one per character.

    Returns:
    list: The characters, in the same order as the input.
    """
    return list(map(chr, decimal_list))

# Demonstrate decimal_to_unicode on three ASCII letters and the euro sign (U+20AC)
decimal_list = [ord('A'), ord('B'), ord('C'), 0x20AC]  # i.e. 65, 66, 67, 8364
unicode_characters = decimal_to_unicode(decimal_list)
print(unicode_characters)

# Define a function to combine strings with an optional separator
def combine_strings(string_list, separator=''):
    """
    Join the strings of a list into one string.

    Parameters:
    string_list (list): Strings to join, in order.
    separator (str): Text placed between consecutive strings. Defaults to an empty string.

    Returns:
    str: The joined string.
    """
    combined = separator.join(string_list)
    return combined

# Demonstrate combine_strings by joining six words with single spaces
string_list = 'Hello world this is a test'.split()
combined_string = combine_strings(string_list, ' ')
print(combined_string)

# Possible names of the event channel, in the order in which they are tried
EVENT_CHANNEL_NAMES = ('Keyboard', 'keyboard', 'memory', 'Memory')

# Codes and comments collected from all files
codes_all = []
comments_all = []

# Iterate over each file and extract the codes and comments of its event channel
for file in files:
    # The context manager closes every file after it has been read
    with h5py.File(file, 'r') as f:
        channels = list(f.keys())

        # Pick the first event channel that exists. A file without one is skipped, so that
        # the events of the previously processed file are never reused by accident.
        event_channel = next((name for name in EVENT_CHANNEL_NAMES if name in channels), None)
        if event_channel is None:
            print(file, 'has no Keyboard/Memory channel; skipped')
            continue
        events = f[event_channel]

        # Print the title of the events, first as character codes and then as text
        print(events['title'][:, 0])
        print(combine_strings(decimal_to_unicode(events['title'][:, 0]), ''))

        # np.array copies the values into memory, so they remain valid after the file is closed
        codes_all.append(np.array(events['codes'][0]))
        comments_all.append(np.array(events['comment']))

        # Convert the comment codes to characters and print them as one string
        comment_characters = decimal_to_unicode(events['comment'][:, 0])
        combined_string = combine_strings(comment_characters, ' ')
        print(combined_string)

# Concatenate all codes and comments; np.concatenate rejects an empty list, so handle that case
if codes_all:
    codes_all = np.concatenate(codes_all)
    comments_all = np.concatenate(comments_all)
else:
    codes_all = np.array([])
    comments_all = np.array([])

# Print unique codes and comments
codes_all_unique = np.unique(codes_all)
comments_all_unique = np.unique(comments_all)
print(codes_all_unique)
print(comments_all_unique)