# This script summarizes the .mat data produced by "process_organoid_data.m" and saves it as .npz files

In [1]:
# Standard Libraries
import numpy as np
import scipy
import os
import contextlib
import datetime

In [2]:
# Load Intan Libraries
import sys
sys.path.append(os.path.join(os.getcwd(), "load_intan_rhd_format"))

from load_intan_rhd_format import read_data


In [3]:
# DataJoint Pipeline
# Step the working directory up out of "matlab_pipeline" and then out of
# "notebooks" when the notebook is launched from inside either folder.
# The two checks run in sequence, so starting in notebooks/matlab_pipeline
# climbs two levels. Presumably this lands on the repository root so that the
# `workflow` package import that follows resolves -- TODO confirm.
if os.path.basename(os.getcwd()) == "matlab_pipeline":
    os.chdir("..")
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir("..")

from workflow.pipeline import ephys

[2025-03-28 11:15:59,830][INFO]: Connecting judewerth@db.datajoint.com:3306
[2025-03-28 11:16:00,530][INFO]: Connected judewerth@db.datajoint.com:3306


In [None]:
def loadmat(filename: str, variables: list|np.ndarray) -> tuple:

    # Load .mat file
    data = scipy.io.loadmat(filename, variable_names=variables)

    if len(variables) == 1:
        return data[variables[0]]
    else:
        return tuple(data[var] for var in variables)

    
def loadrhd(filename: str) -> dict:
    """Read an .rhd file with ``read_data`` while keeping the console quiet.

    Anything ``read_data`` prints to stdout during the call is sent to the
    null device instead of the console.

    Args:
        filename: Path of the .rhd file to read.

    Returns:
        Whatever ``read_data`` returns for the file.
    """
    with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink):
        rhd_contents = read_data(filename)
    return rhd_contents
        
def get_impedance_values(data: dict) -> np.ndarray:
    """Gather each amplifier channel's impedance magnitude into a (4, 32) array.

    Args:
        data: Mapping whose ``"amplifier_channels"`` entry is a sequence of
            per-channel dicts, each holding ``"electrode_impedance_magnitude"``.

    Returns:
        The magnitudes in channel order, reshaped to (4, 32).
    """
    magnitudes = [
        channel["electrode_impedance_magnitude"]
        for channel in data["amplifier_channels"]
    ]
    # Rows are port IDs, columns are the channels on that port.
    return np.reshape(np.array(magnitudes), (4, 32))

def get_time_array(type_str = False):
    """Build an object array with one entry per minute of the day (1440 total).

    Args:
        type_str: When true, entries are "HHMM" strings; otherwise they are
            ``datetime.time`` objects.

    Returns:
        A 1-D ``dtype=object`` array ordered from 00:00 to 23:59.
    """
    minutes_per_day = 24 * 60
    time_array = np.empty(minutes_per_day, dtype=object)

    for index in range(minutes_per_day):
        # index counts minutes since midnight
        hour, minute = divmod(index, 60)
        stamp = datetime.time(hour=hour, minute=minute)
        time_array[index] = stamp.strftime("%H%M") if type_str else stamp

    return time_array

def get_elec_mapping():
    """Return the 32-entry index array used to reorder the channel axis.

    ``get_minute_data`` indexes the channel axis of its arrays with this
    array to account for the electrode mapping.
    """
    channel_order = (
        16, 15, 17, 14, 18, 13, 20, 11,
        21, 10, 22, 9, 31, 0, 30, 1,
        29, 2, 27, 4, 26, 5, 25, 6,
        24, 7, 28, 3, 23, 8, 19, 12,
    )
    return np.array(channel_order)

def get_num_elec_inside(batch_id: str) -> list[int]:
    """Return the per-row counts of electrodes inside the organoids of a batch.

    Args:
        batch_id: Batch label, e.g. ``"O9-12"``.

    Returns:
        Four counts; ``organize_data`` marks the first ``count`` electrodes of
        row ``i`` of its (4, 32) mask as inside for the i-th count.

    Raises:
        KeyError: If ``batch_id`` is not one of the known batches.
    """
    elec_inside = {
        "O9-12": [32, 16, 20, 14],
        "O13-16": [25, 13, 11, 11],
        "O17-20": [22, 19, 20, 17],
        "O21-24": [18, 21, 22, 23],
        "O25-28": [20, 32, 26, 24],
    }
    try:
        return elec_inside[batch_id]
    except KeyError:
        # Same exception type as a plain lookup, but say which ids are valid.
        known = ", ".join(elec_inside)
        raise KeyError(
            f"Unknown batch id {batch_id!r}; known batches: {known}"
        ) from None


In [5]:
def get_minute_data(filename: str):
    """Load one .mat file and return its features and rms in mapped electrode order.

    Args:
        filename: Path of a .mat file holding ``features`` and ``rms``.

    Returns:
        ``(features, rms)``: features averaged over axis 1 and shaped
        (4, 32, 10), and rms shaped (4, 32), both with the channel axis
        reordered by ``get_elec_mapping``.
    """
    raw_features, raw_rms = loadmat(filename, ["features", "rms"])

    # Average over axis 1, then lay the result out as (4, 32, 10).
    per_channel = np.mean(raw_features, axis=1).reshape(4, 32, 10)
    # converting spikes/125ms -> spikes/s (feature columns 7 onward)
    per_channel[..., 7:] *= 1000 / 125

    rms_grid = raw_rms.flatten().reshape(4, 32)

    # Account for electrode mapping
    order = get_elec_mapping()
    return per_channel[:, order, :], rms_grid[:, order]

In [164]:
def mat_path_to_rhd_path(mat_path: str) -> str:
    """Derive the raw (.rhd) data path from a processed (.mat) data path.

    Every occurrence of 'processed' in the path is swapped for 'raw'. This is
    specific to how the recording data is organized on disk.
    """
    pieces = mat_path.split('processed')
    return 'raw'.join(pieces)

In [205]:
def _split_mat_filenames(filenames):
    """Split ``<batch>_<date>_<time>.mat`` names into three parallel string arrays."""
    parts_array = []
    for file in filenames:
        parts = file.removesuffix(".mat").split("_")
        if len(parts) != 3:
            raise ValueError(
                f"Unexpected file name {file!r}; expected <batch>_<date>_<time>.mat"
            )
        parts_array.append(parts)
    batch_ids, dates, times = np.array(parts_array).transpose()
    return batch_ids, dates, times


def _find_rhd_file(rhd_filenames, datetime_str):
    """Return the name in ``rhd_filenames`` that contains ``datetime_str``."""
    matches = sorted(name for name in rhd_filenames if datetime_str in name)
    if not matches:
        raise FileNotFoundError(
            f"No raw recording file matching {datetime_str!r} was found"
        )
    return matches[0]


def organize_data(filepath = None):
    """Summarize a folder of per-minute .mat files into one .npz file per date.

    Every ``<batch>_<date>_<time>.mat`` file in ``filepath`` is loaded with
    ``get_minute_data`` and written into the minute slot given by the first
    four characters (HHMM) of its time. For each date an .npz file is saved
    under ``<cwd>/notebooks/matlab_pipeline/processed_data/<batch>/<date>.npz``
    with the keys ``data``, ``minute_mask``, ``rms_mask``,
    ``elec_inside_mask`` and ``impedance_values``. Impedances are read from
    the raw .rhd file that matches the last recording of that date.

    Args:
        filepath: Directory holding the .mat files. Defaults to the current
            working directory.

    Raises:
        ValueError: If the folder has no .mat files, a file name does not
            have three "_"-separated parts, or more than one batch is present.
        FileNotFoundError: If no raw file matches a date's last recording.
    """
    # Processed Data Directory
    processed_data_dir = os.path.join(os.getcwd(), "notebooks", "matlab_pipeline", "processed_data")

    # Filter Values
    # rms (uV)
    rms1 = 4
    rms2 = 100
    # Impedance (Ohms)
    I1 = 50000
    I2 = 2000000

    print("Gathering Directory Information")
    # If no input, filepath is current working directory
    if filepath is None:
        filepath = os.getcwd()

    # Find File Names in Directory
    filenames = np.array([file for file in os.listdir(filepath) if file.endswith('.mat')])
    if filenames.size == 0:
        raise ValueError(f"No .mat files found in {filepath}")

    # Get all Info (batch ids, dates, times) in Directory
    batch_ids, dates, times = _split_mat_filenames(filenames)

    time_array = get_time_array(type_str=True)

    # Get Batch Id
    if not np.all(batch_ids == batch_ids[0]):
        raise ValueError("This Folder contains multiple batch recordings. Please seperate batch recordings into different folders.")
    batch_id = batch_ids[0]
    print(f"Batch = {batch_id}")

    # Electrodes Inside/Outside Mask: the first `count` electrodes of each row
    # are inside. It depends only on the batch, so build it once.
    num_elec = get_num_elec_inside(batch_id)
    elec_inside_mask = np.arange(32) < np.asarray(num_elec)[:, None]

    # Raw (.rhd) directory listing is the same for every date
    rhd_path = mat_path_to_rhd_path(filepath) # Change this function based on file directory info
    rhd_filenames = os.listdir(rhd_path)

    # Make Batch Directory
    os.makedirs(os.path.join(processed_data_dir, batch_id), exist_ok=True)

    # Central Loop
    for date in np.unique(dates): # Loop through dates (create a file per date)
        print(f"Processing {date}")

        # Initialize Data Array and Masks
        # (np.nan, not np.NaN: the capitalized alias was removed in NumPy 2.0)
        data_array = np.full((len(time_array), 4, 32, 10), np.nan)
        minute_mask = np.zeros(time_array.shape, dtype=bool)
        rms_mask = np.zeros((len(time_array), 4, 32), dtype=bool)

        # Times recorded on this date, sorted so the last entry really is the
        # last recording of the day (os.listdir order is arbitrary)
        on_date = dates == date
        date_times = np.sort(times[on_date])

        # Get Information For Each Day
        for time in date_times:

            # Find file for given date and time and load data
            file = filenames[on_date & (times == time)][0]
            features, rms = get_minute_data(os.path.join(filepath, file))

            # Update Data Array and Masks
            time_index = time_array == time[:4] # HHMMSS -> HHMM (format of time_array) (time_index=logical array)
            data_array[time_index, :,:,:] = features
            minute_mask[time_index] = True
            rms_mask[time_index, :,:] = (rms >= rms1) & (rms <= rms2)

        # Get RHD File matching the last recording of the day. The list must be
        # searched by name: indexing it with a boolean silently picks the
        # first (or second) entry instead of the matching file.
        datetime_str = f"{date}_{date_times[-1]}"
        rhd_file = _find_rhd_file(rhd_filenames, datetime_str)

        # Get Impedance Mask
        rhd_data = loadrhd(os.path.join(rhd_path, rhd_file))
        impedance_values = get_impedance_values(rhd_data)
        impedance_mask = (impedance_values > I1) & (impedance_values < I2)

        # Save Date File In Directory
        np.savez(
            os.path.join(processed_data_dir, batch_id, date + ".npz"),
            data = data_array,
            minute_mask = minute_mask,
            rms_mask = rms_mask,
            elec_inside_mask = elec_inside_mask,
            impedance_values = impedance_values,
        )

        print(f"{date}.npz saved")
        print(f"{np.sum(minute_mask)}/1440 minutes recorded")
        print(f"{np.sum(num_elec)}/128 electrodes inside organoids")
        print(f"{np.sum(impedance_mask)}/128 electrodes in valid impedance range ({I1}-{I2} ohms)")
        print(f"{(np.sum(rms_mask)/np.size(rms_mask[minute_mask,:,:]))*100}% of data in valid rms range ({rms1}-{rms2} uV)")

                


In [None]:
# Summarize the .mat files in this processed-data folder into per-date .npz files.
# mat_path_to_rhd_path maps this folder to "D:/O09-12_raw/", which is where the
# matching .rhd files are looked up.
organize_data("D:/O09-12_processed/")


Gathering Directory Information
Batch = O9-12
Processing 230503
230503.npz saved
387/1440 minutes recorded
82/128 electrodes inside organoids
57/128 electrodes in valid impedance range (50000-2000000 ohms)
83.00024224806202% of data in valid rms range (4-100 uV)
Processing 230504
230504.npz saved
1440/1440 minutes recorded
82/128 electrodes inside organoids
57/128 electrodes in valid impedance range (50000-2000000 ohms)
87.81846788194444% of data in valid rms range (4-100 uV)
Processing 230505
230505.npz saved
1440/1440 minutes recorded
82/128 electrodes inside organoids
57/128 electrodes in valid impedance range (50000-2000000 ohms)
80.26312934027777% of data in valid rms range (4-100 uV)
Processing 230506
230506.npz saved
1440/1440 minutes recorded
82/128 electrodes inside organoids
57/128 electrodes in valid impedance range (50000-2000000 ohms)
77.03938802083333% of data in valid rms range (4-100 uV)
Processing 230507
230507.npz saved
1440/1440 minutes recorded
82/128 electrodes ins