# Preprocessing .mat for NWB

## Function

In [1]:
import numpy as np
import h5py
import os
import yaml
import pandas as pd
from datetime import datetime
from NWB_conversion import (convert_data_to_nwb_an)


ImportError: cannot import name 'get_subject_data_folder' from 'utils.server_paths' (/Users/lorisfabbro/Desktop/Divers/LSENS/NWB_converter_AN/utils/server_paths.py)

In [5]:


def search_and_open_mat(mouse_id: str, last_done_day: str):
    """
    Search and open a .mat file from a mounted network drive (/Volumes/WR on macOS).

    :param mouse_id: Mouse ID to search  e.g., "AO039"
    :param last_done_day: Last done day as a string e.g., "20190626"
    :return: Path to the found .mat file
    """

    # Build the full path to the .mat file
    filename = f"{mouse_id}_{last_done_day}.mat"
    file_path = os.path.join("/Volumes/WR", filename)

    # Check if the file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")

    # Try opening the file using h5py (for .mat v7.3 HDF5 format)
    try:
        with h5py.File(file_path, 'r') as f:
            print(f"✅ File '{filename}' opened successfully.")

    except PermissionError as e:
        print("❌ Permission denied: Python can't access this file.")
        print("💡 Check macOS privacy settings (System Preferences > Privacy > Full Disk Access).")
        raise e

    except OSError as e:
        print("❌ Error opening file. Is it an HDF5 (MATLAB v7.3) .mat file?")
        raise e
    
    return file_path



In [None]:


def files_to_config(mat_file, csv_file, output_path="data/config.yaml"):
    """
    Converts a .mat file and csv_file into a .yaml configuration file for the NWB pipeline.

    :param mat_file: Path to the .mat file
    :return: Configuration dictionary + path to the yaml file
    """
    # PART 1: Load the .mat file and extract metadata
    # Load the .mat file
    with h5py.File(mat_file, 'r') as f:
        data_group = f['Data'] if 'Data' in f else f
        data = {key: data_group[key][()] for key in data_group.keys()}

    # Extract relevant information
    mouse = ''.join(chr(c) for c in data['mouse'].flatten())
    date = ''.join(chr(c) for c in data['date'].flatten())
    experimenter = EXPERIMENTER_MAP.get(mouse[:2], 'Inconnu')



    session_id = f"{mouse}_{date}"
    start_time = datetime.strptime(date, "%Y%m%d")



    # PART 2: Load the CSV file to extract additional metadata
    csv_data = pd.read_csv(csv_file)
    subject_info = csv_data[csv_data['subject_id'] == mouse].iloc[0]

    # Extract additional metadata
    age = subject_info['age']
    genotype = subject_info['genotype']
    sex = subject_info['sex']
    weight = subject_info['weight']


    
    # Construct the output YAML path
    config = {
        'session_metadata': {
            'session_id': session_id,
            'identifier': session_id,
            'experimenter': experimenter,
            'description': 'Session electrophysiologie',
            'start_time': start_time.strftime('%Y%m%d 120000'),
            'lab': 'Petersen Lab',
            'institution': 'EPFL'
        },
        'subject_metadata': {
            'age': 'P62D',
            'age__reference': 'birth',
            'date_of_birth': '02/19/2025',
            'description': 'AB164',
            'genotype': 'WT',
            'sex': 'F',
            'species': 'Mus musculus',
            'strain': 'C57BL/6',
            'subject_id': 'AB164',
            'weight': 15.1
        },
        'ephys_metadata': {
            'processed': 1
        }
    }
    """
        subject_metadata:
    age: P62D
    age__reference: birth
    date_of_birth: 02/19/2025
    description: AB164
    genotype: WT
    sex: F
    species: Mus musculus
    strain: C57BL/6
    subject_id: AB164
    weight: 15.1
    """

    # save config
    with open(output_path, 'w') as f:
        yaml.dump(config, f, default_flow_style=False)

    print(f"✅ Config YAML sauvegardé à : {output_path}")
    
    return output_path, config




# VALIDATION

In [14]:
import scipy.io
import h5py

filename = "AO039_20190626.mat"  

with h5py.File(filename, 'r') as f:
    data_group = f['Data']
    data_ref = f["#refs#"]
    #print("Références trouvées :")
    #for key in data_ref.keys():
    #    print(f" - {key} → shape: {data_ref[key].shape}, dtype: {data_ref[key].dtype}")
    print("Contenu de 'Data' :")
    for key in data_group.keys():
        print(f" - {key} → shape: {data_group[key].shape}, dtype: {data_group[key].dtype}")


Contenu de 'Data' :
 - ARAindex → shape: (75, 1), dtype: float64
 - Area → shape: (75, 1), dtype: object
 - BaselineFR_Mean → shape: (75, 1), dtype: float64
 - BaselineFR_Session → shape: (75, 1), dtype: object
 - CRIndices → shape: (1, 583), dtype: uint8
 - ClusterCounter → shape: (75, 1), dtype: float64
 - EngagedTrials → shape: (583, 1), dtype: uint8
 - FAIndices → shape: (1, 583), dtype: uint8
 - HitIndices → shape: (1, 583), dtype: uint8
 - ISI_Violation → shape: (75, 1), dtype: float64
 - ISO_Distance → shape: (75, 1), dtype: float64
 - JawOnsetsTms → shape: (583, 1), dtype: float64
 - JawTrace → shape: (583, 1000), dtype: float64
 - LFPs → shape: (3, 1), dtype: object
 - LickData → shape: (482142, 1), dtype: float64
 - LickTime → shape: (482142, 1), dtype: float64
 - LightIndices → shape: (1, 583), dtype: float64
 - MDS → shape: (1, 1), dtype: float64
 - ML_DV_AP → shape: (75, 1), dtype: object
 - ML_DV_AP_32 → shape: (75, 1), dtype: object
 - MissIndices → shape: (1, 583), dtyp

In [36]:
#search_and_open_mat("AO039", "20190626")


In [None]:

filename = "data/AO039_20190626.mat" 
csv = "data/Subject_Session.csv"
config_path, config = files_to_config(filename , csv, output_path="data/AO39_20190626_config.yaml")


✅ Config YAML sauvegardé à : data/AO39_20190626_config.yaml


: 

In [None]:
convert_data_to_nwb_an(
    config_file=config_path,
    output_folder="nwb_output",
    with_time_string=False
)

In [3]:
csv_data = pd.read_csv("data/Subject_Session.csv")
csv_data.head()


ParserError: Error tokenizing data. C error: Expected 1 fields in line 19, saw 2
