In [5]:
import os
import h5py
import numpy as np
import pandas as pd

In [3]:
artifact_folder = '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix'

# List all .mat files in the folder
artifact_files = [f for f in os.listdir(artifact_folder) if f.endswith('.mat')]
artifact_files = sorted(artifact_files)  # Sort alphabetically

print('Artifact files found:')
print(artifact_files)

# Inspect the structure of one artifact matrix file
if artifact_files:
    # Select the first file (or change the index to inspect a different file)
    file_to_inspect = artifact_files[0]
    file_path = os.path.join(artifact_folder, file_to_inspect)
    
    print(f"\nInspecting file: {file_to_inspect}\n")
    
    with h5py.File(file_path, 'r') as f:
        print("Keys present in the file:")
        for key in f.keys():
            data = np.array(f[key])
            print(f"- {key}: shape = {data.shape}, dtype = {data.dtype}")
else:
    print("No artifact .mat files found in the specified folder.")

Artifact files found:
['EPCTL01_artndxn.mat', 'EPCTL02_artndxn.mat', 'EPCTL04_artndex.mat', 'EPCTL05_artdnex.mat', 'EPCTL06_artndxn.mat', 'EPCTL07_artndxn.mat', 'EPCTL08_artndxn.mat', 'EPCTL09_artndxn.mat', 'EPCTL10_artndxn.mat', 'EPCTL11_artndxn.mat', 'EPCTL12_artndxn.mat', 'EPCTL13_artndxn.mat', 'EPCTL14_artndxn.mat', 'EPCTL15_artndxn.mat', 'EPCTL16_artndxn.mat', 'EPCTL17_artndxn.mat', 'EPCTL19_artndxn.mat', 'EPCTL20_artndxn.mat', 'EPCTL21_artndxn.mat', 'EPCTL22_artndxn.mat', 'EPCTL23_artndxn.mat', 'EPCTL24_artndxn.mat', 'EPCTL25_artndxn.mat', 'EPCTL26_artndxn.mat', 'EPCTL27_artndxn.mat', 'EPCTL28_artndxn.mat', 'EPCTL29_artndxn.mat', 'EPCTl03_artndxn.mat', 'EPCTl18_artndxn.mat']

Inspecting file: EPCTL01_artndxn.mat

Keys present in the file:
- artndxn: shape = (957, 83), dtype = uint8
- visgood: shape = (2,), dtype = uint64
- visnum: shape = (958, 1), dtype = float32


In [6]:
def mat_to_csv(mat_file_path):
    """
    Convert each dataset in a .mat file (assumed to be in HDF5 format)
    into a separate CSV file saved in the same folder as the .mat file.
    """
    # Use the folder of the mat file as the output folder
    output_folder = os.path.dirname(mat_file_path)
    file_base = os.path.splitext(os.path.basename(mat_file_path))[0]
    
    with h5py.File(mat_file_path, 'r') as f:
        for key in f.keys():
            try:
                data = np.array(f[key])
                
                # If the data is one-dimensional, reshape it as a column vector.
                if data.ndim == 1:
                    data = data.reshape(-1, 1)
                
                # If the data has more than two dimensions, print a message and skip conversion.
                if data.ndim > 2:
                    print(f"Key '{key}' in {file_base} has {data.ndim} dimensions. Skipping conversion.")
                    continue
                
                # Convert to DataFrame and save as CSV
                df = pd.DataFrame(data)
                csv_filename = f"{file_base}_{key}.csv"
                csv_file_path = os.path.join(output_folder, csv_filename)
                df.to_csv(csv_file_path, index=False)
                print(f"Saved CSV for key '{key}' to: {csv_file_path}")
            except Exception as e:
                print(f"Error converting key '{key}' in {file_base}: {e}")

# Update artifact_folder to your folder containing the .mat files
artifact_folder = '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix'
mat_files = [os.path.join(artifact_folder, f) for f in os.listdir(artifact_folder) if f.endswith(".mat")]

print("Found .mat files:")
print(mat_files)

# Process each .mat file and convert it to CSV files in the same folder
for mat_file in mat_files:
    print(f"\nConverting file: {mat_file}")
    mat_to_csv(mat_file)

Found .mat files:
['/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL21_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL13_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL28_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL06_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTl03_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL24_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL16_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-project/data/ANPHY-Sleep/artifact_matrix/EPCTL15_artndxn.mat', '/Users/tereza/spring_2025/STAT_4830/STAT-4830-GOALZ-