In [None]:
import scipy.io as sio
import numpy as np
import time
import os

def read_mat_file(file_path, output_format='dict'):
    """
    Read a MATLAB .mat file and return its variables as a dictionary.

    The file is loaded with ``scipy.io.loadmat`` and every key starting with
    ``__`` (the loader's metadata entries) is dropped. Progress is reported
    through ``print`` calls prefixed with the elapsed time.

    Args:
        file_path: Path to the .mat file to read.
        output_format: ``'dict'`` or ``'numpy'`` (case-insensitive). Both
            return the same dictionary; the value only selects which
            progress message is printed.

    Returns:
        A dict mapping variable names to the loaded values, or ``None`` when
        the format is unsupported, the path is not an existing file, or
        loading fails. Errors are printed rather than raised.
    """
    # perf_counter is monotonic, so elapsed times cannot go negative if the
    # wall clock is adjusted while a large file is loading.
    start_time = time.perf_counter()

    def _log(message):
        # Prefix every progress line with the seconds elapsed since the call.
        print(f"[{time.perf_counter() - start_time:.2f}s] {message}")

    _log(f"Starting read_mat_file with file: {file_path}")

    # Validate the requested format before touching the file, so a typo does
    # not cost a full (possibly slow) load that is then thrown away.
    requested = output_format.lower() if isinstance(output_format, str) else None
    if requested not in ('dict', 'numpy'):
        _log("Error: Unsupported output format. Use 'dict' or 'numpy'.")
        return None

    try:
        # Check if file exists
        _log("Checking if file exists")
        if not os.path.exists(file_path):
            _log(f"Error: File '{file_path}' does not exist")
            return None
        if not os.path.isfile(file_path):
            _log(f"Error: '{file_path}' is not a file")
            return None

        # Get file size
        file_size = os.path.getsize(file_path) / (1024 * 1024)  # Size in MB
        _log(f"File size: {file_size:.2f} MB")

        # Load the .mat file
        _log("Loading .mat file")
        mat_data = sio.loadmat(file_path)
        _log("Loaded .mat file successfully")

        # Remove MATLAB metadata keys
        _log("Cleaning metadata")
        cleaned_data = {
            key: value
            for key, value in mat_data.items()
            if not key.startswith('__')
        }
        _log(f"Metadata cleaned. Variables: {list(cleaned_data.keys())}")

        if requested == 'dict':
            _log("Returning as dict")
        else:
            # Values are already NumPy arrays from scipy.io.loadmat.
            _log("Returning as numpy arrays")
        return cleaned_data
    except Exception as e:
        # Deliberately broad: callers rely on a printed message plus a None
        # return instead of an exception for any read failure.
        _log(f"Error: {e}")
        return None

def print_data_summary(data):
    """
    Print a summary of every entry in ``data``.

    For each key the value's type is printed. NumPy arrays additionally get
    their shape and a preview of the first five elements along the first
    axis; any other value is printed whole.

    Args:
        data: Expected to be a dict of variable name -> value. Anything else
            only produces a one-line notice.

    Returns:
        None. All output goes to stdout.
    """
    print("\nData Summary:")
    if not isinstance(data, dict):
        print("Data is not in a dictionary format.")
        return

    for key, value in data.items():
        print(f"\nVariable: {key}")
        print(f"Type: {type(value)}")
        if isinstance(value, np.ndarray):
            print(f"Shape/Dimensions: {value.shape}")
            # A 0-dimensional array cannot be sliced (value[:5] raises
            # IndexError), so show it whole instead of aborting the summary.
            preview = value if value.ndim == 0 else value[:5]
            print(f"Preview: \n{preview}")
        else:
            print(f"Value: {value}")

In [None]:
# Ask for the file path. Surrounding whitespace and quote characters are
# stripped because paths pasted from a file manager are often wrapped in quotes,
# which would otherwise fail the existence check.
file_path_input = input(
    "Enter the full path to your .mat file (e.g., /mnt/c/path/to/file.mat): "
).strip().strip("\"'").strip()

# Process the file
if file_path_input:
    overall_start = time.perf_counter()
    print(f"[0.00s] Starting processing for file: {file_path_input}")

    output_format = "numpy"  # Using NumPy arrays

    # Read and convert the .mat file (returns None on any failure)
    converted_data = read_mat_file(file_path_input, output_format=output_format)

    if converted_data is not None:
        print(f"[{time.perf_counter() - overall_start:.2f}s] Printing data summary")
        # Keep a module-level reference so later cells can inspect the data.
        # (No `global` statement is needed: this code already runs at module level.)
        converted_data_global = converted_data
        print_data_summary(converted_data)

        # Write the .npz next to the input file: same path, extension swapped.
        base_name = os.path.splitext(file_path_input)[0]  # Removes .mat
        output_file = f"{base_name}.npz"
        print(
            f"[{time.perf_counter() - overall_start:.2f}s] Saving NumPy arrays to compressed '{output_file}'"
        )
        try:
            # TypeError covers a variable named 'file', which collides with
            # savez_compressed's own first parameter when passed as a keyword.
            np.savez_compressed(output_file, **converted_data)
        except (OSError, TypeError, ValueError) as exc:
            print(
                f"[{time.perf_counter() - overall_start:.2f}s] Error: could not save '{output_file}': {exc}"
            )
            print(f"[{time.perf_counter() - overall_start:.2f}s] Processing failed.")
        else:
            # Report the real location; the file is not necessarily in the
            # current working directory.
            print(
                f"[{time.perf_counter() - overall_start:.2f}s] Saved compressed arrays to '{os.path.abspath(output_file)}'"
            )
            print(f"[{time.perf_counter() - overall_start:.2f}s] Processing complete!")
    else:
        print(f"[{time.perf_counter() - overall_start:.2f}s] Processing failed.")
else:
    print("No file path provided.")

In [None]:
# Report which variables an earlier cell stored in converted_data_global,
# or tell the user that nothing has been loaded yet.
if globals().get("converted_data_global") is None:
    print("No data has been processed yet. Run Cell 3 with a valid file path.")
else:
    print("Available variables:", list(converted_data_global))