# MATLAB and Data File I/O
- Reading/Writing MATLAB files, NetCDF, IDL, Matrix Market
- Real examples: Data interchange, Scientific data formats

In [None]:
import numpy as np
from scipy import io
import tempfile
import os
# Reached only if the imports above succeeded; acts as a quick sanity check
# before running the I/O cells that follow.
print('scipy.io module loaded')

## MATLAB File Format (.mat)

**Purpose**: Exchange data between Python and MATLAB  
**Versions**: v4 and v5–7.2 are handled by `scipy.io`; v7.3 files are HDF5-based and must be read with an HDF5 library such as `h5py`

**Functions**:
- `savemat()`: Write .mat file
- `loadmat()`: Read .mat file
- `whosmat()`: List variables in .mat file

In [None]:
# Sample payload mixing the value kinds exchanged below: integer arrays
# (1-D and 2-D), a random float matrix, a string and a plain integer.
data_dict = dict(
    array_1d=np.array([1, 2, 3, 4, 5]),
    array_2d=np.array([[1, 2, 3], [4, 5, 6]]),
    matrix=np.random.rand(3, 3),
    string='Hello from Python',
    number=42,
)

print('MATLAB File I/O\n')
print('Data to save:')
for name, value in data_dict.items():
    # Non-array values are shown with their Python type and value;
    # arrays are summarised by shape and dtype instead.
    if not isinstance(value, np.ndarray):
        print(f'  {name}: {type(value).__name__} = {value}')
        continue
    print(f'  {name}: shape {value.shape}, dtype {value.dtype}')

In [None]:
# Reserve a temporary .mat path. delete=False keeps the file on disk once the
# handle is closed, so savemat (and the load cell that follows) can reopen it
# by name; the file is removed explicitly after loading.
tmp_handle = tempfile.NamedTemporaryFile(suffix='.mat', delete=False)
with tmp_handle:
    mat_file = tmp_handle.name

io.savemat(mat_file, data_dict)
saved_name = os.path.basename(mat_file)
saved_bytes = os.path.getsize(mat_file)
print(f'\nSaved to: {saved_name}')
print(f'File size: {saved_bytes} bytes\n')

# Inspect the file without loading the data: each entry unpacks into a
# variable name, its shape and a type description.
print('Contents of .mat file:')
contents = io.whosmat(mat_file)
for entry in contents:
    var_name, var_shape, var_class = entry
    print(f'  {var_name}: shape {var_shape}, dtype {var_class}')

In [None]:
# Read the file written by the previous cell back into a dict.
loaded_data = io.loadmat(mat_file)

print('\nLoaded data:')
# Walk the keys we saved (not the keys of loaded_data) so that only our own
# variables are reported.
for key in data_dict:
    if key not in loaded_data:
        continue
    val = loaded_data[key]
    if not isinstance(val, np.ndarray):
        print(f'  {key}: {val}')
        continue
    print(f'  {key}: shape {val.shape}')
    # Only echo the values of small arrays to keep the output short.
    if val.size <= 5:
        print(f'    Values: {val.flatten()}')

# Remove the temporary file created in the save cell.
os.unlink(mat_file)
print('\n✓ MATLAB file I/O successful')

## Real Example: Sharing ML Model Weights

**Scenario**: Export trained model weights for MATLAB analysis
**Use case**: Collaboration between Python/MATLAB teams

In [None]:
print('Machine Learning Model Export\n')

# Simulate the weights of a small fully connected network
# (784 -> 128 -> 64 -> 10). Seeding makes the random values reproducible.
np.random.seed(42)
model_weights = {
    'layer1_weights': np.random.randn(784, 128),
    'layer1_bias': np.random.randn(128),
    'layer2_weights': np.random.randn(128, 64),
    'layer2_bias': np.random.randn(64),
    'output_weights': np.random.randn(64, 10),
    'output_bias': np.random.randn(10),
    # Training metadata exported alongside the weights (not model parameters).
    'accuracy': np.array([0.95]),
    'epochs': np.array([50])
}

print('Model architecture:')
print('  Input: 784 (28×28 image)')
print('  Hidden1: 128 neurons')
print('  Hidden2: 64 neurons')
print('  Output: 10 classes\n')

# Count only weight and bias tensors; 'accuracy' and 'epochs' are metadata
# and must not inflate the parameter count.
total_params = sum(
    tensor.size
    for key, tensor in model_weights.items()
    if key.endswith(('_weights', '_bias'))
)
print(f'Total parameters: {total_params:,}\n')

# Save for MATLAB. delete=False keeps the path valid after the handle closes
# so savemat can reopen it by name.
with tempfile.NamedTemporaryFile(suffix='.mat', delete=False) as f:
    model_file = f.name

try:
    io.savemat(model_file, model_weights)
    print(f'Saved to: {os.path.basename(model_file)}')
    print(f'File size: {os.path.getsize(model_file):,} bytes')
    print('\nMATLAB users can now load with: load("model.mat")')
finally:
    # Always remove the temporary file, even if saving or reporting fails.
    os.unlink(model_file)

## Matrix Market Format (.mtx)

**Purpose**: Sparse matrix exchange format
**Standard**: NIST Matrix Market
**Use**: Share sparse matrices, linear algebra benchmarks

In [None]:
from scipy import sparse

print('Matrix Market Format\n')

# Build a 3x3 sparse matrix in coordinate (COO) form:
# data[i] is the value stored at position (row[i], col[i]).
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
sparse_matrix = sparse.coo_matrix((data, (row, col)), shape=(3, 3))

print('Sparse matrix:')
print(sparse_matrix.toarray())

# Derive the cell count from the matrix itself rather than repeating the
# literal dimensions, so the density stays right if the shape changes.
n_rows, n_cols = sparse_matrix.shape
print(f'\nDensity: {sparse_matrix.nnz / (n_rows * n_cols) * 100:.1f}%')
print(f'Non-zeros: {sparse_matrix.nnz}')

In [None]:
# Reserve a temporary .mtx path. delete=False keeps the path valid after the
# handle closes so mmwrite/mmread can reopen the file by name.
with tempfile.NamedTemporaryFile(suffix='.mtx', delete=False, mode='w') as f:
    mtx_file = f.name

try:
    # Save to Matrix Market
    io.mmwrite(mtx_file, sparse_matrix)
    print(f'\nSaved to: {os.path.basename(mtx_file)}')

    # Read back and show that shape, non-zero count and values round-trip.
    loaded_matrix = io.mmread(mtx_file)
    print(f'Loaded: {loaded_matrix.shape}, {loaded_matrix.nnz} non-zeros')
    print('Matrix:')
    print(loaded_matrix.toarray())
finally:
    # Always remove the temporary file, even if writing or reading fails.
    os.unlink(mtx_file)
print('\n✓ Matrix Market I/O successful')

## IDL Save Format (.sav)

**Purpose**: Read IDL (Interactive Data Language) files
**Note**: Read-only in scipy
**Use**: Legacy scientific data

In [None]:
# Informational cell: no .sav file is read here. The lines only describe how
# scipy.io.readsav would be called.
idl_notes = (
    'IDL Save Format\n',
    'scipy.io.readsav() can read IDL .sav files',
    'Commonly used in astronomy and atmospheric science',
    '\nExample usage:',
    '  data = io.readsav("data.sav")',
    '  # Returns dict with IDL variables',
)
for note in idl_notes:
    print(note)

## Fortran Unformatted Files

**Purpose**: Read Fortran binary output
**Function**: `FortranFile`
**Use**: Legacy simulation data

In [None]:
# Informational cell: no Fortran file is opened here. The text only sketches
# how scipy.io.FortranFile would be used.
fortran_notes = [
    'Fortran Binary Files\n',
    'scipy.io.FortranFile() can read/write Fortran unformatted files',
    'Common in computational physics and CFD',
    '\nExample:',
    '  from scipy.io import FortranFile',
    '  f = FortranFile("output.dat", \'r\')',
    '  data = f.read_reals(dtype=float)',
]
# A single print of the newline-joined notes emits the same text as printing
# each note separately.
print('\n'.join(fortran_notes))

## Real Example: Scientific Data Pipeline

**Scenario**: Process data from multiple sources
**Input**: MATLAB simulation, Fortran solver output
**Output**: Analysis results

In [None]:
print('Scientific Data Pipeline\n')

# Step 1: Generate simulation data (simulating MATLAB output).
# Both signals are sampled on the same phase grid, so build it once.
phase = np.linspace(0, 2 * np.pi, 100)
simulation_data = {
    'time': np.linspace(0, 10, 100),
    'temperature': 300 + 50 * np.sin(phase),
    'pressure': 101325 + 1000 * np.cos(phase),
    # Solver settings travel with the data as a nested dict.
    'simulation_params': {
        'dt': 0.1,
        'method': 'RK4',
        'tolerance': 1e-6
    }
}

print('Pipeline stages:')
print('  1. Load MATLAB simulation data')
print('  2. Process and analyze')
print('  3. Export results\n')

# Every temporary file created below is recorded here so the finally clause
# can remove it even when a later stage fails.
temp_paths = []
try:
    # Save simulation. delete=False keeps the path valid after the handle
    # closes so savemat/loadmat can reopen the file by name.
    with tempfile.NamedTemporaryFile(suffix='.mat', delete=False) as f:
        sim_file = f.name
    temp_paths.append(sim_file)
    io.savemat(sim_file, simulation_data)

    print(f'Simulation data: {os.path.getsize(sim_file):,} bytes')

    # Load and analyze. The loaded arrays are flattened to 1-D before the
    # statistics are computed.
    data = io.loadmat(sim_file)
    temp = data['temperature'].flatten()
    pressure = data['pressure'].flatten()

    print('\nAnalysis results:')
    print(f'  Temperature: {temp.min():.1f}K to {temp.max():.1f}K')
    print(f'  Pressure: {pressure.min():.0f} to {pressure.max():.0f} Pa')
    print(f'  Mean temp: {temp.mean():.1f}K')
    print(f'  Mean pressure: {pressure.mean():.0f} Pa')

    # Export results: [min, mean, max] per signal plus their correlation.
    results = {
        'temp_stats': np.array([temp.min(), temp.mean(), temp.max()]),
        'pressure_stats': np.array([pressure.min(), pressure.mean(), pressure.max()]),
        'correlation': np.corrcoef(temp, pressure)[0, 1]
    }

    with tempfile.NamedTemporaryFile(suffix='.mat', delete=False) as f:
        results_file = f.name
    temp_paths.append(results_file)
    io.savemat(results_file, results)

    print(f'\nResults exported: {os.path.basename(results_file)}')
finally:
    # Cleanup
    for tmp_path in temp_paths:
        os.unlink(tmp_path)
print('\n✓ Pipeline complete')

## Summary

### MATLAB Files:
```python
from scipy import io

# Save
io.savemat('data.mat', {'var1': array1, 'var2': array2})

# Load
data = io.loadmat('data.mat')
array1 = data['var1']

# Inspect
contents = io.whosmat('data.mat')
```

### Matrix Market (Sparse):
```python
# Write
io.mmwrite('matrix.mtx', sparse_matrix)

# Read
matrix = io.mmread('matrix.mtx')
```

### IDL Save:
```python
# Read only
data = io.readsav('data.sav')
```

### Fortran:
```python
from scipy.io import FortranFile
f = FortranFile('output.dat', 'r')
data = f.read_reals(dtype=float)
```

### Use Cases:

**MATLAB Files**:
- Python ↔ MATLAB collaboration
- Legacy code integration
- Model weight sharing
- Simulation data exchange

**Matrix Market**:
- Sparse matrix benchmarks
- Linear algebra test cases
- Standard format for comparison

**IDL/Fortran**:
- Legacy scientific data
- Astronomy data (IDL)
- CFD simulation output (Fortran)

### Best Practices:
✓ Check MATLAB version compatibility (`scipy.io` cannot read v7.3/HDF5 files)  
✓ Use compression for large files (`io.savemat(..., do_compression=True)`)  
✓ Document units and conventions  
✓ Test round-trip (save → load)  
✓ Handle metadata carefully