# Biologic MPR File Reading Verification

This notebook demonstrates step-by-step verification of reading BioLogic `.mpr` files using the optimized `BiologicMPTReader`.

## Objectives

1. Load the `Biologic_GPCL.mpr` file
2. Verify data structure and completeness
3. Inspect all data columns
4. Examine comprehensive metadata
5. Validate data quality
6. Visualize electrochemical data

## 1. Setup and Imports

In [None]:
import sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pprint import pprint

# Import the BiologicMPTReader
from echemistpy.utils.external.echem.biologic_reader import BiologicMPTReader

# Configure pandas display options so wide tables printed with
# DataFrame.to_string() later in the notebook are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.width', 1000)

# Configure matplotlib defaults (individual plots may override figsize).
plt.rcParams['figure.figsize'] = (12, 6)
plt.rcParams['font.size'] = 10

# The check mark was stored mis-encoded ("‚úì"); use the intended character.
print("✓ Imports successful")

## 2. Load MPR File

Load the `Biologic_GPCL.mpr` file using the `BiologicMPTReader`.

In [None]:
# Define file path.
# NOTE: this is a relative path, so it is resolved against the kernel's
# current working directory.
file_path = Path("examples/echem/Biologic_GPCL.mpr")

# Fail early with an explicit message if the example file is missing.
if not file_path.exists():
    raise FileNotFoundError(f"File not found: {file_path}")

print(f"File path: {file_path.absolute()}")
print(f"File size: {file_path.stat().st_size / 1024:.2f} KB")

# Create reader and load file. `reader` is reused by later cells
# (column info, metadata summary, validation).
reader = BiologicMPTReader()
measurement = reader.read(file_path)

# The check mark was stored mis-encoded ("‚úì"); use the intended character.
print(f"\n✓ Successfully loaded {file_path.name}")
print(f"  Measurement type: {type(measurement).__name__}")

## 3. Inspect Raw Data Structure

Examine the structure of the loaded data including dimensions, coordinates, and data variables.

In [None]:
# Get the xarray Dataset
dataset = measurement.data.data

print("=" * 80)
print("DATASET STRUCTURE")
print("=" * 80)
print(dataset)
print("\n" + "=" * 80)

# Display dimensions.
# Use `Dataset.sizes` (mapping of dimension name -> length). Treating
# `Dataset.dims` as a mapping is deprecated in xarray: it emits a
# FutureWarning and will break once `dims` returns a set of names.
print("\nDIMENSIONS:")
for dim, size in dataset.sizes.items():
    print(f"  {dim}: {size}")

# Display coordinates with their shape and dtype.
print("\nCOORDINATES:")
for coord_name in dataset.coords:
    coord = dataset.coords[coord_name]
    print(f"  {coord_name}: shape={coord.shape}, dtype={coord.dtype}")

## 4. Verify All Data Columns

List all available data columns with their properties using the new `get_column_info()` API method.

In [None]:
# Ask the reader for its per-column description (a dict keyed by column name).
column_info = reader.get_column_info()

print("=" * 80, f"DATA COLUMNS ({len(column_info)} total)", "=" * 80, sep="\n")

# One row per column makes the description easier to scan.
info_df = pd.DataFrame.from_dict(column_info, orient='index')
print(info_df.to_string())

# Basic descriptive statistics for every data variable.
print("\n" + "=" * 80, "COLUMN STATISTICS", "=" * 80, sep="\n")


def _column_stats(name):
    """Build one statistics row (min/max/mean/std/unit) for data variable `name`."""
    data = dataset[name].values
    return {
        'Column': name,
        'Min': f"{np.min(data):.6g}",
        'Max': f"{np.max(data):.6g}",
        'Mean': f"{np.mean(data):.6g}",
        'Std': f"{np.std(data):.6g}",
        # Fall back to 'N/A' when the reader reports an empty/None unit.
        'Unit': column_info[name]['unit'] or 'N/A',
    }


stats_data = [_column_stats(name) for name in dataset.data_vars]

stats_df = pd.DataFrame(stats_data)
print(stats_df.to_string(index=False))

## 5. Examine Metadata

Inspect comprehensive metadata including standard fields and MPR-specific information.

In [None]:
# Condensed metadata view provided by the reader.
metadata_summary = reader.get_metadata_summary()

print("=" * 80, "METADATA SUMMARY", "=" * 80, sep="\n")
pprint(metadata_summary, width=80)

# Complete metadata mapping attached to the measurement.
full_meta = measurement.metadata.meta

print("\n" + "=" * 80, "FULL METADATA", "=" * 80, sep="\n")

# Standard fields: only those actually present are printed.
print("\nStandard Metadata:")
standard_keys = ['technique', 'sample_name', 'instrument', 'source_file', 'original_filename']
for key in standard_keys:
    if key not in full_meta:
        continue
    print(f"  {key}: {full_meta[key]}")

# MPR-specific fields are identified by their 'mpr_' prefix.
print("\nMPR-Specific Metadata:")
mpr_keys = [k for k in full_meta.keys() if k.startswith('mpr_')]
for key in mpr_keys:
    value = full_meta[key]
    # Long sequences and large dicts are summarised by size to keep output compact.
    if isinstance(value, (list, tuple)) and len(value) > 10:
        shown = f"[{len(value)} items]"
    elif isinstance(value, dict) and len(value) > 5:
        shown = f"{{{len(value)} keys}}"
    else:
        shown = f"{value}"
    print(f"  {key}: {shown}")

### 5.1 Detailed MPR Module Information

Examine the MPR module structure in detail.

In [None]:
# Module table: rendered as a DataFrame when the reader stored it.
if 'mpr_modules' not in full_meta:
    print("No MPR module information available")
else:
    print("=" * 80, "MPR MODULES", "=" * 80, sep="\n")
    modules_df = pd.DataFrame(full_meta['mpr_modules'])
    print(modules_df.to_string(index=False))

# VMP settings are optional; pretty-print them when present.
if 'mpr_vmp_settings' in full_meta:
    print("\n" + "=" * 80, "VMP SETTINGS", "=" * 80, sep="\n")
    pprint(full_meta['mpr_vmp_settings'], width=80)

# Flags dictionary is optional; show it as a two-column Flag/Meaning table.
if 'mpr_flags_dict' in full_meta:
    print("\n" + "=" * 80, "FLAGS DICTIONARY", "=" * 80, sep="\n")
    flag_rows = []
    for flag, meaning in full_meta['mpr_flags_dict'].items():
        flag_rows.append({'Flag': flag, 'Meaning': meaning})
    flags_df = pd.DataFrame(flag_rows)
    print(flags_df.to_string(index=False))

## 6. Data Quality Validation

Validate the loaded data for consistency and completeness using the new `validate_data()` API method.

In [None]:
# Run the reader's validation; the result is read below through the keys
# 'is_valid', 'n_rows', 'n_columns', 'has_time' and 'issues'.
validation_results = reader.validate_data()

print("=" * 80)
print("DATA VALIDATION RESULTS")
print("=" * 80)

# The status glyphs were stored mis-encoded ("‚úì" / "‚úó"); use the intended
# check mark and cross characters.
status_text = '✓ PASSED' if validation_results['is_valid'] else '✗ FAILED'
time_mark = '✓' if validation_results['has_time'] else '✗'

print(f"\nValidation Status: {status_text}")
print(f"Number of rows: {validation_results['n_rows']}")
print(f"Number of columns: {validation_results['n_columns']}")
print(f"Has time column: {time_mark}")

if validation_results['issues']:
    print("\nIssues found:")
    for i, issue in enumerate(validation_results['issues'], 1):
        print(f"  {i}. {issue}")
else:
    print("\n✓ No issues found - data is valid and consistent")

## 7. Data Visualization

Visualize key electrochemical data to verify the measurements are meaningful.

### 7.1 Time Series Overview

In [None]:
# Extract time data; it may be stored either as a coordinate or as a data variable.
time_col = 'time/s'
if time_col in dataset.coords:
    time = dataset.coords[time_col].values
elif time_col in dataset.data_vars:
    time = dataset[time_col].values
else:
    raise ValueError(f"Time column '{time_col}' not found")


def _is_current_column(name):
    """Return True for a '<quantity>/mA' column whose quantity contains 'i'.

    Column names follow the '<quantity>/<unit>' pattern (e.g. 'time/s').
    Requiring the unit to be exactly 'mA' keeps charge columns such as
    'Q discharge/mA.h' out of the current plot; a plain substring test for
    'i' and 'ma' would wrongly accept them.
    """
    quantity, _, unit = name.lower().rpartition('/')
    return unit.strip() == 'ma' and 'i' in quantity


# Find voltage and current columns
voltage_cols = [col for col in dataset.data_vars if 'ewe' in col.lower() and '/v' in col.lower()]
current_cols = [col for col in dataset.data_vars if _is_current_column(col)]

print(f"Found voltage columns: {voltage_cols}")
print(f"Found current columns: {current_cols}")

# Two stacked panels sharing the time axis: voltage on top, current below.
fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

# Plot voltage vs time
if voltage_cols:
    for col in voltage_cols[:3]:  # Plot up to 3 voltage columns
        axes[0].plot(time, dataset[col].values, label=col, linewidth=1.5)
    axes[0].set_ylabel('Voltage (V)', fontsize=12, fontweight='bold')
    axes[0].legend(loc='best')
    axes[0].grid(True, alpha=0.3)
    axes[0].set_title('Voltage vs Time', fontsize=14, fontweight='bold')

# Plot current vs time
if current_cols:
    for col in current_cols[:3]:  # Plot up to 3 current columns
        axes[1].plot(time, dataset[col].values, label=col, linewidth=1.5)
    axes[1].set_ylabel('Current (mA)', fontsize=12, fontweight='bold')
    axes[1].set_xlabel('Time (s)', fontsize=12, fontweight='bold')
    axes[1].legend(loc='best')
    axes[1].grid(True, alpha=0.3)
    axes[1].set_title('Current vs Time', fontsize=14, fontweight='bold')

plt.tight_layout()
plt.show()

# The check mark was stored mis-encoded ("‚úì"); use the intended character.
print("\n✓ Time series plots generated")

### 7.2 Voltage vs Capacity (if available)

In [None]:
# Find capacity columns (case-insensitive keyword match on the column name).
capacity_cols = [
    col for col in dataset.data_vars
    if any(keyword in col.lower() for keyword in ['capacity', 'q charge', 'q discharge', '(q-qo)'])
]

print(f"Found capacity columns: {capacity_cols}")

if capacity_cols and voltage_cols:
    # Plot at most two capacity columns side by side.
    plot_cols = capacity_cols[:2]
    # squeeze=False always returns a 2-D array of Axes, so the single-panel
    # case needs no special handling; take the only row.
    fig, axes = plt.subplots(1, len(plot_cols), figsize=(14, 6), squeeze=False)
    axes = axes[0]

    # The first voltage column is the y-axis of every panel; fetch it once.
    voltage = dataset[voltage_cols[0]].values

    for ax, cap_col in zip(axes, plot_cols):
        capacity = dataset[cap_col].values

        ax.plot(capacity, voltage, linewidth=1.5, color='darkblue')
        ax.set_xlabel(cap_col, fontsize=11, fontweight='bold')
        ax.set_ylabel(voltage_cols[0], fontsize=11, fontweight='bold')
        ax.set_title(f'Voltage vs {cap_col}', fontsize=12, fontweight='bold')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
    # The check mark was stored mis-encoded ("‚úì"); use the intended character.
    print("\n✓ Voltage vs capacity plots generated")
else:
    print("\nCapacity data not available for plotting")

### 7.3 Cycle Analysis (if cycle data available)

In [None]:
# Find cycle-related columns. 'cycle' also matches 'half cycle', so a
# separate 'half cycle' keyword is not needed.
cycle_cols = [
    col for col in dataset.data_vars
    if any(keyword in col.lower() for keyword in ['cycle', 'loop'])
]

print(f"Found cycle columns: {cycle_cols}")

if cycle_cols:
    # Curves below are labelled "Cycle N", so prefer a full-cycle column over
    # a half-cycle one when both exist; otherwise use the first match.
    cycle_col = next(
        (col for col in cycle_cols if 'half' not in col.lower()),
        cycle_cols[0],
    )
    cycles = dataset[cycle_col].values
    unique_cycles = np.unique(cycles)

    print("\nCycle information:")
    print(f"  Column: {cycle_col}")
    print(f"  Number of unique cycles: {len(unique_cycles)}")
    print(f"  Cycle range: {unique_cycles.min()} to {unique_cycles.max()}")

    # Plot a few cycles if voltage and capacity data available
    if voltage_cols and capacity_cols and len(unique_cycles) > 1:
        fig, ax = plt.subplots(figsize=(12, 6))

        # Fetch the plotted columns once; each cycle only needs a mask.
        capacity_values = dataset[capacity_cols[0]].values
        voltage_values = dataset[voltage_cols[0]].values

        # Plot first 5 cycles
        for cycle_num in unique_cycles[:5]:
            mask = cycles == cycle_num
            # A NaN cycle value never equals itself, giving an empty mask;
            # skipping it also avoids int(NaN) in the label.
            if mask.any():
                ax.plot(
                    capacity_values[mask],
                    voltage_values[mask],
                    label=f'Cycle {int(cycle_num)}',
                    linewidth=1.5,
                    alpha=0.8,
                )

        ax.set_xlabel(capacity_cols[0], fontsize=12, fontweight='bold')
        ax.set_ylabel(voltage_cols[0], fontsize=12, fontweight='bold')
        ax.set_title('Voltage vs Capacity by Cycle', fontsize=14, fontweight='bold')
        ax.legend(loc='best')
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()
        # The check mark was stored mis-encoded ("‚úì"); use the intended character.
        print("\n✓ Cycle analysis plots generated")
else:
    print("\nNo cycle data available")

## 8. Summary Report

Generate a comprehensive summary of the verification process.

In [None]:
# Final report: gathers results computed by the earlier cells (file info,
# validation results, metadata summary and the detected column groups).
# The emoji/check glyphs were stored mis-encoded (e.g. "üìÅ", "‚úì"); the
# intended characters are used below.
print("=" * 80)
print("VERIFICATION SUMMARY REPORT")
print("=" * 80)

print(f"\n📁 File: {file_path.name}")
print(f"   Size: {file_path.stat().st_size / 1024:.2f} KB")
print(f"   Path: {file_path.absolute()}")

print("\n📊 Data Structure:")
print(f"   Rows: {validation_results['n_rows']:,}")
print(f"   Columns: {validation_results['n_columns']}")
# `Dataset.sizes` is the supported mapping of dimension names; calling
# `.keys()` on `Dataset.dims` is deprecated in xarray and will fail once
# `dims` returns a set.
print(f"   Dimensions: {list(dataset.sizes)}")

print("\n🔬 Measurement Information:")
# NOTE(review): the metadata cell lists 'technique' among the standard keys of
# the full metadata; confirm the summary dict really uses 'ec_technique'.
print(f"   Technique: {metadata_summary.get('ec_technique', 'N/A')}")
print(f"   Sample: {metadata_summary.get('sample_name', 'N/A')}")
print(f"   Instrument: {metadata_summary.get('instrument', 'N/A')}")

if 'mpr_version' in metadata_summary:
    print("\n📋 MPR File Information:")
    print(f"   Version: {metadata_summary.get('mpr_version', 'N/A')}")
    if 'mpr_npts' in metadata_summary:
        print(f"   Data points: {metadata_summary['mpr_npts']}")
    if 'mpr_start_date' in metadata_summary:
        print(f"   Start date: {metadata_summary['mpr_start_date']}")
    if 'mpr_end_date' in metadata_summary:
        print(f"   End date: {metadata_summary['mpr_end_date']}")

print("\n✅ Validation Results:")
print(f"   Status: {'PASSED ✓' if validation_results['is_valid'] else 'FAILED ✗'}")
print(f"   Time column present: {'Yes ✓' if validation_results['has_time'] else 'No ✗'}")
if validation_results['issues']:
    print(f"   Issues: {len(validation_results['issues'])}")
    for issue in validation_results['issues']:
        print(f"     - {issue}")
else:
    print("   Issues: None ✓")

print("\n📈 Available Data Types:")
if voltage_cols:
    print(f"   Voltage columns: {len(voltage_cols)} ({', '.join(voltage_cols[:3])})")
if current_cols:
    print(f"   Current columns: {len(current_cols)} ({', '.join(current_cols[:3])})")
if capacity_cols:
    print(f"   Capacity columns: {len(capacity_cols)} ({', '.join(capacity_cols[:3])})")
if cycle_cols:
    print(f"   Cycle columns: {len(cycle_cols)} ({', '.join(cycle_cols)})")

# Only claim success when validation actually passed; previously the success
# message was printed unconditionally, even after a FAILED status above.
print("\n" + "=" * 80)
if validation_results['is_valid']:
    print("✓ VERIFICATION COMPLETE")
    print("=" * 80)
    print("\nAll verification steps completed successfully!")
    print("The BiologicMPTReader can correctly read and parse the MPR file.")
else:
    print("✗ VERIFICATION COMPLETE - VALIDATION FAILED")
    print("=" * 80)
    print("\nAll verification steps ran, but data validation reported issues (listed above).")