# Biologic Reader Verification Notebook

This notebook demonstrates step-by-step how to use `loaders.py` to read Biologic `.mpr` files using the `biologic_reader.py` plugin.

## Objective
Verify that `src/echemistpy/utils/external/echem/biologic_reader.py` works correctly by:
1. Loading the file using `loaders.py`
2. Inspecting the data structure
3. Examining metadata
4. Displaying data columns and sample rows

## Step 1: Import Required Modules

In [None]:
import sys
from pathlib import Path

# Prepend ./src (resolved against the current working directory) to the
# import path, but only when that directory exists and is not already listed.
src_path = Path.cwd() / 'src'
src_entry = str(src_path)
if src_path.exists() and src_entry not in sys.path:
    sys.path.insert(0, src_entry)

# The loader entry point used by every later cell
from echemistpy.io.loaders import load_data_file

print("✓ Imports successful")

## Step 2: Verify File Existence

In [None]:
# Sample Biologic file; the path is relative to the current working directory
mpr_file = Path('examples/echem/Biologic_GPCL.mpr')

# Stop here with an explicit error when the sample file is missing
if not mpr_file.exists():
    print(f"✗ File not found: {mpr_file}")
    raise FileNotFoundError(f"Cannot find {mpr_file}")

print(f"✓ File found: {mpr_file}")
print(f"  File size: {mpr_file.stat().st_size:,} bytes")

## Step 3: Load the MPR File

This step uses `load_data_file()` which automatically detects the `.mpr` extension and calls the appropriate loader (`_load_biologic`).

In [None]:
# Read the file through the generic loader entry point
print("Loading file...")
raw_measurement = load_data_file(mpr_file)
print("✓ File loaded successfully")

# Report the concrete types of the returned object and its two parts
measurement_type = type(raw_measurement)
print(f"\nType: {measurement_type}")
data_type = type(raw_measurement.data)
print(f"Data type: {data_type}")
metadata_type = type(raw_measurement.metadata)
print(f"Metadata type: {metadata_type}")

## Step 4: Inspect Data Structure

Let's examine the xarray Dataset structure.

In [None]:
# Unwrap the dataset object held by the measurement's data container
dataset = raw_measurement.data.data

separator = "=" * 60
print("Dataset Overview:")
print(separator)
print(dataset)
print("\n" + separator)

## Step 5: Examine Data Columns

In [None]:
# One numbered line per data variable, showing its shape and dtype
separator = "=" * 60
print("Data Columns:")
print(separator)

column_names = list(dataset.data_vars)
for position, name in enumerate(column_names, start=1):
    variable = dataset[name]
    print(f"{position:2d}. {name:30s} - shape: {variable.shape}, dtype: {variable.dtype}")

print(f"\nTotal columns: {len(column_names)}")
# 'N/A' is reported when the dataset has no dimension named 'row'
print(f"Total rows: {dataset.sizes.get('row', 'N/A')}")

## Step 6: Display Sample Data

Show the first few rows of data.

In [None]:
# A pandas DataFrame gives a tabular view of the dataset; later cells reuse
# both `df` and the `pd` alias.
import pandas as pd

df = dataset.to_dataframe()
separator = "=" * 60

print("First 10 rows of data:")
print(separator)
first_rows = df.head(10)
display(first_rows)

print("\nLast 10 rows of data:")
print(separator)
last_rows = df.tail(10)
display(last_rows)

## Step 7: Examine Metadata

Inspect all metadata associated with the measurement.

In [None]:
import json

separator = "=" * 60
print("Metadata:")
print(separator)

metadata = raw_measurement.metadata.meta

# Scalar entries print on a single line; dicts and lists are pretty-printed
# as indented JSON under their key.
for field, entry in metadata.items():
    is_nested = isinstance(entry, (dict, list))
    if not is_nested:
        print(f"{field}: {entry}")
        continue
    print(f"\n{field}:")
    # default=str stringifies any value json cannot serialise natively
    print(json.dumps(entry, indent=2, default=str))

print("\n" + separator)

## Step 8: Data Statistics

Calculate basic summary statistics for the data columns using `df.describe()` (numeric columns by default).

In [None]:
separator = "=" * 60
print("Data Statistics:")
print(separator)

# Summary table produced by pandas' describe()
summary = df.describe()
display(summary)

print("\n" + separator)

## Step 9: Check for Missing Values

In [None]:
separator = "=" * 60
print("Missing Values Check:")
print(separator)

# Per-column count of missing entries and its share of all rows
missing = df.isna().sum()
missing_pct = missing / len(df) * 100

missing_df = pd.DataFrame({'Missing Count': missing, 'Percentage': missing_pct})

# Keep only the columns that actually contain missing values
missing_df = missing_df.loc[missing_df['Missing Count'] > 0]

if missing_df.empty:
    print("✓ No missing values found")
else:
    display(missing_df)

print("\n" + separator)

## Step 10: Visualize Key Data

Create simple plots to visualize the electrochemical data.

In [None]:
import matplotlib.pyplot as plt


def _find_column(lowered_columns, *matchers):
    """Return the first column accepted by the highest-priority matcher.

    Matchers are tried in the order given; for each matcher the columns are
    scanned in their original order. The result therefore no longer depends
    on which loosely matching column happens to come last in the frame.

    Args:
        lowered_columns: Mapping of lower-cased column name -> original name.
        *matchers: Predicates on the lower-cased name, most specific first.

    Returns:
        The original column name, or None when no matcher accepts any column.
    """
    for accepts in matchers:
        for key, original in lowered_columns.items():
            if accepts(key):
                return original
    return None


def _draw_panel(ax, data, x_col, y_col, xlabel, ylabel, title, *, scatter=False, **style):
    """Draw one subplot, or label it as unavailable when a column is missing.

    Args:
        ax: Target matplotlib axes.
        data: DataFrame holding the columns to plot.
        x_col: Column name for the x axis, or None if it was not found.
        y_col: Column name for the y axis, or None if it was not found.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        title: Panel title (set even when the data is missing).
        scatter: Use ``ax.scatter`` instead of ``ax.plot``.
        **style: Extra keyword arguments forwarded to the drawing call.
    """
    ax.set_title(title)
    if x_col is None or y_col is None:
        # Say why the panel is blank instead of leaving an unexplained empty axes.
        ax.text(0.5, 0.5, 'Required column(s) not found',
                ha='center', va='center', transform=ax.transAxes)
        return
    draw = ax.scatter if scatter else ax.plot
    draw(data[x_col], data[y_col], **style)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.grid(True, alpha=0.3)


# Create figure with subplots
fig, axes = plt.subplots(2, 2, figsize=(14, 10))
fig.suptitle('Biologic GPCL Data Visualization', fontsize=16, fontweight='bold')

# Case-insensitive lookup: lower-cased name -> original column name
cols_lower = {str(col).lower(): col for col in df.columns}

# Locate the columns to plot. Specific patterns are tried before loose ones so
# that, for example, a column containing 'capacity' is preferred over any
# column that merely contains the letter 'q'.
time_col = _find_column(
    cols_lower,
    lambda key: key == 'time/s',  # presumably the reader's elapsed-time column -- TODO confirm
    lambda key: 'time' in key and 's' in key,
)
voltage_col = _find_column(
    cols_lower,
    lambda key: 'ewe' in key,
    lambda key: 'voltage' in key,
)
current_col = _find_column(
    cols_lower,
    lambda key: 'current' in key or '<i>' in key,
)
capacity_col = _find_column(
    cols_lower,
    lambda key: 'capacity' in key,
    lambda key: 'q' in key,  # loose fallback; may pick an unrelated column -- verify
)

# Plot 1: Voltage vs Time
_draw_panel(axes[0, 0], df, time_col, voltage_col,
            'Time (s)', 'Voltage (V)', 'Voltage vs Time',
            linewidth=0.8)

# Plot 2: Current vs Time
_draw_panel(axes[0, 1], df, time_col, current_col,
            'Time (s)', 'Current (mA)', 'Current vs Time',
            linewidth=0.8, color='orange')

# Plot 3: Voltage vs Capacity
_draw_panel(axes[1, 0], df, capacity_col, voltage_col,
            'Capacity (mAh)', 'Voltage (V)', 'Voltage vs Capacity',
            linewidth=0.8, color='green')

# Plot 4: Current vs Voltage
_draw_panel(axes[1, 1], df, voltage_col, current_col,
            'Voltage (V)', 'Current (mA)', 'Current vs Voltage',
            scatter=True, s=1, alpha=0.5, color='red')

plt.tight_layout()
plt.show()

print("✓ Visualization complete")

## Step 11: Verify BiologicMPTReader Direct Access

Instantiate the `BiologicMPTReader` class directly and list its public (non-underscore) attributes and methods.

In [None]:
from echemistpy.utils.external.echem.biologic_reader import BiologicMPTReader

# Instantiate the reader with no arguments
reader = BiologicMPTReader()

separator = "=" * 60
print("BiologicMPTReader Methods:")
print(separator)

# dir() yields every attribute name, so after dropping the underscore-prefixed
# ones this listing may include non-callable attributes as well as methods.
methods = [name for name in dir(reader) if not name.startswith('_')]
for index, name in enumerate(methods, start=1):
    print(f"{index:2d}. {name}")

print("\n" + separator)

## Step 12: Summary

Summarize the verification results.

In [None]:
separator = "=" * 60

print("\n" + separator)
print("VERIFICATION SUMMARY")
print(separator)

# Recap of what the earlier cells loaded; missing metadata keys fall back to 'Unknown'
print(f"✓ File loaded: {mpr_file.name}")
n_rows, n_cols = df.shape
print(f"✓ Data shape: {n_rows} rows × {n_cols} columns")
technique = metadata.get('technique', 'Unknown')
print(f"✓ Technique: {technique}")
source_file = metadata.get('source_file', 'Unknown')
print(f"✓ Source file: {source_file}")

print("\n" + separator)
print("CONCLUSION: Biologic reader is working correctly!")
print(separator)