In [1]:
import pandas as pd
import struct
import numpy as np

In [2]:
# ---------------------------------------------------------
# CONFIGURATION
# ---------------------------------------------------------
raw_file = 'c2c_can_20Nov_2310_5.csv'      # Your RAW CSV
parsed_file = 'cpo_can_20Nov_2310_10m.csv' # Your PARSED CSV

# How close must the timestamps be to consider them a match? (in milliseconds)
TIME_TOLERANCE_MS = 20 

# ---------------------------------------------------------
# 1. LOAD AND PREPARE DATA
# ---------------------------------------------------------
print("Loading files... (this may take a moment)")
df_raw = pd.read_csv(raw_file)
df_parsed = pd.read_csv(parsed_file)

# Ensure timestamps are sorted for merging
df_raw = df_raw.sort_values('timestamp')
df_parsed = df_parsed.sort_values('timestamp')

# Fix the mixed-up byte columns in raw file
byte_cols = ['byte1', 'byte2', 'byte3', 'byte4', 'byte5', 'byte6', 'byte7', 'byte8']
# Create a list of correctly ordered columns if they exist in the dataframe
available_byte_cols = [col for col in byte_cols if col in df_raw.columns]

# ---------------------------------------------------------
# 2. IDENTIFY TARGET COLUMNS
# ---------------------------------------------------------
# Find all columns starting with "Pack_cellVoltage_"
cell_columns = [c for c in df_parsed.columns if c.startswith('Pack_cellVoltage_')]
# Sort them so we process 1, 2, 3... numerically
cell_columns.sort(key=lambda x: int(x.split('_')[-1]) if x.split('_')[-1].isdigit() else 9999)

print(f"Found {len(cell_columns)} cell voltage columns to map.")

# ---------------------------------------------------------
# 3. MAPPING ENGINE
# ---------------------------------------------------------
# We will store results here: CellName -> {ID, ByteOffset, Endianness}
cell_map = {}

print("-" * 80)
print(f"{'Cell Name':<25} | {'Source CAN ID':<15} | {'Bytes'} | {'Endian'} | {'Mux ID (Byte 0)'}")
print("-" * 80)

# We don't need to check every single row. We just need ONE good match to establish the pattern.
# We pick a sample of valid rows from the parsed file.
sample_indices = df_parsed.dropna(subset=cell_columns).index[::50] # Check every 50th row to speed up

for cell_name in cell_columns:
    # If we already mapped this cell, skip (optional, but good for verification to keep running)
    if cell_name in cell_map: continue

    match_found = False
    
    # Iterate through a few sample points to find a match
    for idx in sample_indices[:20]: # Try max 20 different timestamps
        if idx not in df_parsed.index: continue
        
        # Get the target voltage (e.g., 3.31)
        val_float = df_parsed.loc[idx, cell_name]
        if pd.isna(val_float) or val_float == 0: continue
        
        # Convert to raw mV (e.g., 3310)
        val_mv = int(round(val_float * 1000))
        
        # Create search patterns
        # Little Endian (Standard Intel)
        le_bytes = list(struct.pack('<H', val_mv)) 
        # Big Endian (Motorola)
        be_bytes = list(struct.pack('>H', val_mv))
        
        # Find the timestamp
        ts_target = df_parsed.loc[idx, 'timestamp']
        
        # Get raw rows within tolerance window
        # We filter the raw dataframe for a tiny window around the timestamp
        window = df_raw[
            (df_raw['timestamp'] >= ts_target - TIME_TOLERANCE_MS) & 
            (df_raw['timestamp'] <= ts_target + TIME_TOLERANCE_MS)
        ]
        
        if window.empty: continue

        # Scan the raw frames in this window
        for _, raw_row in window.iterrows():
            # Reconstruct byte array
            data = [raw_row[col] for col in available_byte_cols]
            
            # Search for the voltage bytes
            for i in range(len(data) - 1):
                # Check Little Endian
                if data[i] == le_bytes[0] and data[i+1] == le_bytes[1]:
                    can_id = raw_row['can_id']
                    mux = data[0] # Grab first byte in case it's a Multiplex Index
                    
                    print(f"{cell_name:<25} | {can_id:<15} | {i}-{i+1}   | LE       | {mux}")
                    cell_map[cell_name] = {'id': can_id, 'bytes': (i, i+1), 'endian': 'LE'}
                    match_found = True
                    break
                
                # Check Big Endian
                if data[i] == be_bytes[0] and data[i+1] == be_bytes[1]:
                    can_id = raw_row['can_id']
                    mux = data[0]
                    
                    print(f"{cell_name:<25} | {can_id:<15} | {i}-{i+1}   | BE       | {mux}")
                    cell_map[cell_name] = {'id': can_id, 'bytes': (i, i+1), 'endian': 'BE'}
                    match_found = True
                    break
            
            if match_found: break
        if match_found: break

print("-" * 80)
print(f"Mapping complete. Identified {len(cell_map)}/{len(cell_columns)} cells.")

Loading files... (this may take a moment)
Found 576 cell voltage columns to map.
--------------------------------------------------------------------------------
Cell Name                 | Source CAN ID   | Bytes | Endian | Mux ID (Byte 0)
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Mapping complete. Identified 0/576 cells.
