In [33]:
import zlib
import struct
import pandas as pd
from pathlib import Path
from typing import Dict, Any, Tuple

def read_full_datz_contents(file_path: str, file_timestamp: float) -> Dict[str, Any]:
    """
    Reads a .datZ file and returns both the raw text header (metadata)
    and the parsed event data.
    
    Args:
        file_path: Path to the .datZ file
        file_timestamp: UTC start timestamp (required for calculating event times)
        
    Returns:
        Dictionary with:
        - 'header_text': The text portion of the file (metadata)
        - 'data': Pandas DataFrame of the events
    """
    path = Path(file_path)
    
    # 1. Read and Decompress
    try:
        raw_bytes = path.read_bytes()
        content = zlib.decompress(raw_bytes)  #
    except zlib.error as e:
        raise ValueError(f"Failed to decompress {path.name}: {e}")

    # 2. Find the split point between Text and Binary
    # The file uses "Phases in use:" as the delimiter
    marker = b"Phases in use:"
    marker_pos = content.find(marker)
    
    if marker_pos == -1:
        raise ValueError("Invalid format: Marker 'Phases in use:' not found.")

    # Find the newline after the marker to get the exact start of binary data
    newline_pos = content.find(b'\n', marker_pos)
    if newline_pos == -1:
        raise ValueError("Invalid format: No newline found after marker.")
    
    # 3. Split content
    # Everything up to the newline is text metadata
    header_bytes = content[:newline_pos + 1]
    # Everything after is binary event data
    binary_bytes = content[newline_pos + 1:]
    
    # 4. Decode Header (Metadata)
    try:
        header_text = header_bytes.decode('utf-8', errors='replace')
    except Exception:
        header_text = header_bytes.decode('ascii', errors='replace')

    # 5. Parse Binary Data
    # Logic derived from decoders.py _parse_binary_payload
    records = []
    row_size = 4
    num_rows = len(binary_bytes) // row_size
    
    for i in range(num_rows):
        chunk = binary_bytes[i * row_size : (i + 1) * row_size]
        
        # Unpack: >BBH (Big-Endian, EventCode, Parameter, TimeOffset)
        # Byte 0: Event Code
        # Byte 1: Parameter 
        # Bytes 2-3: Time offset in deciseconds
        event_code, parameter, offset_ds = struct.unpack('>BBH', chunk)
        
        # Calculate absolute timestamp
        event_ts = file_timestamp + (offset_ds / 10.0)
        
        records.append({
            'timestamp': event_ts,
            'event_code': event_code,
            'parameter': parameter
        })

    df = pd.DataFrame(records)
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s')

    return {
        'header_text': header_text,
        'data': df
    }

# --- Example Usage ---
if __name__ == "__main__":
    # You need the timestamp from the filename (as seen in ingestion.py)
    # Example: ECON_10.70.10.51_2025_01_15_1430.datZ
    # For this test, you can just use a dummy timestamp if you only care about content
    import time
    
    try:
        # Replace with your actual file path
        fpath = "C:\\Users\\rhansen\\Documents\\Python\\pyatspm\\intersections\\2068_US-95_and_SH-8\\raw_data\\ECON_10.37.2.68_2026_02_17_1230.datZ"
        
        # Note: You still need a base timestamp because the binary only contains offsets!
        # Use 0.0 or the current time if you just want to see the relative data structure.
        result = read_full_datz_contents(fpath, file_timestamp=pd.to_datetime(fpath[-20:-5], format='%Y_%m_%d_%H%M').timestamp())
        
        print("--- HEADER METADATA ---")
        print(result['header_text'])
        
        print("\n--- EVENT DATA ---")
        print(result['data'].head())
        
    except FileNotFoundError:
        print("File not found. Please update 'fpath' to a real .datZ file.")

--- HEADER METADATA ---
2-17-2026 12:30:00.1,Version #:,3
2-17-2026 12:30:00.1,ECON_10.37.2.68_2026_02_17_1230.datZ
2-17-2026 12:30:00.1,Intersection #:,10.37.2.68
2-17-2026 12:30:00.1,IP Address:,10.37.2.68
2-17-2026 12:30:00.1,MAC Address:,00:04:81:04:dd:b5
2-17-2026 12:30:00.1,Controller Data Log Beginning:,2/17/2026,12:30:00.1
2-17-2026 12:30:00.1,Phases in use:,4,6,8


--- EVENT DATA ---
                      timestamp  event_code  parameter
0 2026-02-17 12:30:00.000000000          82         58
1 2026-02-17 12:30:00.500000000          81         58
2 2026-02-17 12:30:00.700000048           3          8
3 2026-02-17 12:30:00.700000048          15          8
4 2026-02-17 12:30:01.000000000          82         57


In [34]:
result['data']

Unnamed: 0,timestamp,event_code,parameter
0,2026-02-17 12:30:00.000000000,82,58
1,2026-02-17 12:30:00.500000000,81,58
2,2026-02-17 12:30:00.700000048,3,8
3,2026-02-17 12:30:00.700000048,15,8
4,2026-02-17 12:30:01.000000000,82,57
...,...,...,...
2588,2026-02-17 12:44:58.900000095,82,50
2589,2026-02-17 12:44:59.200000048,81,50
2590,2026-02-17 12:44:59.599999905,43,4
2591,2026-02-17 12:44:59.599999905,82,36
