#### **Library Imports**

In [2]:
# Data Ingestion
import tempfile
from pathlib import Path
import py7zr

# Data Manipulation
import numpy as np
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
def read_parquet_gzip_7z(filepath: str | Path) -> pd.DataFrame:
    """Read a parquet file packed inside a 7z archive into a DataFrame.

    The archive is extracted into a temporary directory (removed again when
    the function returns) and one top-level entry is read with
    ``pd.read_parquet``. The '.gzip' in names such as ``*.parquet.gzip.7z``
    refers to the parquet-internal compression codec, not external gzip
    wrapping.

    Entry selection is deterministic: the top-level entries are sorted by
    path and the first one whose name contains ".parquet" is used. When no
    entry matches, the first sorted entry is used.

    Args:
        filepath: Location of the .7z archive.

    Returns:
        The DataFrame parsed from the selected archive entry.

    Raises:
        ValueError: If extraction yields no entries at all.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        with py7zr.SevenZipFile(filepath, mode="r") as archive:
            archive.extractall(path=tmpdir)

        # iterdir() yields entries in arbitrary order; sorting makes the
        # choice below reproducible across runs and platforms.
        extracted = sorted(Path(tmpdir).iterdir())
        if not extracted:
            raise ValueError(f"No files found inside {filepath}")

        # Prefer the entry that looks like the parquet payload so that extra
        # archive members (e.g. a README) are not picked up by accident.
        parquet_like = [p for p in extracted if ".parquet" in p.name.lower()]
        target = parquet_like[0] if parquet_like else extracted[0]

        # Read while the temporary directory still exists.
        return pd.read_parquet(target)

In [4]:
# Archive to load, given relative to the notebook's working directory.
path = "../data/5FT0192.7z"
df = read_parquet_gzip_7z(path)

In [5]:
# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0,Timestamp,EEC1_Engine.ActualEngPercentTorque,EEC1_Engine.DriversDemandEngPercentTorque,EEC1_Engine.EngSpeed,EEC1_Engine.SrcAddrssOfCntrllngDvcForEngCtrl,LFE_Engine.EngFuelRate,EC1_Engine.EngReferenceTorque,EEC2_Engine.AccelPedalPos1,EEC2_Engine.ActlMaxAvailableEngPercentTorque,CCVS1_Engine.BrakeSwitch,...,TTC0016_FMI,TTC0016_FOC,TTC0017_FMI,TTC0017_FOC,NavSatNum,JammingSt,cwSuppress,HDOP,GpsReset,AutoMountState
0,4.192,0.0,0.0,0.0,0.0,0.0,2402.0,0.0,0.0,0.0,...,255.0,0.0,255.0,0.0,13.0,0.0,29.0,99.99,0.0,0.0
1,4.292,0.0,0.0,0.0,0.0,0.0,2402.0,0.0,0.0,0.0,...,255.0,0.0,255.0,0.0,13.0,0.0,29.0,99.99,0.0,0.0
2,4.392,0.0,0.0,0.0,0.0,0.0,2402.0,0.0,0.0,0.0,...,255.0,0.0,255.0,0.0,13.0,0.0,29.0,99.99,0.0,0.0
3,4.492,0.0,0.0,0.0,0.0,0.0,2402.0,0.0,0.0,0.0,...,255.0,0.0,255.0,0.0,13.0,0.0,29.0,99.99,0.0,0.0
4,4.592,0.0,0.0,0.0,0.0,0.0,2402.0,0.0,0.0,0.0,...,255.0,0.0,255.0,0.0,13.0,0.0,29.0,99.99,0.0,0.0


In [None]:
# Summary statistics for the loaded frame.
df.describe()

#### Override Events
1. Brake override
2. Throttle override
3. Cruise disengage override
4. Speed governor / iQC override
5. PCC system override

In [None]:
# Schema of the override-event table, one column name per line.
columns = [
    "override_id",
    "override_timestamp",
    "override_type",
    "cruise_active_pre",
    "vehicle_speed",
    "driver_input_type",
    "system_override_flag",
]

# Start from an empty frame that carries only the column layout.
overrides = pd.DataFrame(columns=columns)