In [5]:
import os
import numpy as np
import pandas as pd
import random

In [6]:
# ---------- CONSTANTS known from Simulink ------------
SOC_VEC = np.array([0.0, 0.1, 0.25, 0.5, 0.75, 0.9, 1.0])
T_VEC   = np.array([278.0, 293.0, 313.0])                 # Kelvin

# Open‑circuit voltage table 7×3 (rows = SOC, cols = T)
V0_MAT  = np.array([
    [3.4966, 3.5057, 3.5148],
    [3.5519, 3.5660, 3.5653],
    [3.6183, 3.6337, 3.6402],
    [3.7066, 3.7127, 3.7213],
    [3.9131, 3.9259, 3.9376],
    [4.0748, 4.0777, 4.0821],
    [4.1923, 4.1928, 4.1930]
])

# Terminal‑resistance table 7×3
R0_MAT  = np.array([
    [0.0117, 0.0085, 0.0090],
    [0.0110, 0.0085, 0.0090],
    [0.0114, 0.0087, 0.0092],
    [0.0107, 0.0082, 0.0088],
    [0.0107, 0.0083, 0.0091],
    [0.0113, 0.0085, 0.0089],
    [0.0116, 0.0085, 0.0089]
])

NS    = 110       # series cells
Q_NOM = 57.0      # Ah

# Heuristic for hot‑cell proxy
K_HOT = 1.5       # K per (kW·K/W)  (tune)
R_TH  = 0.2       # K/W             (tune)


def bilinear(tab, x_vec, y_vec, x, y):
    """fast vectorised bilinear interpolation for 2‑D table"""
    # clip
    x = np.clip(x, x_vec[0], x_vec[-1])
    y = np.clip(y, y_vec[0], y_vec[-1])

    # indices
    xi = np.searchsorted(x_vec, x, side='right') - 1
    yi = np.searchsorted(y_vec, y, side='right') - 1
    xi = np.clip(xi, 0, len(x_vec)-2)
    yi = np.clip(yi, 0, len(y_vec)-2)

    x1, x2 = x_vec[xi], x_vec[xi+1]
    y1, y2 = y_vec[yi], y_vec[yi+1]

    f11 = tab[xi,   yi  ]
    f21 = tab[xi+1, yi  ]
    f12 = tab[xi,   yi+1]
    f22 = tab[xi+1, yi+1]

    # weights
    wx = (x - x1) / (x2 - x1 + 1e-12)
    wy = (y - y1) / (y2 - y1 + 1e-12)

    return (f11 * (1-wx)*(1-wy) +
            f21 * wx*(1-wy)     +
            f12 * (1-wx)*wy     +
            f22 * wx*wy)

def process_csv(path, out_dir):
    df = pd.read_csv(path)

    dt      = df['sim_dt_physics'].values
    I       = df['sim_battery_curr'].values          # A
    soc     = df['sim_battery_soc'].values           # 0-1
    T       = df['sim_battery_temp'].values + 273.15 # K
    speed   = df['sim_speed'].values                 # km/h

    # --- OCV & R0 ---------------------------------------------------------
    ocv = bilinear(V0_MAT, SOC_VEC, T_VEC, soc, T)
    r0  = bilinear(R0_MAT, SOC_VEC, T_VEC, soc, T)

    V_pack = ocv * NS
    P_pack = I * V_pack                       # W  (-ve = regen)
    df['P_pack_W'] = P_pack

    # --- simple derived channels -----------------------------------------
    df['C_rate']   = np.abs(I) / Q_NOM
    df['P_loss_W'] = I**2 * r0

    # --- cumulative energy & distance ------------------------------------
    df['E_kWh_cum']      = np.cumsum(P_pack * dt) / 3.6e6
    distance_m           = np.cumsum(speed * (1000/3600) * dt)
    df['distance_km_cum'] = distance_m / 1000.0

    # --- instantaneous Wh / km -------------------------------------------
    with np.errstate(divide='ignore', invalid='ignore'):
        wh_inst = np.where(speed > 1e-2,
                           P_pack/1000.0 / speed * 100.0,   # → Wh/100 km
                           np.nan)

    df['Wh_per_km_mask'] = (~np.isnan(wh_inst)).astype(np.float32)   # 🆕 mask
    df['Wh_per_km_inst'] = np.nan_to_num(wh_inst, nan=0.0)           # 🆕 zero-fill

    # --- rolling 1-km Wh / km --------------------------------------------
    wh_1km = np.full_like(P_pack, np.nan, dtype=float)
    j0 = 0
    for j in range(len(df)):
        while distance_m[j] - distance_m[j0] > 1000.0:
            j0 += 1
        dE = (df['E_kWh_cum'].iloc[j] - df['E_kWh_cum'].iloc[j0]) * 1000.0  # Wh
        wh_1km[j] = dE                                # Wh per 1 km window

    df['Wh_per_km_rolling_mask'] = (~np.isnan(wh_1km)).astype(np.float32)    # 🆕
    df['Wh_per_km_rolling']      = np.nan_to_num(wh_1km, nan=0.0)            # 🆕

    # --- hot-cell proxy ---------------------------------------------------
    df['T_hot_proxy'] = (df['sim_battery_temp'] +
                         K_HOT * np.abs(P_pack)/1000.0 * R_TH)

    # --- save -------------------------------------------------------------
    os.makedirs(out_dir, exist_ok=True)
    outfile = os.path.join(out_dir,
                           os.path.basename(path).replace('.csv', '_aug.csv'))
    df.to_csv(outfile, index=False)
    print(f'✓ {outfile} written')

In [8]:
# ----------- Configuration -----------
logs_dir = r"F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\SimulinkCarlaCoSimAPI"               # Input folder with raw CSV files
output_dir = r"F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\RL_network\logs_augmented"   # Output folder for processed data
test_ratio = 0.1                # 10% test data
random_seed = 42                # For reproducibility
# --------------------------------------

# Create output directories
os.makedirs(output_dir, exist_ok=True)
train_dir = os.path.join(output_dir, "train")
test_dir = os.path.join(output_dir, "test")
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)

# Get all CSV files in logs folder
all_files = [os.path.join(logs_dir, f) for f in os.listdir(logs_dir) if f.endswith(".csv")]
random.Random(random_seed).shuffle(all_files)  # Shuffle deterministically

# Split into train/test
split_idx = int(len(all_files) * (1 - test_ratio))
train_files = all_files[:split_idx]
test_files = all_files[split_idx:]

# Process files
print(f"Processing {len(train_files)} training files and {len(test_files)} test files...")
for file in train_files:
    process_csv(file, train_dir)
for file in test_files:
    process_csv(file, test_dir)

print("Done! Processed files saved to:", output_dir)

Processing 14 training files and 2 test files...
✓ F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\RL_network\logs_augmented\train\sim_B5_mapTown04_soc1.0_temp30C_20250513_012403_aug.csv written
✓ F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\RL_network\logs_augmented\train\sim_B5_mapTown05_soc1.0_temp30C_20250513_115325_aug.csv written
✓ F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\RL_network\logs_augmented\train\sim_A5_mapTown05_soc1.0_temp40C_20250512_210557_aug.csv written
✓ F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\RL_network\logs_augmented\train\sim_B1_mapTown04_soc1.0_temp30C_20250512_235255_aug.csv written
✓ F:\Onedrive\Uni\MSc_uddannelse\4_semester\KandidatThesis\Thesis_Implementation\Scripts\RL_network\logs_augmented\train\sim_D1_mapTown05_soc1.0_temp-5C_20250514_232549_aug.csv written
✓ F:\Onedrive\Uni\MSc_udda

KeyError: 'sim_dt_physics'