# HVAC Power Consumption Prediction in Electric Vehicles

## Main GOAL: To develop a highly accurate, real-time predictive model for the HVAC system's instantaneous power consumption in electric vehicles, utilizing a comprehensive,real-world dataset

### Notebook focusses on DATA PROCESSING

### Imports

In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import re
import warnings
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import mean_squared_error, make_scorer

# Optional: XGBoost
import xgboost as xgb

In [2]:
def clean_dataframe_columns(df: pd.DataFrame) -> pd.DataFrame:
    def clean_column(name: str) -> str:
        # 1. Remove units in brackets
        name = name.split("[")[0].strip()
        # 2. Replace special characters/spaces with underscores
        name = name.replace(".", "").replace(" ", "_").replace("-", "_").replace(")", "").replace("(", "")
        # 3. Lowercase for consistency
        name = name.lower()
        return name

    # Apply to all columns
    df = df.rename(columns={col: clean_column(col) for col in df.columns})
    return df

In [3]:
for cat in [FutureWarning, RuntimeWarning, UserWarning]:
    warnings.filterwarnings("ignore", category=cat)

### Data Loading

In [4]:
variables_list = []  # Only for CSVs

for dirname, _, files in os.walk('/kaggle/input'):
    for file in files:
        if file.endswith('.csv') or file.endswith('.xlsx'):
            name = os.path.splitext(file)[0]
            safe_name = name.replace(' ', '_').replace('-', '_')
            path = os.path.join(dirname, file)

            df = None  # Initialize to None

            # Try reading CSV with fallback encodings
            if file.endswith('.csv'):
                for enc in ['utf-8', 'ISO-8859-1', 'cp1252']:
                    try:
                        df = pd.read_csv(path, encoding=enc,sep=';')
                        print(f"Sucessfully read {file}")
                        break  # Success!
                    except UnicodeDecodeError:
                        continue  # Try next encoding
                    except Exception as e:
                        print(f"Unexpected error reading {file}: {e}")
                        break  # Stop trying on unexpected errors
                if df is None:
                    print(f"Failed to read {file} with all encodings.")
                    continue
                globals()[safe_name] = df
                variables_list.append(safe_name)

            elif file.endswith('.xlsx'):
                try:
                    df = pd.read_excel(path)
                    globals()[safe_name] = df
                except Exception as e:
                    print(f"Failed to read Excel {file}: {e}")

Sucessfully read TripB31.csv
Sucessfully read TripB22.csv
Sucessfully read TripA07.csv
Sucessfully read TripB18.csv
Sucessfully read TripB35.csv
Sucessfully read TripA30.csv
Sucessfully read TripB34.csv
Sucessfully read TripB24.csv
Sucessfully read TripA15.csv
Sucessfully read TripA18.csv
Sucessfully read TripB12.csv
Sucessfully read TripA27.csv
Sucessfully read TripB36.csv
Sucessfully read TripB29.csv
Sucessfully read TripA28.csv
Sucessfully read TripB15.csv
Sucessfully read TripB13.csv
Sucessfully read TripA06.csv
Sucessfully read TripA32.csv
Sucessfully read TripB33.csv
Sucessfully read TripA11.csv
Sucessfully read TripB17.csv
Sucessfully read TripB02.csv
Sucessfully read TripA14.csv
Sucessfully read TripB32.csv
Sucessfully read TripB38.csv
Sucessfully read TripA21.csv
Sucessfully read TripA08.csv
Sucessfully read TripB30.csv
Sucessfully read TripA23.csv
Sucessfully read TripA05.csv
Sucessfully read TripA09.csv
Sucessfully read TripB20.csv
Sucessfully read TripA22.csv
Sucessfully re

### Data Combining

In [5]:
trip_a_dfs = []
trip_b_dfs = []

for var_name in variables_list:
    match = re.search(r'(Trip)?([AB])(\d{2})', var_name, re.IGNORECASE)
    if match:
        trip_letter = match.group(2).upper()  # 'A' or 'B'
        trip_number = match.group(3)          # '01', '18', etc.
        trip_id = f"{trip_letter}{trip_number}"
        
        df = globals()[var_name].copy()
        df['Trip'] = trip_id
        df['season'] = 'summer' if trip_letter == 'A' else 'winter'

        if trip_letter == 'A':
            trip_a_dfs.append(df)
        else:
            trip_b_dfs.append(df)
    else:
        print(f"Skipped unknown trip format: {var_name}")

# Combine all trips into separate DataFrames
TripA = pd.concat(trip_a_dfs, ignore_index=True)
TripB = pd.concat(trip_b_dfs, ignore_index=True)

### Data Preprocessing

### *Overview.xlsx*

In [6]:
Overview.head()

Unnamed: 0,Trip,Date,Route/Area,Weather,Battery Temperature (Start) [°C],Battery Temperature (End),Battery State of Charge (Start),Battery State of Charge (End),Unnamed: 8,Ambient Temperature (Start) [°C],Target Cabin Temperature,Distance [km],Duration [min],Unnamed: 13,Fan,Note
0,TripA01,2019-06-25_13-21-14,Munich East,sunny,21.0,22.0,0.863,0.803,0.06,25.5,23.0,7.42769,16.82,,"Automatic, Level 1",
1,TripA02,2019-06-25_14-05-31,Munich East,sunny,23.0,26.0,0.803,0.673,0.13,32.0,23.0,23.509709,23.55,,"Automatic, Level 1",Target Cabin Temperature changed
2,TripA03,2019-06-28_10-02-15,Munich East,sunny,24.0,25.0,0.835,0.751,0.084,21.5,27.0,12.820846,11.18,,"Automatic, Level 1",Target Cabin Temperature changed
3,TripA04,2019-06-28_10-13-30,Munich East,sunny,25.0,27.0,0.751,0.667,0.084,24.0,22.0,10.727491,6.87,,"Automatic, Level 1",
4,TripA05,2019-06-28_10-20-26,Munich East,sunny,27.0,27.0,0.667,0.602,0.065,24.5,24.0,12.393223,22.776667,,"Automatic, Level 1",


In [7]:
Overview.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72 entries, 0 to 71
Data columns (total 16 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Trip                              70 non-null     object 
 1   Date                              70 non-null     object 
 2   Route/Area                        70 non-null     object 
 3   Weather                           70 non-null     object 
 4   Battery Temperature (Start) [°C]  70 non-null     float64
 5   Battery Temperature (End)         70 non-null     float64
 6   Battery State of Charge (Start)   70 non-null     float64
 7   Battery State of Charge (End)     70 non-null     float64
 8   Unnamed: 8                        70 non-null     float64
 9   Ambient Temperature (Start) [°C]  70 non-null     float64
 10  Target Cabin Temperature          70 non-null     float64
 11  Distance [km]                     70 non-null     float64
 12  Duration [

In [8]:
Overview = Overview.drop(columns=['Unnamed: 13'])

In [9]:
Overview.isnull().sum()

Trip                                 2
Date                                 2
Route/Area                           2
Weather                              2
Battery Temperature (Start) [°C]     2
Battery Temperature (End)            2
Battery State of Charge (Start)      2
Battery State of Charge (End)        2
Unnamed: 8                           2
Ambient Temperature (Start) [°C]     2
Target Cabin Temperature             2
Distance [km]                        2
Duration [min]                       2
Fan                                  2
Note                                46
dtype: int64

In [10]:
Overview[Overview['Trip'].isnull()]

Unnamed: 0,Trip,Date,Route/Area,Weather,Battery Temperature (Start) [°C],Battery Temperature (End),Battery State of Charge (Start),Battery State of Charge (End),Unnamed: 8,Ambient Temperature (Start) [°C],Target Cabin Temperature,Distance [km],Duration [min],Fan,Note
32,,,,,,,,,,,,,,,
33,,,,,,,,,,,,,,,


In [11]:
Overview.dropna(subset=['Trip'], inplace=True)

In [12]:
Overview.isnull().sum()

Trip                                 0
Date                                 0
Route/Area                           0
Weather                              0
Battery Temperature (Start) [°C]     0
Battery Temperature (End)            0
Battery State of Charge (Start)      0
Battery State of Charge (End)        0
Unnamed: 8                           0
Ambient Temperature (Start) [°C]     0
Target Cabin Temperature             0
Distance [km]                        0
Duration [min]                       0
Fan                                  0
Note                                44
dtype: int64

In [13]:
Overview['Weather'].value_counts()

Weather
sunny                 26
slightly cloudy       20
cloudy                13
dark                   4
sunrise                3
rainy                  2
dark, little rainy     1
sunset                 1
Name: count, dtype: int64

#### Feature Extraction

In [14]:
Overview['Note'].value_counts()

Note
Target Cabin Temperature changed                            4
Sunday -> little traffic                                    4
Rush Hour                                                   4
+60 kg                                                      2
+ 160 kg                                                    2
Fast Charging, '+70 kg                                      1
+70 kg                                                      1
+70kg                                                       1
FTMRoute                                                    1
FTMRoute (directly after previous trip)                     1
FTM Route (directly after previous trip)                    1
+80 kg                                                      1
+70 kg                                                      1
Sunday -> little traffic, (directly after previous trip)    1
Open door at 37:10 and 42:00 for 30s                        1
Name: count, dtype: int64

In [15]:
# Fill NaN with empty string for easier processing
Overview["Note"] = Overview["Note"].fillna("")

In [16]:
# Extra weight (numeric)
Overview["extra_weight"] = Overview["Note"].str.extract(r"\+(\d+)\s*kg", expand=False)
Overview["extra_weight"] = Overview["extra_weight"].astype(float).fillna(0)

# Binary flags
Overview["is_sunday"] = Overview["Note"].str.contains("Sunday", case=False).astype(int)
Overview["rush_hour"] = Overview["Note"].str.contains("Rush Hour", case=False).astype(int)
Overview["fast_charging"] = Overview["Note"].str.contains("Fast Charging", case=False).astype(int)
Overview["after_previous_trip"] = Overview["Note"].str.contains("after previous trip", case=False).astype(int)
Overview["open_door_event"] = Overview["Note"].str.contains("Open door", case=False).astype(int)
Overview["target_temp_changed"] = Overview["Note"].str.contains("Target Cabin Temperature changed", case=False).astype(int)

# Drop the original Note column
Overview = Overview.drop(columns=["Note"])

In [17]:
Overview.nunique()

Trip                                70
Date                                69
Route/Area                           9
Weather                              8
Battery Temperature (Start) [°C]    30
Battery Temperature (End)           24
Battery State of Charge (Start)     61
Battery State of Charge (End)       66
Unnamed: 8                          64
Ambient Temperature (Start) [°C]    40
Target Cabin Temperature            12
Distance [km]                       70
Duration [min]                      70
Fan                                  2
extra_weight                         4
is_sunday                            2
rush_hour                            2
fast_charging                        2
after_previous_trip                  2
open_door_event                      2
target_temp_changed                  2
dtype: int64

### *TripA*

In [18]:
TripA.head()

Unnamed: 0,Time [s],Velocity [km/h],Elevation [m],Throttle [%],Motor Torque [Nm],Longitudinal Acceleration [m/s^2],Regenerative Braking Signal,Battery Voltage [V],Battery Current [A],Battery Temperature [°C],...,Heat Exchanger Temperature [°C],Cabin Temperature Sensor [°C],Trip,season,Heating Power LIN [W],Heater Voltage [V],Heater Current [A],Coolant Temperature Heatercore [°C],Coolant Temperature Inlet [°C],Unnamed: 23
0,0.0,2.22,487.0,25.18,25.0,0.54,0.0,379.9,-12.9,31.0,...,14.5,39.84,A07,summer,,,,,,
1,0.1,2.32,487.0,25.49,35.53,0.68,0.0,379.89,-13.0,31.0,...,14.5,39.84,A07,summer,,,,,,
2,0.2,2.53,487.0,25.03,48.03,0.92,0.0,379.79,-14.1,31.0,...,14.5,39.84,A07,summer,,,,,,
3,0.3,2.81,487.0,24.89,55.97,1.06,0.0,379.7,-15.14,31.0,...,14.5,39.84,A07,summer,,,,,,
4,0.4,3.12,487.0,24.08,56.09,1.1,0.0,379.7,-15.59,31.0,...,14.5,39.84,A07,summer,,,,,,


In [19]:
TripA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 467701 entries, 0 to 467700
Data columns (total 31 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   Time [s]                             467701 non-null  float64
 1   Velocity [km/h]                      467701 non-null  float64
 2   Elevation [m]                        467701 non-null  float64
 3   Throttle [%]                         467701 non-null  float64
 4   Motor Torque [Nm]                    467701 non-null  float64
 5   Longitudinal Acceleration [m/s^2]    467701 non-null  float64
 6   Regenerative Braking Signal          467701 non-null  float64
 7   Battery Voltage [V]                  467701 non-null  float64
 8   Battery Current [A]                  467701 non-null  float64
 9   Battery Temperature [°C]             467701 non-null  float64
 10  max. Battery Temperature [°C]        467701 non-null  float64
 11  SoC [%]      

In [20]:
TripA = TripA.drop(columns=['Unnamed: 23'])

In [21]:
TripA.isnull().sum()

Time [s]                                    0
Velocity [km/h]                             0
Elevation [m]                               0
Throttle [%]                                0
Motor Torque [Nm]                           0
Longitudinal Acceleration [m/s^2]           0
Regenerative Braking Signal                 0
Battery Voltage [V]                         0
Battery Current [A]                         0
Battery Temperature [°C]                    0
max. Battery Temperature [°C]               0
SoC [%]                                     0
displayed SoC [%]                           0
min. SoC [%]                                0
max. SoC [%)                                0
Heating Power CAN [kW]                      0
Requested Heating Power [W]                 0
AirCon Power [kW]                           0
Heater Signal                               0
Ambient Temperature [°C]                    0
Requested Coolant Temperature [°C]      19829
Heat Exchanger Temperature [°C]   

#### Extract Data From Overview

In [22]:
# Merge data
TripA = TripA.merge(
    Overview, 
    left_on='Trip', 
    right_on='Trip', 
    how='left'
)
TripA.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 467701 entries, 0 to 467700
Data columns (total 50 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   Time [s]                             467701 non-null  float64
 1   Velocity [km/h]                      467701 non-null  float64
 2   Elevation [m]                        467701 non-null  float64
 3   Throttle [%]                         467701 non-null  float64
 4   Motor Torque [Nm]                    467701 non-null  float64
 5   Longitudinal Acceleration [m/s^2]    467701 non-null  float64
 6   Regenerative Braking Signal          467701 non-null  float64
 7   Battery Voltage [V]                  467701 non-null  float64
 8   Battery Current [A]                  467701 non-null  float64
 9   Battery Temperature [°C]             467701 non-null  float64
 10  max. Battery Temperature [°C]        467701 non-null  float64
 11  SoC [%]      

### *TripB*

In [23]:
TripB.head()

Unnamed: 0,Time [s],Velocity [km/h],Elevation [m],Throttle [%],Motor Torque [Nm],Longitudinal Acceleration [m/s^2],Regenerative Braking Signal,Battery Voltage [V],Battery Current [A],Battery Temperature [°C],...,Temperature Feetvent Driver [°C],Temperature Head Co-Driver [°C],Temperature Head Driver [°C],Temperature Vent right [°C],Temperature Vent central right [°C],Temperature Vent central left [°C],Temperature Vent right [°C].1,Trip,season,Velocity [km/h]]]
0,0.0,0.0,511.0,0.0,0.0,-0.29,0.0,386.2,-2.1,6.0,...,7.82,9.48,9.13,6.68,7.03,6.86,6.16,B31,winter,
1,0.1,0.0,511.0,0.0,0.0,-0.31,0.0,386.15,-2.63,6.0,...,7.82,9.48,9.13,6.68,7.03,6.86,6.16,B31,winter,
2,0.2,0.0,511.0,0.0,0.0,-0.31,0.0,386.1,-3.18,6.0,...,7.82,9.48,9.13,6.68,7.03,6.86,6.16,B31,winter,
3,0.3,0.0,511.0,0.0,0.0,-0.33,0.0,386.05,-3.53,6.0,...,7.79,9.44,9.11,6.68,7.03,6.86,6.15,B31,winter,
4,0.4,0.0,511.0,0.0,0.0,-0.31,0.0,386.0,-3.88,6.0,...,7.7,9.31,9.06,6.66,7.01,6.83,6.12,B31,winter,


In [24]:
TripB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627092 entries, 0 to 627091
Data columns (total 51 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   Time [s]                                627092 non-null  float64
 1   Velocity [km/h]                         610663 non-null  float64
 2   Elevation [m]                           627092 non-null  float64
 3   Throttle [%]                            627092 non-null  float64
 4   Motor Torque [Nm]                       627092 non-null  float64
 5   Longitudinal Acceleration [m/s^2]       627092 non-null  float64
 6   Regenerative Braking Signal             627092 non-null  float64
 7   Battery Voltage [V]                     627092 non-null  float64
 8   Battery Current [A]                     627092 non-null  float64
 9   Battery Temperature [°C]                627092 non-null  float64
 10  max. Battery Temperature [°C]           6270

In [25]:
# Check where one is null and the other is not
mask1 = TripB['Velocity [km/h]'].isna() & TripB['Velocity [km/h]]]'].notna()
mask2 = TripB['Velocity [km/h]]]'].isna() & TripB['Velocity [km/h]'].notna()

# Subsets of rows where only one column has the value
only_in_second = TripB[mask1]
only_in_first = TripB[mask2]

# Both missing
both_missing = TripB['Velocity [km/h]'].isna() & TripB['Velocity [km/h]]]'].isna()

# Both filled
both_filled = TripB['Velocity [km/h]'].notna() & TripB['Velocity [km/h]]]'].notna()

# Display counts
print("Missing in Velocity [km/h] but present in Velocity [km/h]]]:", only_in_second.shape[0])
print("Missing in Velocity [km/h]]] but present in Velocity [km/h]:", only_in_first.shape[0])
print("Rows where BOTH are missing:", both_missing.sum())
print("Rows where BOTH are filled:", both_filled.sum())

Missing in Velocity [km/h] but present in Velocity [km/h]]]: 16429
Missing in Velocity [km/h]]] but present in Velocity [km/h]: 610663
Rows where BOTH are missing: 0
Rows where BOTH are filled: 0


In [26]:
TripB['Velocity [km/h]'] = TripB['Velocity [km/h]'].fillna(TripB['Velocity [km/h]]]'])
TripB = TripB.drop(columns=['Velocity [km/h]]]'])

In [27]:
TripB.isnull().sum()

Time [s]                                      0
Velocity [km/h]                               0
Elevation [m]                                 0
Throttle [%]                                  0
Motor Torque [Nm]                             0
Longitudinal Acceleration [m/s^2]             0
Regenerative Braking Signal                   0
Battery Voltage [V]                           0
Battery Current [A]                           0
Battery Temperature [°C]                      0
max. Battery Temperature [°C]                 0
SoC [%]                                   30793
displayed SoC [%]                         30794
min. SoC [%]                              30793
max. SoC [%)                              30794
Heating Power CAN [kW]                        0
Heating Power LIN [W]                         0
Requested Heating Power [W]                   0
AirCon Power [kW]                             0
Heater Signal                                 0
Heater Voltage [V]                      

In [28]:
# Get column names
col47 = TripB.columns[47]
col44 = TripB.columns[44]

print("Column 47 name:", repr(col47))
print("Column 44 name:", repr(col44))

# Check if names are the same
if col47 == col44:
    print("Column names are exactly the same.")
else:
    print("Column names differ.")

    # Optional: show differences character by character
    import difflib
    diff = difflib.ndiff([col47], [col44])
    print("Difference:")
    print("\n".join(diff))

# Compare values row by row
matches = (TripB.iloc[:, 47] == TripB.iloc[:, 47])

print("Number of matches:", matches.sum())
print("Number of mismatches:", (~matches).sum())

Column 47 name: 'Temperature Vent right [°C]'
Column 44 name: 'Temperature Vent right [°C] '
Column names differ.
Difference:
- Temperature Vent right [°C]
+ Temperature Vent right [°C] 
?                            +

Number of matches: 622453
Number of mismatches: 4639


In [29]:
# Create a single reconciled column using the average
TripB['Temperature Vent right [°C]'] = TripB[[col44, col47]].mean(axis=1, skipna=True)

# Drop the duplicate (with trailing space)
TripB = TripB.drop(columns=[col44])

In [30]:
TripB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627092 entries, 0 to 627091
Data columns (total 49 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   Time [s]                                627092 non-null  float64
 1   Velocity [km/h]                         627092 non-null  float64
 2   Elevation [m]                           627092 non-null  float64
 3   Throttle [%]                            627092 non-null  float64
 4   Motor Torque [Nm]                       627092 non-null  float64
 5   Longitudinal Acceleration [m/s^2]       627092 non-null  float64
 6   Regenerative Braking Signal             627092 non-null  float64
 7   Battery Voltage [V]                     627092 non-null  float64
 8   Battery Current [A]                     627092 non-null  float64
 9   Battery Temperature [°C]                627092 non-null  float64
 10  max. Battery Temperature [°C]           6270

In [31]:
# Merge data
TripB = TripB.merge(
    Overview, 
    left_on='Trip', 
    right_on='Trip', 
    how='left'
)
TripB.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627092 entries, 0 to 627091
Data columns (total 69 columns):
 #   Column                                  Non-Null Count   Dtype  
---  ------                                  --------------   -----  
 0   Time [s]                                627092 non-null  float64
 1   Velocity [km/h]                         627092 non-null  float64
 2   Elevation [m]                           627092 non-null  float64
 3   Throttle [%]                            627092 non-null  float64
 4   Motor Torque [Nm]                       627092 non-null  float64
 5   Longitudinal Acceleration [m/s^2]       627092 non-null  float64
 6   Regenerative Braking Signal             627092 non-null  float64
 7   Battery Voltage [V]                     627092 non-null  float64
 8   Battery Current [A]                     627092 non-null  float64
 9   Battery Temperature [°C]                627092 non-null  float64
 10  max. Battery Temperature [°C]           6270

### Drop & Imputation based on column knowledge


In [32]:
# TripA
tripA_keep_cols = [
    # Core driving and battery signals
    'time', 'velocity', 'elevation', 'throttle', 'motor_torque',
    'longitudinal_acceleration', 'regenerative_braking_signal',
    'battery_voltage', 'battery_current', 'battery_temperature',
    'max_battery_temperature', 'soc', 'displayed_soc',
    'min_soc', 'max_soc',
    # HVAC signals
    'heating_power_can', 'requested_heating_power', 'aircon_power', 'heater_signal',
    # Temperatures
    'ambient_temperature', 'requested_coolant_temperature', 'heat_exchanger_temperature',
    'cabin_temperature_sensor',
    # Partial HVAC data
    'heating_power_lin', 'heater_voltage', 'heater_current', 'coolant_temperature_heatercore', 'coolant_temperature_inlet'
]

# TripB
tripB_keep_cols = [
    # Core driving and battery signals
    'time', 'velocity', 'elevation', 'throttle', 'motor_torque',
    'longitudinal_acceleration', 'regenerative_braking_signal',
    'battery_voltage', 'battery_current', 'battery_temperature',
    'max_battery_temperature', 'soc', 'displayed_soc',
    'min_soc', 'max_soc',
    # HVAC signals
    'heating_power_can', 'heating_power_lin', 'requested_heating_power', 'aircon_power', 'heater_signal', 'heater_voltage', 'heater_current',
    # Temperatures
    'ambient_temperature', 'ambient_temperature_sensor', 'requested_coolant_temperature', 'coolant_temperature_heatercore', 'coolant_temperature_inlet',
    'heat_exchanger_temperature', 'cabin_temperature_sensor',
    # Additional HVAC sensors
    'temperature_coolant_heater_inlet', 'temperature_coolant_heater_outlet',
    'temperature_heat_exchanger_outlet',
    'temperature_defrost_lateral_left', 'temperature_defrost_lateral_right',
    'temperature_defrost_central', 'temperature_defrost_central_left', 'temperature_defrost_central_right',
    'temperature_footweel_driver', 'temperature_footweel_co_driver',
    'temperature_feetvent_co_driver', 'temperature_feetvent_driver',
    'temperature_head_co_driver', 'temperature_head_driver',
    'temperature_vent_central_right', 'temperature_vent_central_left', 'temperature_vent_right'
]

def preprocess_trip(df, trip='A'):
    """
    Preprocess TripA or TripB using known columns
    """
    df = clean_dataframe_columns(df)

    # Select relevant columns based on dataset
    if trip == 'A':
        df = df[[c for c in tripA_keep_cols if c in df.columns]]
        season = 'summer'
        df['hvac_target_power'] = df.get('aircon_power', np.nan)
    elif trip == 'B':
        df = df[[c for c in tripB_keep_cols if c in df.columns]]
        season = 'winter'
        df['hvac_target_power'] = df.get('heating_power_can', np.nan)

    # Impute missing values (time-series friendly)
    df = df.interpolate(method='linear', limit_direction='both')

    # Feature engineering
    if 'battery_voltage' in df.columns and 'battery_current' in df.columns:
        df['battery_power'] = df['battery_voltage'] * df['battery_current']

    if 'cabin_temperature_sensor' in df.columns and 'ambient_temperature' in df.columns:
        df['cabin_ambient_delta'] = df['cabin_temperature_sensor'] - df['ambient_temperature']

    # Previous timestep HVAC signals
    hvac_cols = ['requested_heating_power', 'aircon_power', 'heating_power_can']
    for col in hvac_cols:
        if col in df.columns:
            df[f'{col}_prev'] = df[col].shift(1).fillna(method='bfill')

    return df


TripA_clean = preprocess_trip(TripA, trip='A')
TripB_clean = preprocess_trip(TripB, trip='B')

In [33]:
TripA_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 467701 entries, 0 to 467700
Data columns (total 34 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   time                            467701 non-null  float64
 1   velocity                        467701 non-null  float64
 2   elevation                       467701 non-null  float64
 3   throttle                        467701 non-null  float64
 4   motor_torque                    467701 non-null  float64
 5   longitudinal_acceleration       467701 non-null  float64
 6   regenerative_braking_signal     467701 non-null  float64
 7   battery_voltage                 467701 non-null  float64
 8   battery_current                 467701 non-null  float64
 9   battery_temperature             467701 non-null  float64
 10  max_battery_temperature         467701 non-null  float64
 11  soc                             467701 non-null  float64
 12  displayed_soc   

In [34]:
TripB_clean.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627092 entries, 0 to 627091
Data columns (total 52 columns):
 #   Column                             Non-Null Count   Dtype  
---  ------                             --------------   -----  
 0   time                               627092 non-null  float64
 1   velocity                           627092 non-null  float64
 2   elevation                          627092 non-null  float64
 3   throttle                           627092 non-null  float64
 4   motor_torque                       627092 non-null  float64
 5   longitudinal_acceleration          627092 non-null  float64
 6   regenerative_braking_signal        627092 non-null  float64
 7   battery_voltage                    627092 non-null  float64
 8   battery_current                    627092 non-null  float64
 9   battery_temperature                627092 non-null  float64
 10  max_battery_temperature            627092 non-null  float64
 11  soc                                6270

### Save the cleaned files

In [35]:
TripA_clean.to_csv("TripA_clean.csv")
TripB_clean.to_csv("TripB_clean.csv")