In [1]:
import pandas as pd
import numpy as np


In [2]:

# Preprocess the EV trip log: load, type-cast, derive features, scale, save.
#
# Order matters: derived features are computed from the raw columns BEFORE the
# min-max scaling step. Dividing two columns that were each rescaled to [0, 1]
# does not give a value in Wh/km, and yields 0/0 or x/0 at the row holding the
# column minimum.

# Columns that are cast to float and then min-max scaled to [0, 1].
numerical_features = ['acceleration(m/s²)', 'actualBatteryCapacity(Wh)', 'SoC(%)', 'speed(m/s)',
                      'speedFactor', 'totalEnergyConsumed(Wh)', 'totalEnergyRegenerated(Wh)',
                      'lon', 'lat', 'alt', 'slope(º)', 'completedDistance(km)', 'mWh', 'remainingRange(km)']


def add_energy_consumption_rate(frame):
    """Return a copy of ``frame`` with an 'energyConsumptionRate(Wh/km)' column.

    The rate is 'totalEnergyConsumed(Wh)' divided by 'completedDistance(km)',
    so both input columns must still hold their unscaled values when this is
    called.

    Rows where the division is undefined (distance of 0, giving +/-inf or NaN)
    are set to 0, matching the original notebook's handling.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain 'totalEnergyConsumed(Wh)' and 'completedDistance(km)'.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.
    """
    rate = frame['totalEnergyConsumed(Wh)'] / frame['completedDistance(km)']
    # Division by a zero distance produces inf (x/0) or NaN (0/0); map both to 0.
    rate = rate.replace([np.inf, -np.inf], np.nan).fillna(0)
    return frame.assign(**{'energyConsumptionRate(Wh/km)': rate})


# In a notebook kernel __name__ is "__main__", so this section runs as before;
# the guard only keeps the file I/O from firing if the code is imported.
if __name__ == "__main__":
    # Kept next to its only use, as in the original cell.
    from sklearn.preprocessing import MinMaxScaler

    # Load the data
    file_path = '1_amurrio_durango_0.2_0_0_0_output.csv'
    data = pd.read_csv(file_path, delimiter=';')

    # Display the first few rows of the dataset
    print(data.head())

    # Check for missing values
    print(data.isnull().sum())

    # Convert columns to appropriate data types: 'step' is an integer counter,
    # every column listed in numerical_features is a float.
    data = data.astype({'step': int, **{column: float for column in numerical_features}})

    # Assuming 'step' represents time steps, create a time column
    data['time'] = data['step'] * 1  # Assuming each step is 1 second

    # Derived feature, computed from the raw (unscaled) energy and distance.
    data = add_energy_consumption_rate(data)

    # Normalize numerical features (optional). The derived rate column is
    # deliberately left out so it stays in physical units, as its name states.
    scaler = MinMaxScaler()
    data[numerical_features] = scaler.fit_transform(data[numerical_features])

    # Save the preprocessed data to a new CSV file
    output_file_path = 'preprocessed_electric_vehicle_trip_data.csv'
    data.to_csv(output_file_path, index=False)

    print("Preprocessing completed and data saved to", output_file_path)

  vehID  step  acceleration(m/s²)  actualBatteryCapacity(Wh)  SoC(%)  \
0   EV0   101                 1.5                   38998.85  100.00   
1   EV0   102                 1.5                   38996.11   99.99   
2   EV0   103                 1.5                   38991.79   99.98   
3   EV0   104                 1.5                   38985.88   99.96   
4   EV1   104                 2.3                   38997.85   99.99   

   speed(m/s)  speedFactor  totalEnergyConsumed(Wh)  \
0         1.5     0.916619                     1.15   
1         3.0     0.916619                     3.89   
2         4.5     0.916619                     8.21   
3         6.0     0.916619                    14.12   
4         2.3     0.956203                     2.15   

   totalEnergyRegenerated(Wh)       lon        lat         alt  slope(º)  \
0                         0.0 -3.001003  43.053164  218.346642  3.549004   
1                         0.0 -3.001024  43.053186  218.532343  3.549004   
2       