In [None]:
#ABRP trip data analysis
import pandas as pd
import matplotlib.pyplot as plt 

# Read the Excel export into a DataFrame. header=2 means the column names sit on
# the third row of the sheet, so the two rows above it are skipped.
filename = "2025-10-28_to_2025-11-16.xlsx"
df = pd.read_excel(filename, header=2)

# Keep only rows whose Activity mentions 'Drive' or 'Charge'.
# The match is a case-insensitive substring search (case=False), so variants such
# as 'drive' or '68 drives' are kept as well. Missing activities become the text
# 'nan' via astype(str) and therefore never match.
# NOTE(review): because this is a substring match, summary-style labels are kept
# too; tighten it to an exact match if such rows must be excluded.
activity_mask = df['Activity'].astype(str).str.contains('Drive|Charge', case=False, na=False)
print('Rows before filtering:', len(df))
df = df[activity_mask].copy()
print('Rows after filtering (Drive or Charge only):', len(df))


In [None]:
# Calculate the SoC delta per 100 km for Drive trips longer than 5 km.
# Every SoC column derived here is expressed in percent. The "> 5 km" distance
# filter also guarantees that the divisor used below is never zero.
import numpy as np  # NOTE(review): consider moving this into the notebook's import cell

battery_capacity_kWh = 60.0  # capacity used to convert an SoC percentage into kWh
df = df.copy()

# Coerce the raw inputs to numeric BEFORE any arithmetic. Coercing only the final
# result is too late: a column that arrived as object/text would already have
# raised (or silently produced garbage, e.g. string repetition on "* 100") in the
# calculations below. Cells that cannot be parsed become NaN and drop out of the
# statistics.
start_soc = pd.to_numeric(df['Start SoC'], errors='coerce')
end_soc = pd.to_numeric(df['End SoC'], errors='coerce')
distance_km = pd.to_numeric(df['Distance [km]'], errors='coerce')

# presumably 'Start SoC' / 'End SoC' are stored as 0-1 fractions — TODO confirm
df['Start SoC %'] = start_soc * 100
df['End SoC %'] = end_soc * 100
df['soc_delta %'] = (start_soc - end_soc) * 100

# Only consider trips with a known distance greater than 5 km ...
trip_mask = distance_km.notna() & (distance_km > 5)
# ... whose Activity label starts with 'Drive' (case-sensitive prefix match).
drive_only_mask = df['Activity'].astype(str).str.startswith('Drive')
# Combined mask for meaningful drive trips
drive_trip_mask = trip_mask & drive_only_mask

# Rows outside the mask keep NaN so they are clearly marked as "not computed".
df['soc_delta_per_100km %'] = np.nan
df.loc[drive_trip_mask, 'soc_delta_per_100km %'] = (
    df.loc[drive_trip_mask, 'soc_delta %'] / distance_km[drive_trip_mask] * 100
)

# Drop trips whose SoC readings were missing/unparseable before stats and plots.
series_pct = df.loc[drive_trip_mask, 'soc_delta_per_100km %'].dropna().astype(float)
print(f'\nNumeric summary statistics for SoC delta per 100 km (%) (Drive trips > 5 km, n={len(series_pct)}):')
display(series_pct.describe())

# Same quantity expressed as energy: percent of the pack -> kWh.
series_kwh = series_pct / 100.0 * battery_capacity_kWh


# Histogram of the per-trip SoC consumption (percent of battery per 100 km).
fig, ax = plt.subplots(figsize=(6, 4))
hist_style = dict(bins=60, color='C0', edgecolor='k')
ax.hist(series_pct, **hist_style)
ax.grid(True)
ax.set_title('Distribution of SoC needed per 100 km for Drive trips > 5 km (all SoC in %)')
ax.set_ylabel('Frequency')
ax.set_xlabel('SoC delta per 100 km (%)')

def pct_to_kwh(x):
    """Convert an SoC percentage into kWh using the module-level battery_capacity_kWh."""
    fraction_of_pack = x / 100.0
    return fraction_of_pack * battery_capacity_kWh

def kwh_to_pct(x):
    """Convert an energy in kWh into an SoC percentage of battery_capacity_kWh (inverse of pct_to_kwh)."""
    share_of_pack = x / battery_capacity_kWh
    return share_of_pack * 100.0

# Mirror the x-axis on top of the plot in kWh; matplotlib needs the forward
# (percent -> kWh) and inverse (kWh -> percent) conversions as a pair.
unit_converters = (pct_to_kwh, kwh_to_pct)
secax = ax.secondary_xaxis('top', functions=unit_converters)
secax.set_xlabel('Energy needed per 100 km (kWh)')
plt.show()

# Store the per-trip results on the frame, first in percent and then in kWh.
# The values are assigned as Series, so pandas matches them to rows by index label.
for column_name, per_trip_values in (
    ('soc_delta_per_100km %', series_pct),
    ('soc_delta_per_100km_kWh', series_kwh),
):
    df.loc[drive_trip_mask, column_name] = per_trip_values
