In [1]:
#!/usr/bin/env python
# coding: utf-8
import sys
import platform
import logging

sys.path.append('./prod/nelson/')   #Comment this section when running on airflow
# sys.path.append('..') #comment this when running locally
from common import db_operations

import pandas as pd
import numpy as np
from datetime import datetime, date, timedelta
from common.db_operations import connect_to_trino, fetch_data_for_day, write_df_to_iceberg,execute_query

In [2]:
# Configure basic logging for the business logic file
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S')

# Print the Python version being used
print(f"Using Python version: {platform.python_version()}")

Using Python version: 3.11.13


In [3]:
# ---- report configuration ----
TABLE_NAME = "energy_mileage_report"
SOURCE_TABLE = "can_parsed_output_100"
COLUMNS_TO_FETCH = [
    '"id"','"timestamp"',
    'at_timezone("timestamp", \'Asia/Kolkata\') AS IST',
    '"BAT_SOC"','"Bat_Voltage"','"Total_Battery_Current"','"GUN_Connection_Status"',
    '"OdoMeterReading"','"Gear_Position"','"Vehiclereadycondition"','"BrakePedalPos"',
    '"Vehicle_speed_VCU"','"Chargingcontactor1positive"','"Chargingcontactor1negative"',
    '"Chargingcontactor2positive"','"Chargingcontactor2negative"']

In [4]:
conn = connect_to_trino()
query = f"""
SELECT 
    id,timestamp,
    CAST(timestamp AT TIME ZONE 'Asia/Kolkata' AS DATE) AS dateval,
    BAT_SOC,
    Bat_Voltage,
    BrakePedalPos, 
    Vehicle_speed_VCU,
    Total_Battery_Current,
    GUN_Connection_Status,
    OdoMeterReading,
    Gear_Position,
    Vehiclereadycondition,
    Chargingcontactor1positive,
    Chargingcontactor1negative,
    Chargingcontactor2positive,
    Chargingcontactor2negative
from 
  can_parsed_output_100
where 
  id in ('3')
  and timestamp >= CAST('2025-09-09' AS TIMESTAMP) AT TIME ZONE 'Asia/Kolkata' - INTERVAL '5' HOUR - INTERVAL '30' MINUTE
  and timestamp < CAST('2025-09-10' AS TIMESTAMP) AT TIME ZONE 'Asia/Kolkata' - INTERVAL '5' HOUR - INTERVAL '30' MINUTE
"""
df = execute_query(conn, query)
conn.close()
df.head()

2025-09-23 16:57:21 - INFO - üîå STEP 1: Connecting to Trino...
2025-09-23 16:57:21 - INFO - ‚úÖ STEP 1: Connected to Trino
2025-09-23 16:57:21 - INFO - ‚öôÔ∏è Executing query...
2025-09-23 16:57:37 - INFO - ‚úÖ Query executed successfully!


Unnamed: 0,id,timestamp,dateval,BAT_SOC,Bat_Voltage,BrakePedalPos,Vehicle_speed_VCU,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative
0,3,2025-09-09 18:24:38.446,2025-09-09,,,0.0,7.960938,,0.0,,2.0,1.0,,,,
1,3,2025-09-09 18:24:43.486,2025-09-09,84.4,637.6,0.0,8.285156,20.4,0.0,14098.125,2.0,1.0,0.0,0.0,0.0,0.0
2,3,2025-09-09 18:24:50.786,2025-09-09,84.4,637.5,0.0,8.859375,29.2,0.0,14098.125,2.0,1.0,0.0,0.0,0.0,0.0
3,3,2025-09-09 18:25:09.386,2025-09-09,84.4,638.0,0.0,6.886719,14.6,0.0,14098.25,2.0,1.0,0.0,0.0,0.0,0.0
4,3,2025-09-09 18:25:26.266,2025-09-09,84.4,637.2,0.0,12.347656,46.1,0.0,14098.25,2.0,1.0,0.0,0.0,0.0,0.0


In [5]:
wait here

SyntaxError: invalid syntax (3991150741.py, line 1)

In [None]:
def process_data(conn,df:pd.DataFrame):
    # Process data related energy-mileage relation
    df_res = df
    if not df_res.empty:
        # write_df_to_iceberg(conn, df_res, "energy_mileage_report" )
        logging.info("‚úÖ Processing and write for specific IDs complete.")
    else:
        logging.info("Processed DataFrame is empty. No data to write.")
    return df_res

In [None]:
conn = connect_to_trino()
start_date_str = '2025-09-01'
end_date_str = '2025-09-14'
start_date = date.fromisoformat(start_date_str)
end_date = date.fromisoformat(end_date_str)
date_range = [start_date + timedelta(days=i) for i in range((end_date - start_date).days + 1)]
print(start_date.isoformat())
vehicle_ids_for_report = ['3', '16']#, '18', '19']

# List to accumulate daily DataFrames
all_days_data = []

for single_date in date_range:
    date_str = single_date.isoformat()
    logging.info(f"‚ñ∂Ô∏è Starting daily report job for {date_str}")
    df_day = fetch_data_for_day(conn, date_str, COLUMNS_TO_FETCH, SOURCE_TABLE, vehicle_ids_for_report)
    if not df_day.empty:
        all_days_data.append(df_day)

# Concatenate all daily DataFrames into one
df_raw = pd.concat(all_days_data, ignore_index=True) if all_days_data else pd.DataFrame()

2025-09-23 11:43:43 - INFO - üîå STEP 1: Connecting to Trino...
2025-09-23 11:43:43 - INFO - ‚úÖ STEP 1: Connected to Trino
2025-09-23 11:43:43 - INFO - ‚ñ∂Ô∏è Starting daily report job for 2025-09-01
2025-09-23 11:43:43 - INFO - üì• STEP 2a: Validating and fetching data for 2025-09-01...
2025-09-23 11:43:43 - INFO - ‚öôÔ∏è Executing query...


2025-09-01


2025-09-23 11:44:02 - INFO - ‚úÖ Query executed successfully!
2025-09-23 11:44:02 - INFO - ‚úÖ STEP 2d: Data fetching for 2025-09-01 completed, Rows fetched: 139620
2025-09-23 11:44:02 - INFO - ‚ñ∂Ô∏è Starting daily report job for 2025-09-02
2025-09-23 11:44:02 - INFO - üì• STEP 2a: Validating and fetching data for 2025-09-02...
2025-09-23 11:44:02 - INFO - ‚öôÔ∏è Executing query...
2025-09-23 11:44:20 - INFO - ‚úÖ Query executed successfully!
2025-09-23 11:44:20 - INFO - ‚úÖ STEP 2d: Data fetching for 2025-09-02 completed, Rows fetched: 70283
2025-09-23 11:44:20 - INFO - ‚ñ∂Ô∏è Starting daily report job for 2025-09-03
2025-09-23 11:44:20 - INFO - üì• STEP 2a: Validating and fetching data for 2025-09-03...
2025-09-23 11:44:20 - INFO - ‚öôÔ∏è Executing query...
2025-09-23 11:44:43 - INFO - ‚úÖ Query executed successfully!
2025-09-23 11:44:43 - INFO - ‚úÖ STEP 2d: Data fetching for 2025-09-03 completed, Rows fetched: 98415
2025-09-23 11:44:43 - INFO - ‚ñ∂Ô∏è Starting daily report job f

In [None]:
df_raw.id.unique()

array(['16', '3'], dtype=object)

In [None]:
df_raw.head()

Unnamed: 0,id,timestamp,IST,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative
0,16,2025-09-01 18:23:38.895,2025-09-01 23:53:38.895000+05:30,78.0,632.7,132.7,0.0,,2.0,1.0,0.0,75.77734,0.0,0.0,0.0,0.0
1,16,2025-09-01 18:23:40.032,2025-09-01 23:53:40.032000+05:30,78.0,632.4,172.1,0.0,9127.875,2.0,1.0,0.0,76.34766,0.0,0.0,0.0,0.0
2,16,2025-09-01 18:23:49.373,2025-09-01 23:53:49.373000+05:30,78.0,631.5,137.3,0.0,,2.0,1.0,0.0,80.09375,0.0,0.0,0.0,0.0
3,16,2025-09-01 18:23:51.510,2025-09-01 23:53:51.510000+05:30,78.0,631.6,119.3,0.0,9128.125,2.0,1.0,0.0,80.32422,0.0,0.0,0.0,0.0
4,16,2025-09-01 18:24:26.892,2025-09-01 23:54:26.892000+05:30,77.6,632.0,115.7,0.0,,2.0,1.0,0.0,80.421875,0.0,0.0,0.0,0.0


In [None]:
# Find the index of the 'IST' column
ist_index = df_raw.columns.get_loc('IST')

# Insert 'hour' column right after 'IST'
df_raw.insert(ist_index + 1, 'hour', df_raw['IST'].dt.hour)
df_raw['hour'] = df_raw['IST'].dt.hour

In [None]:
# ...after loading df_raw...

cols = ['Chargingcontactor1positive', 'Chargingcontactor1negative', 'Chargingcontactor2positive', 'Chargingcontactor2negative']
df_raw['any_charging_contactor_active'] = ((df_raw[cols].fillna(0).astype(float).any(axis=1) & (df_raw['GUN_Connection_Status'] == 1))).astype(int)
# Now df_raw['any_charging_contactor_active'] will be 1 (active) or 0 (inactive).
df_raw.head()

Unnamed: 0,id,timestamp,IST,hour,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative,any_charging_contactor_active
0,16,2025-09-01 18:23:38.895,2025-09-01 23:53:38.895000+05:30,23,78.0,632.7,132.7,0.0,,2.0,1.0,0.0,75.77734,0.0,0.0,0.0,0.0,0
1,16,2025-09-01 18:23:40.032,2025-09-01 23:53:40.032000+05:30,23,78.0,632.4,172.1,0.0,9127.875,2.0,1.0,0.0,76.34766,0.0,0.0,0.0,0.0,0
2,16,2025-09-01 18:23:49.373,2025-09-01 23:53:49.373000+05:30,23,78.0,631.5,137.3,0.0,,2.0,1.0,0.0,80.09375,0.0,0.0,0.0,0.0,0
3,16,2025-09-01 18:23:51.510,2025-09-01 23:53:51.510000+05:30,23,78.0,631.6,119.3,0.0,9128.125,2.0,1.0,0.0,80.32422,0.0,0.0,0.0,0.0,0
4,16,2025-09-01 18:24:26.892,2025-09-01 23:54:26.892000+05:30,23,77.6,632.0,115.7,0.0,,2.0,1.0,0.0,80.421875,0.0,0.0,0.0,0.0,0


In [None]:
# Impute missing values for each vehicle, sorted by IST
def impute_group(group):
    group = group.sort_values('IST')
    # Columns to impute
    columns_to_impute = ['BAT_SOC','Gear_Position','Vehiclereadycondition','any_charging_contactor_active','OdoMeterReading']


    for col in columns_to_impute:
        mask = group[col].isnull()
        # Find previous and next valid IST for each missing value
        prev_time = group['IST'].where(~mask).ffill()
        next_time = group['IST'].where(~mask).bfill()
        # Calculate time gap between previous and next valid points
        time_gap = ((next_time - prev_time).dt.total_seconds()).abs()
        # Interpolate only where time_gap <= 60s
        eligible = mask & (time_gap <= 60)
        group.loc[eligible, col] = group[col].interpolate(method='linear', limit_direction='both')[eligible]
    return group

# # Apply imputation to the entire df_raw, grouped by 'id'
df_raw_imputed = df_raw.groupby('id', group_keys=False).apply(impute_group)
df_raw_imputed.head()

  df_raw_imputed = df_raw.groupby('id', group_keys=False).apply(impute_group)


Unnamed: 0,id,timestamp,IST,hour,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative,any_charging_contactor_active
137038,16,2025-08-31 18:30:00.706,2025-09-01 00:00:00.706000+05:30,0,,,,,8477.75,,,,,,,,,0
136586,16,2025-08-31 18:30:01.806,2025-09-01 00:00:01.806000+05:30,0,,,,,8477.75,,,,,,,,,0
136789,16,2025-08-31 18:30:02.826,2025-09-01 00:00:02.826000+05:30,0,,,,,8477.75,,,,,,,,,0
137165,16,2025-08-31 18:30:04.008,2025-09-01 00:00:04.008000+05:30,0,,,,,8477.75,,,,,,,,,0
136471,16,2025-08-31 18:30:05.046,2025-09-01 00:00:05.046000+05:30,0,,,,,8477.75,,,,,,,,,0


In [None]:
df = df_raw_imputed.copy()
df['date'] = df['IST'].dt.date
# df = df_raw.loc[(df_raw.BAT_SOC.notna())&(df_raw.Total_Battery_Current.abs()<3000)]

vehicle_id = '16'  # id should be a string
date_val = pd.to_datetime('2025-09-03').date()
hour_val = 8

subset = df[
    (df['id'] == vehicle_id) &
    (df['IST'].dt.date == date_val) &
    (df['hour'].astype(int) == hour_val)
]

subset.describe()
print(len(subset))

3279


In [None]:
len(subset)

3279

In [None]:
subset[subset.timestamp == subset.timestamp.min()]

Unnamed: 0,id,timestamp,IST,hour,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative,any_charging_contactor_active,date
278468,16,2025-09-03 02:30:00.520,2025-09-03 08:00:00.520000+05:30,8,,,,,9355.0,,,,,,,,,0,2025-09-03


In [None]:
subset[subset.timestamp == subset.timestamp.max()]

Unnamed: 0,id,timestamp,IST,hour,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative,any_charging_contactor_active,date
272247,16,2025-09-03 03:29:59.560,2025-09-03 08:59:59.560000+05:30,8,,,,,9357.125,,,,,,,,,0,2025-09-03


In [None]:
subset[(subset.Total_Battery_Current>=50)].describe()

Unnamed: 0,timestamp,hour,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative,any_charging_contactor_active
count,129,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0,129.0
mean,2025-09-03 02:38:12.917092864,8.0,98.04031,636.394574,78.237984,0.0,9355.957364,2.0,1.0,0.16124,19.135144,0.0,0.0,0.0,0.0,0.0
min,2025-09-03 02:34:43.760000,8.0,97.6,634.1,50.0,0.0,9355.0,2.0,1.0,0.0,5.671875,0.0,0.0,0.0,0.0,0.0
25%,2025-09-03 02:35:59.240000,8.0,98.0,636.0,61.7,0.0,9355.5,2.0,1.0,0.0,15.160156,0.0,0.0,0.0,0.0,0.0
50%,2025-09-03 02:37:15.780000,8.0,98.0,636.4,71.8,0.0,9355.875,2.0,1.0,0.0,19.59375,0.0,0.0,0.0,0.0,0.0
75%,2025-09-03 02:40:36.580000,8.0,98.4,636.8,87.0,0.0,9356.5,2.0,1.0,0.0,23.699219,0.0,0.0,0.0,0.0,0.0
max,2025-09-03 02:43:25.800000,8.0,98.4,638.0,148.6,0.0,9357.0,2.0,1.0,20.8,29.261719,0.0,0.0,0.0,0.0,0.0
std,,0.0,0.299353,0.837156,22.859938,0.0,0.612178,0.0,0.0,1.831338,5.850083,0.0,0.0,0.0,0.0,0.0


In [None]:
subset[subset.BAT_SOC.notna()]

Unnamed: 0,id,timestamp,IST,hour,BAT_SOC,Bat_Voltage,Total_Battery_Current,GUN_Connection_Status,OdoMeterReading,Gear_Position,Vehiclereadycondition,BrakePedalPos,Vehicle_speed_VCU,Chargingcontactor1positive,Chargingcontactor1negative,Chargingcontactor2positive,Chargingcontactor2negative,any_charging_contactor_active,date
284879,16,2025-09-03 02:30:35.100,2025-09-03 08:00:35.100000+05:30,8,98.4,0.0,0.0,0.0,9355.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
285194,16,2025-09-03 02:30:36.040,2025-09-03 08:00:36.040000+05:30,8,98.4,0.0,0.0,0.0,9355.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
285367,16,2025-09-03 02:30:37.200,2025-09-03 08:00:37.200000+05:30,8,98.4,640.2,0.0,0.0,9355.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
284188,16,2025-09-03 02:30:38.340,2025-09-03 08:00:38.340000+05:30,8,98.4,640.3,0.0,0.0,9355.000,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
284880,16,2025-09-03 02:30:39.460,2025-09-03 08:00:39.460000+05:30,8,98.4,640.2,0.0,0.0,9355.000,0.0,0.0,7.2,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275997,16,2025-09-03 02:51:39.460,2025-09-03 08:21:39.460000+05:30,8,97.6,638.4,0.0,0.0,9357.125,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
277135,16,2025-09-03 02:51:40.620,2025-09-03 08:21:40.620000+05:30,8,97.6,638.4,0.0,0.0,9357.125,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
276635,16,2025-09-03 02:51:41.640,2025-09-03 08:21:41.640000+05:30,8,97.6,638.4,0.0,0.0,9357.125,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0,2025-09-03
276157,16,2025-09-03 02:51:42.640,2025-09-03 08:21:42.640000+05:30,8,97.6,638.4,0.0,0.0,9357.125,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0,2025-09-03


In [None]:
# Apply imputation to the entire df_raw, grouped by 'id'
# subset_imputed = subset.groupby('id', group_keys=False).apply(impute_group)
# subset_imputed.head()
# subset_imputed.describe()

In [None]:
# subset_imputed = subset_imputed.dropna(subset=['Total_Battery_Current', 'Bat_Voltage', 'IST', 'hour'])
# subset_imputed.isnull().sum(),subset_imputed.describe()
# subset_imputed['date'] = subset_imputed['IST'].dt.date``
# subset_imputed.sort_values(by=['id', 'IST'], inplace=True)
# subset_imputed.head()

In [None]:
wa

NameError: name 'wa' is not defined

In [None]:
wait here

In [None]:
def energy_mileage_stats_hourly(df: pd.DataFrame):
    try:
        logging.info("‚öôÔ∏è STEP 3a: Starting hourly data processing with imputation...")
        df = df.copy()
        df['date'] = df['IST'].dt.date

        columns_to_impute = ['OdoMeterReading', 'Gear_Position', 'Vehiclereadycondition','Total_Battery_Current', 'Bat_Voltage']
        critical_columns = columns_to_impute

        # Initial stats
        total_rows = len(df)
        non_null_before = df[critical_columns].notnull().all(axis=1).sum()

        def impute_group(group):
            group = group.sort_values('IST')
            for col in columns_to_impute:
                mask = group[col].isnull()
                prev_time = group['IST'].where(~mask).ffill()
                next_time = group['IST'].where(~mask).bfill()
                time_gap = ((next_time - prev_time).dt.total_seconds()).abs()
                eligible = mask & (time_gap <= 300)
                group.loc[eligible, col] = group[col].interpolate(method='linear', limit_direction='both')[eligible]
            return group

        # Calculate total raw rows per id, date, hour
        raw_counts = (
            df.groupby(['id', df['IST'].dt.date, df['hour']])
            .size()
            .reset_index(name='raw_datapoints')
            .rename(columns={'IST': 'date'})
        )

        df = df.groupby('id', group_keys=False).apply(impute_group)
        # imputed_rows = df[critical_columns].notnull().all(axis=1).sum()
        df = df.dropna(subset=critical_columns)
        # final_rows = len(df)
        df = df[df['Total_Battery_Current'].abs() <= 3000]
        df.sort_values(by=['id', 'IST'], inplace=True)

        all_hourly_stats = []
        for (vehicle_id, date_val, hour_val), group in df.groupby(['id', 'date', 'hour']):
            # Per-hour stats
            hour_total_rows = len(group)
            # hour_non_null_before = group[critical_columns].notnull().all(axis=1).sum()
            # hour_imputed_rows = hour_non_null_before
            # hour_final_rows = len(group.dropna(subset=critical_columns))

            raw_datapoints = raw_counts[
                (raw_counts['id'] == vehicle_id) &
                (raw_counts['date'] == date_val) &
                (raw_counts['hour'] == hour_val)
            ]['raw_datapoints'].values
            raw_datapoints = raw_datapoints[0] if len(raw_datapoints) > 0 else hour_total_rows
            
            # Calculate how many were imputed
            hour_non_null_before_impute = raw_datapoints - group[critical_columns].notnull().all(axis=1).sum()
            hour_non_null_after_impute = group[critical_columns].notnull().all(axis=1).sum()
            hour_imputed_count = hour_non_null_after_impute - (raw_datapoints - hour_non_null_before_impute)

            vehicle_df = group.copy()
            vehicle_df['time_diff_seconds'] = vehicle_df['IST'].diff().dt.total_seconds().fillna(0)
            vehicle_df = vehicle_df[vehicle_df['time_diff_seconds'] > 0]
            vehicle_df['power_kW'] = (vehicle_df['Bat_Voltage'] * vehicle_df['Total_Battery_Current']) / 1000

            driving_df = vehicle_df[(vehicle_df['Gear_Position'] == 2.0) | (vehicle_df['Vehiclereadycondition'] == 1.0)].copy()
            driving_df['energy_consumption_kwh'] = driving_df.apply(
                lambda row: row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] > 0 else 0, axis=1)
            driving_df['regen_energy_kwh'] = driving_df.apply(
                lambda row: -row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] < 0 else 0, axis=1)
            driving_df['distance_increment'] = driving_df['OdoMeterReading'].diff().fillna(0)
            driving_df = driving_df[driving_df['distance_increment'] <= 10]

            if not driving_df.empty:
                hourly_driving_mileage = driving_df['distance_increment'].sum()
                hourly_driving_energy = driving_df['energy_consumption_kwh'].sum()
                hourly_regen_energy = driving_df['regen_energy_kwh'].sum()
                non_null_datapoints = len(driving_df)
            else:
                hourly_driving_mileage = 0
                hourly_driving_energy = 0
                hourly_regen_energy = 0
                non_null_datapoints = 0

            stationary_df = vehicle_df[(vehicle_df['Gear_Position'] == 0.0)].copy()
            if not stationary_df.empty:
                stationary_df['energy_kwh'] = stationary_df.apply(
                    lambda row: row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] > 0 else 0, axis=1)
                hourly_idling_energy = stationary_df['energy_kwh'].sum()
            else:
                hourly_idling_energy = 0

            net_energy_kwh = hourly_driving_energy - hourly_regen_energy
            threshold = 0.1  # km
            if hourly_driving_mileage < threshold:
                mileage_kwh_per_km = np.nan
            else:
                mileage_kwh_per_km = round(net_energy_kwh / hourly_driving_mileage, 2)
            
            # Calculate percentages
            raw_data_pcnt = round((raw_datapoints / 3600.0) * 100, 2) if raw_datapoints > 0 else 0
            non_null_data_pcnt = round((non_null_datapoints / raw_datapoints) * 100, 2) if raw_datapoints > 0 else 0

            hourly_stats_df = pd.DataFrame([{
                'id': vehicle_id,
                'date': date_val,
                'hour': hour_val,
                'dist_travelled_km': round(hourly_driving_mileage, 2),
                'energy_consumed_kwh': round(hourly_driving_energy, 2),
                'regen_energy_kwh': round(hourly_regen_energy, 2),
                'idling_energy_kwh': round(hourly_idling_energy, 2),
                'net_energy_kwh': round(net_energy_kwh, 2),
                'mileage_kwh_per_km': mileage_kwh_per_km,
                'raw_datapoints': raw_datapoints,
                'non_null_datapoints': non_null_datapoints,
                'raw_data_pcnt': raw_data_pcnt,
                'non_null_data_pcnt': non_null_data_pcnt
            }])
            all_hourly_stats.append(hourly_stats_df)

        if all_hourly_stats:
            final_df = pd.concat(all_hourly_stats, ignore_index=True)
            logging.info("‚úÖ STEP 3c: All vehicle hourly data processed.")
            return final_df
        else:
            logging.info("No data found for driving or idling periods.")
            return pd.DataFrame()

    except Exception as e:
        logging.warning(f"An error occurred: {e}")
        return pd.DataFrame()

In [None]:
df_out = energy_mileage_stats_hourly(df_raw)
df_out.head()

2025-09-23 10:48:39 - INFO - ‚öôÔ∏è STEP 3a: Starting hourly data processing with imputation...
  df = df.groupby('id', group_keys=False).apply(impute_group)
2025-09-23 10:48:59 - INFO - ‚úÖ STEP 3c: All vehicle hourly data processed.


Unnamed: 0,id,date,hour,dist_travelled_km,energy_consumed_kwh,regen_energy_kwh,idling_energy_kwh,net_energy_kwh,mileage_kwh_per_km,raw_datapoints,non_null_datapoints,raw_data_pcnt,non_null_data_pcnt
0,16,2025-09-01,0,16.88,18.27,0.55,1.54,17.72,1.05,3185,1213,88.47,38.08
1,16,2025-09-01,1,56.25,51.12,3.25,0.41,47.87,0.85,3192,3190,88.67,99.94
2,16,2025-09-01,2,73.5,63.8,4.84,0.0,58.96,0.8,3268,3265,90.78,99.91
3,16,2025-09-01,3,47.38,66.5,21.81,0.0,44.68,0.94,3330,3325,92.5,99.85
4,16,2025-09-01,4,7.12,11.08,4.24,0.0,6.84,0.96,3306,1112,91.83,33.64


In [None]:
df_out[(df_out.hour == 19) & (df_out.id =='16')]

Unnamed: 0,id,date,hour,dist_travelled_km,energy_consumed_kwh,regen_energy_kwh,idling_energy_kwh,net_energy_kwh,mileage_kwh_per_km,raw_datapoints,non_null_datapoints,raw_data_pcnt,non_null_data_pcnt
19,16,2025-09-01,19,0.0,0.0,0.0,0.03,0.0,,3365,0,93.47,0.0
38,16,2025-09-02,19,1.5,2.69,0.0,0.16,2.69,1.79,3360,663,93.33,19.73
54,16,2025-09-03,19,0.0,0.0,0.0,0.04,0.0,,2935,0,81.53,0.0
78,16,2025-09-04,19,0.0,0.0,0.0,0.02,0.0,,2952,0,82.0,0.0
102,16,2025-09-05,19,0.0,0.0,0.0,0.26,0.0,,2803,0,77.86,0.0
126,16,2025-09-06,19,0.0,0.0,0.0,0.21,0.0,,2751,0,76.42,0.0
150,16,2025-09-07,19,0.0,0.0,0.0,0.02,0.0,,2270,0,63.06,0.0
174,16,2025-09-08,19,0.0,0.0,0.0,0.26,0.0,,2037,0,56.58,0.0
198,16,2025-09-09,19,0.0,0.0,0.0,0.04,0.0,,2199,0,61.08,0.0
222,16,2025-09-10,19,3.58,4.97,2.22,0.25,2.75,0.77,1577,505,43.81,32.02


In [None]:
!ls prod/nelson/data_points/

In [None]:
df_out.to_csv('prod/nelson/data_points/hourly_analysis.csv')

In [None]:
df_out.groupby(['id', 'date'])['mileage_kwh_per_km'].median()

id  date      
16  2025-09-01    0.950
    2025-09-02    0.970
    2025-09-03    1.020
    2025-09-04    1.120
    2025-09-05    1.000
    2025-09-06    1.130
    2025-09-07    1.110
    2025-09-08    1.070
    2025-09-09    1.200
    2025-09-10    1.010
    2025-09-11    0.940
    2025-09-12    0.880
    2025-09-13    1.060
    2025-09-14    1.095
3   2025-09-01    0.890
    2025-09-02    0.960
    2025-09-03    0.780
    2025-09-04    0.880
    2025-09-05    0.920
    2025-09-06    0.880
    2025-09-07    0.855
    2025-09-08    0.825
    2025-09-09    0.890
    2025-09-10    0.900
    2025-09-11    0.900
    2025-09-12    1.080
    2025-09-13    0.885
    2025-09-14    0.840
Name: mileage_kwh_per_km, dtype: float64

In [None]:
def energy_mileage_stats_15min(df: pd.DataFrame):
    try:
        logging.info("‚öôÔ∏è STEP 3a: Starting 15-minute interval data processing with imputation...")
        df = df.copy()
        df['date'] = df['IST'].dt.date
        # Create 15-minute interval column
        df['15min_interval'] = df['IST'].dt.floor('15T')
        df['time_interval'] = df['15min_interval'].dt.strftime('%H:%M')

        # Map the minute of the 15min_interval to the quarter label
        df['quarter'] = df['15min_interval'].dt.minute.map(lambda m: 
            'Q0' if 0 <= m < 15 else
            'Q1' if 15 <= m < 30 else
            'Q2' if 30 <= m < 45 else
            'Q4'   # for 45-59
        )

        columns_to_impute = ['OdoMeterReading', 'Gear_Position', 'Vehiclereadycondition','Total_Battery_Current', 'Bat_Voltage']
        critical_columns = columns_to_impute

        # Initial stats
        total_rows = len(df)
        non_null_before = df[critical_columns].notnull().all(axis=1).sum()

        def impute_group(group):
            group = group.sort_values('IST')
            for col in columns_to_impute:
                mask = group[col].isnull()
                prev_time = group['IST'].where(~mask).ffill()
                next_time = group['IST'].where(~mask).bfill()
                time_gap = ((next_time - prev_time).dt.total_seconds()).abs()
                eligible = mask & (time_gap <= 300)
                group.loc[eligible, col] = group[col].interpolate(method='linear', limit_direction='both')[eligible]
            return group

        # Calculate total raw rows per id, date, 15-minute interval
        raw_counts = (
            df.groupby(['id', 'date', '15min_interval'])
            .size()
            .reset_index(name='raw_datapoints')
        )

        df = df.groupby('id', group_keys=False).apply(impute_group)
        df = df.dropna(subset=critical_columns)
        df = df[df['Total_Battery_Current'].abs() <= 3000]
        df.sort_values(by=['id', 'IST'], inplace=True)

        all_interval_stats = []
        for (vehicle_id, date_val, interval_val), group in df.groupby(['id', 'date', '15min_interval']):
            # Per-interval stats
            interval_total_rows = len(group)
            time_interval_str = interval_val.strftime('%H:%M')

            
            # GET THE QUARTER FOR THIS INTERVAL
            minute = interval_val.minute
            if minute == 0:
                quarter = 'Q0'
            elif minute == 15:
                quarter = 'Q1'
            elif minute == 30:
                quarter = 'Q2'
            else:  # minute == 45
                quarter = 'Q4'

            raw_datapoints = raw_counts[
                (raw_counts['id'] == vehicle_id) &
                (raw_counts['date'] == date_val) &
                (raw_counts['15min_interval'] == interval_val)
            ]['raw_datapoints'].values
            raw_datapoints = raw_datapoints[0] if len(raw_datapoints) > 0 else interval_total_rows
            
            # Calculate how many were imputed
            interval_non_null_before_impute = raw_datapoints - group[critical_columns].notnull().all(axis=1).sum()
            interval_non_null_after_impute = group[critical_columns].notnull().all(axis=1).sum()
            interval_imputed_count = interval_non_null_after_impute - (raw_datapoints - interval_non_null_before_impute)

            vehicle_df = group.copy()
            vehicle_df['time_diff_seconds'] = vehicle_df['IST'].diff().dt.total_seconds().fillna(0)
            vehicle_df = vehicle_df[vehicle_df['time_diff_seconds'] > 0]
            vehicle_df['power_kW'] = (vehicle_df['Bat_Voltage'] * vehicle_df['Total_Battery_Current']) / 1000

            driving_df = vehicle_df[(vehicle_df['Gear_Position'] == 2.0) | (vehicle_df['Vehiclereadycondition'] == 1.0)].copy()
            driving_df['energy_consumption_kwh'] = driving_df.apply(
                lambda row: row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] > 0 else 0, axis=1)
            driving_df['regen_energy_kwh'] = driving_df.apply(
                lambda row: -row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] < 0 else 0, axis=1)
            driving_df['distance_increment'] = driving_df['OdoMeterReading'].diff().fillna(0)
            driving_df = driving_df[driving_df['distance_increment'] <= 10]

            if not driving_df.empty:
                interval_driving_mileage = driving_df['distance_increment'].sum()
                interval_driving_energy = driving_df['energy_consumption_kwh'].sum()
                interval_regen_energy = driving_df['regen_energy_kwh'].sum()
                non_null_datapoints = len(driving_df)
            else:
                interval_driving_mileage = 0
                interval_driving_energy = 0
                interval_regen_energy = 0
                non_null_datapoints = 0

            stationary_df = vehicle_df[(vehicle_df['Gear_Position'] == 0.0)].copy()
            if not stationary_df.empty:
                stationary_df['energy_kwh'] = stationary_df.apply(
                    lambda row: row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] > 0 else 0, axis=1)
                interval_idling_energy = stationary_df['energy_kwh'].sum()
            else:
                interval_idling_energy = 0

            net_energy_kwh = interval_driving_energy - interval_regen_energy
            threshold = 0.01  # km (lower threshold for 15-min intervals)
            if interval_driving_mileage < threshold:
                mileage_kwh_per_km = np.nan
            else:
                mileage_kwh_per_km = round(net_energy_kwh / interval_driving_mileage, 2)
            
            # Calculate percentages
            # For 15-min intervals, we expect 900 seconds (15*60) of data, but we'll use raw_datapoints count
            raw_data_pcnt = round((raw_datapoints / 900.0) * 100, 2) if raw_datapoints > 0 else 0
            non_null_data_pcnt = round((non_null_datapoints / raw_datapoints) * 100, 2) if raw_datapoints > 0 else 0

            interval_stats_df = pd.DataFrame([{
                'id': vehicle_id,
                'date': date_val,
                'time_interval': time_interval_str,
                'quarter': quarter,  
                'dist_travelled_km': round(interval_driving_mileage, 2),
                'energy_consumed_kwh': round(interval_driving_energy, 2),
                'regen_energy_kwh': round(interval_regen_energy, 2),
                'idling_energy_kwh': round(interval_idling_energy, 2),
                'net_energy_kwh': round(net_energy_kwh, 2),
                'mileage_kwh_per_km': mileage_kwh_per_km,
                'raw_datapoints': raw_datapoints,
                'non_null_datapoints': non_null_datapoints,
                'raw_data_pcnt': raw_data_pcnt,
                'non_null_data_pcnt': non_null_data_pcnt
            }])
            all_interval_stats.append(interval_stats_df)

        if all_interval_stats:
            final_df = pd.concat(all_interval_stats, ignore_index=True)
            logging.info("‚úÖ STEP 3c: All vehicle 15-minute interval data processed.")
            return final_df
        else:
            logging.info("No data found for driving or idling periods.")
            return pd.DataFrame()

    except Exception as e:
        logging.warning(f"An error occurred: {e}")
        return pd.DataFrame()

In [None]:
df_outq = energy_mileage_stats_15min(df_raw)
df_outq.head()

2025-09-23 11:31:30 - INFO - ‚öôÔ∏è STEP 3a: Starting 15-minute interval data processing with imputation...
  df['15min_interval'] = df['IST'].dt.floor('15T')
  df = df.groupby('id', group_keys=False).apply(impute_group)
2025-09-23 11:32:12 - INFO - ‚úÖ STEP 3c: All vehicle 15-minute interval data processed.


Unnamed: 0,id,date,time_interval,quarter,dist_travelled_km,energy_consumed_kwh,regen_energy_kwh,idling_energy_kwh,net_energy_kwh,mileage_kwh_per_km,raw_datapoints,non_null_datapoints,raw_data_pcnt,non_null_data_pcnt
0,16,2025-09-01,00:30,Q2,0.0,1.35,0.0,1.35,1.35,,816,456,90.67,55.88
1,16,2025-09-01,00:45,Q4,16.88,16.92,0.55,0.19,16.37,0.97,758,756,84.22,99.74
2,16,2025-09-01,01:00,Q0,16.0,13.86,1.29,0.0,12.57,0.79,823,822,91.44,99.88
3,16,2025-09-01,01:15,Q1,3.19,5.21,0.05,0.41,5.17,1.62,809,808,89.89,99.88
4,16,2025-09-01,01:30,Q2,19.75,15.75,0.26,0.0,15.49,0.78,769,767,85.44,99.74


In [None]:
# Method 2: Using named aggregations (more readable)
daily_stats = df_outq.groupby(['id', 'date']).agg(
    total_distance=('dist_travelled_km', 'sum'),
    median_mileage=('mileage_kwh_per_km', 'median')
).reset_index()

daily_stats

Unnamed: 0,id,date,total_distance,median_mileage
0,16,2025-09-01,653.92,0.945
1,16,2025-09-02,220.62,0.915
2,16,2025-09-03,448.49,0.96
3,16,2025-09-04,658.82,1.055
4,16,2025-09-05,637.93,0.98
5,16,2025-09-06,651.25,0.99
6,16,2025-09-07,656.33,0.97
7,16,2025-09-08,627.26,1.045
8,16,2025-09-09,603.96,1.085
9,16,2025-09-10,653.38,1.025


In [None]:
# Multiple aggregations on the same column
daily_stats = df_outq.groupby(['id', 'date','quarter']).agg({
    'dist_travelled_km': ['sum', 'mean', 'max'],
    'mileage_kwh_per_km': ['median', 'mean']
}).reset_index()

# This will create a multi-level column index which you can flatten
daily_stats.columns = ['id', 'date','quarter' ,'total_distance', 'avg_distance', 'max_distance', 'median_mileage', 'avg_mileage']
daily_stats

Unnamed: 0,id,date,quarter,total_distance,avg_distance,max_distance,median_mileage,avg_mileage
0,16,2025-09-01,Q0,155.74,7.416190,18.75,0.910,1.142143
1,16,2025-09-01,Q1,141.63,6.437727,19.88,1.065,1.451875
2,16,2025-09-01,Q2,189.74,8.624545,20.00,0.900,0.944375
3,16,2025-09-01,Q4,166.81,7.943333,19.62,0.935,1.095000
4,16,2025-09-02,Q0,60.87,5.072500,18.62,0.905,0.996250
...,...,...,...,...,...,...,...,...
107,3,2025-09-13,Q4,156.43,6.517917,18.67,0.910,1.755263
108,3,2025-09-14,Q0,164.50,6.854167,19.82,0.810,0.942222
109,3,2025-09-14,Q1,145.47,6.612273,17.17,0.790,1.332500
110,3,2025-09-14,Q2,164.05,6.835417,18.33,0.775,0.934375


In [None]:
def energy_mileage_stats_daily(df: pd.DataFrame):
    try:
        logging.info("‚öôÔ∏è STEP 3a: Starting daily data processing with imputation...")
        df = df.copy()
        df['date'] = df['IST'].dt.date

        columns_to_impute = ['OdoMeterReading', 'Gear_Position', 'Vehiclereadycondition', 
                            'Total_Battery_Current', 'Bat_Voltage']
        critical_columns = columns_to_impute

        # Initial stats
        total_rows = len(df)
        non_null_before = df[critical_columns].notnull().all(axis=1).sum()

        def impute_group(group):
            group = group.sort_values('IST')
            for col in columns_to_impute:
                mask = group[col].isnull()
                prev_time = group['IST'].where(~mask).ffill()
                next_time = group['IST'].where(~mask).bfill()
                time_gap = ((next_time - prev_time).dt.total_seconds()).abs()
                eligible = mask & (time_gap <= 300)
                group.loc[eligible, col] = group[col].interpolate(method='linear', limit_direction='both')[eligible]
            return group

        # Calculate total raw rows per id, date
        raw_counts = (
            df.groupby(['id', 'date'])
            .size()
            .reset_index(name='total_raw_ingested')
        )

        df = df.groupby('id', group_keys=False).apply(impute_group)
        df = df.dropna(subset=critical_columns)
        df = df[df['Total_Battery_Current'].abs() <= 3000]
        df.sort_values(by=['id', 'IST'], inplace=True)

        all_daily_stats = []
        for (vehicle_id, date_val), group in df.groupby(['id', 'date']):
            # Per-day stats
            day_total_rows = len(group)
            
            total_raw_ingested = raw_counts[
                (raw_counts['id'] == vehicle_id) &
                (raw_counts['date'] == date_val)
            ]['total_raw_ingested'].values
            total_raw_ingested = total_raw_ingested[0] if len(total_raw_ingested) > 0 else day_total_rows

            vehicle_df = group.copy()
            vehicle_df['time_diff_seconds'] = vehicle_df['IST'].diff().dt.total_seconds().fillna(0)
            vehicle_df = vehicle_df[vehicle_df['time_diff_seconds'] > 0]
            vehicle_df['power_kW'] = (vehicle_df['Bat_Voltage'] * vehicle_df['Total_Battery_Current']) / 1000

            # Driving periods
            driving_df = vehicle_df[
                (vehicle_df['Gear_Position'] == 2.0) | 
                (vehicle_df['Vehiclereadycondition'] == 1.0)
            ].copy()
            
            if not driving_df.empty:
                driving_df['energy_consumption_kwh'] = driving_df.apply(
                    lambda row: row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] > 0 else 0, axis=1)
                driving_df['regen_energy_kwh'] = driving_df.apply(
                    lambda row: -row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] < 0 else 0, axis=1)
                driving_df['distance_increment'] = driving_df['OdoMeterReading'].diff().fillna(0)
                driving_df = driving_df[driving_df['distance_increment'] <= 10]

                daily_driving_mileage = driving_df['distance_increment'].sum()
                daily_driving_energy = driving_df['energy_consumption_kwh'].sum()
                daily_regen_energy = driving_df['regen_energy_kwh'].sum()
                usable_datapoints = len(driving_df)
            else:
                daily_driving_mileage = 0
                daily_driving_energy = 0
                daily_regen_energy = 0
                usable_datapoints = 0

            # Stationary periods
            stationary_df = vehicle_df[vehicle_df['Gear_Position'] == 0.0].copy()
            if not stationary_df.empty:
                stationary_df['energy_kwh'] = stationary_df.apply(
                    lambda row: row['power_kW'] * (row['time_diff_seconds'] / 3600) if row['power_kW'] > 0 else 0, axis=1)
                daily_idling_energy = stationary_df['energy_kwh'].sum()
            else:
                daily_idling_energy = 0

            # Calculate net energy and mileage
            net_energy_kwh = daily_driving_energy - daily_regen_energy
            threshold = 0.1  # km
            if daily_driving_mileage < threshold:
                mileage_kwh_per_km = np.nan
            else:
                mileage_kwh_per_km = round(net_energy_kwh / daily_driving_mileage, 2)
            
            # Calculate data quality percentages
            daily_percent_raw = round((total_raw_ingested / 86400) * 100, 2) if total_raw_ingested > 0 else 0
            daily_percent_acknowledged = round((usable_datapoints / total_raw_ingested) * 100, 2) if total_raw_ingested > 0 else 0

            daily_stats_df = pd.DataFrame([{
                'id': vehicle_id,
                'date': date_val,
                'dist_travelled_km': round(daily_driving_mileage, 2),
                'energy_consumed_kwh': round(daily_driving_energy, 2),
                'regen_energy_kwh': round(daily_regen_energy, 2),
                'idling_energy_kwh': round(daily_idling_energy, 2),
                'net_energy_kwh': round(net_energy_kwh, 2),
                'mileage_kwh_per_km': mileage_kwh_per_km,
                'total_raw_ingested': total_raw_ingested,
                'usable_datapoints': usable_datapoints,
                'daily_percent_raw': daily_percent_raw,
                'daily_percent_acknowledged': daily_percent_acknowledged
            }])
            all_daily_stats.append(daily_stats_df)

        if all_daily_stats:
            final_df = pd.concat(all_daily_stats, ignore_index=True)
            logging.info("‚úÖ STEP 3c: All vehicle daily data processed.")
            return final_df
        else:
            logging.info("No data found for driving or idling periods.")
            return pd.DataFrame()

    except Exception as e:
        logging.warning(f"An error occurred: {e}")
        return pd.DataFrame()

In [None]:
df_out_daily = energy_mileage_stats_daily(df_raw)
df_out_daily.head()

2025-09-23 11:48:20 - INFO - ‚öôÔ∏è STEP 3a: Starting daily data processing with imputation...
  df = df.groupby('id', group_keys=False).apply(impute_group)
2025-09-23 11:48:37 - INFO - ‚úÖ STEP 3c: All vehicle daily data processed.


Unnamed: 0,id,date,dist_travelled_km,energy_consumed_kwh,regen_energy_kwh,idling_energy_kwh,net_energy_kwh,mileage_kwh_per_km,total_raw_ingested,usable_datapoints,daily_percent_raw,daily_percent_acknowledged
0,16,2025-09-01,656.12,781.32,175.24,6.66,606.07,0.92,71377,48521,82.61,67.98
1,16,2025-09-02,221.12,261.11,53.84,9.41,207.27,0.94,49757,21654,57.59,43.52
2,16,2025-09-03,451.12,573.16,138.36,3.79,434.8,0.96,43590,31528,50.45,72.33
3,16,2025-09-04,664.25,823.12,161.87,10.35,661.25,1.0,50621,31342,58.59,61.92
4,16,2025-09-05,656.43,780.35,149.33,15.71,631.02,0.96,47811,30471,55.34,63.73


In [None]:
# Method 2: Using named aggregations (more readable)
daily_stats = df_out_daily.groupby(['id', 'date']).agg(
    total_distance=('dist_travelled_km', 'sum'),
    median_mileage=('mileage_kwh_per_km', 'median')
).reset_index()

daily_stats

Unnamed: 0,id,date,total_distance,median_mileage
0,16,2025-09-01,656.12,0.92
1,16,2025-09-02,221.12,0.94
2,16,2025-09-03,451.12,0.96
3,16,2025-09-04,664.25,1.0
4,16,2025-09-05,656.43,0.96
5,16,2025-09-06,655.19,1.0
6,16,2025-09-07,661.0,0.95
7,16,2025-09-08,640.38,0.95
8,16,2025-09-09,626.31,1.01
9,16,2025-09-10,658.18,0.98
