In [3]:
import pandas as pd

import math

def openxc_sessionize(df, n_bins=20, num_per_session=10):
    # discretize `accelerator_pedal_position` into `n_bins` bins
    df['accelerator_pedal_position_binned'] = pd.cut(df['accelerator_pedal_position'], bins=n_bins, labels=False)

    # Group by the discretized 'accelerator_pedal_position' and 'brake_pedal_status'
    grouped = df.groupby(['accelerator_pedal_position_binned', 'brake_pedal_status'])

    # Get max number of rows in a group
    print("Number of rows in the DataFrame: ", len(df))
    print("Number of groups: ", grouped.ngroups)
    print("Max number of rows in a group: ", grouped.size().max())


    # Initialize a list to store DataFrame chunks
    df_chunks = []

    # Global session ID counter
    global_session_id = 0

    # Iterate over each group
    for group_key, group in grouped:
        print("Group key: ", group_key)
        # Calculate the number of sessions in this group
        total_rows = len(group)
        sessions_in_group = math.ceil(total_rows / num_per_session)  # Ceiling division

        # Assign session IDs within this group
        for session_number in range(sessions_in_group):
            # Determine the row indices for this session
            session_start = session_number * num_per_session
            session_end = session_start + num_per_session
            # Assign the global session ID to these rows
            df.loc[group.index[session_start:session_end], 'session_id'] = global_session_id
            # Increment the global session ID
            global_session_id += 1
    
    # df.drop(columns=['accelerator_pedal_position'], inplace=True)

    # Check if number of sessions is correct
    assert global_session_id == df['session_id'].nunique() == df.groupby(['session_id', 'accelerator_pedal_position_binned', 'brake_pedal_status']).ngroups

    print("Number of sessions: ", global_session_id)
    

    return df

final_df = openxc_sessionize(pd.read_csv("../data_selected/openxc/nyc_downtown_east.csv"))

Number of rows in the DataFrame:  319343
Number of groups:  21
Max number of rows in a group:  166462
Group key:  (0, False)
Group key:  (0, True)
Group key:  (1, False)
Group key:  (2, False)
Group key:  (3, False)
Group key:  (4, False)
Group key:  (5, False)
Group key:  (6, False)
Group key:  (7, False)
Group key:  (8, False)
Group key:  (9, False)
Group key:  (10, False)
Group key:  (11, False)
Group key:  (12, False)
Group key:  (13, False)
Group key:  (14, False)
Group key:  (15, False)
Group key:  (16, False)
Group key:  (17, False)
Group key:  (18, False)
Group key:  (19, False)
Number of sessions:  31945


In [4]:
final_df.head()

Unnamed: 0,timestamp,brake_pedal_status,accelerator_pedal_position,transmission_gear_position,vehicle_speed,engine_speed,accelerator_pedal_position_binned,session_id
0,1364314000.0,False,0.0,first,0.0,774.0,0,0
1,1364314000.0,False,0.0,first,0.0,774.0,0,0
2,1364314000.0,False,0.0,first,0.0,774.0,0,0
3,1364314000.0,False,0.0,first,0.0,774.0,0,0
4,1364314000.0,False,0.0,first,0.0,774.0,0,0


In [6]:
len(set(final_df['session_id']))

16647

In [12]:
set(final_df['accelerator_pedal_position_binned'])

{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19}

In [2]:
for csv_file in ['nyc_downtown_east.csv', 'taiwan_HighwayNo2_can.csv', 'india_New_Delhi_Railway_to_AIIMS.csv']:
    final_df = openxc_sessionize(pd.read_csv("../data_selected/openxc/" + csv_file))
    final_df.to_csv("../data_selected/openxc/" + csv_file[:-4] + "_sessionized.csv", index=False)
    print("Done with " + csv_file)

Number of rows in the DataFrame:  319343
Number of groups:  21
Max number of rows in a group:  166462
Number of sessions:  31945
Done with nyc_downtown_east.csv
Number of rows in the DataFrame:  416068
Number of groups:  21
Max number of rows in a group:  151733
Number of sessions:  41615
Done with taiwan_HighwayNo2_can.csv
Number of rows in the DataFrame:  154892
Number of groups:  21
Max number of rows in a group:  66989
Number of sessions:  15502
Done with india_New_Delhi_Railway_to_AIIMS.csv
