In [1]:
import pandas as pd
import os
import random
from tqdm import tqdm
import pickle
from sklearn.preprocessing import MinMaxScaler



In [27]:
regenerate_context_data = False
sequence_augmentation = True
whole_session_context = False
model_test_run = False
data_autoencoder = False
pad_to_window_size = True

PATH_TO_LOAD = '../../carsii_datasets/data/04_Merged'
combined_context_path = '../../carsii_datasets/data/05_Interaction_Sequences/context.csv'

window = 100 #seconds

base_path = '../datasets/sequential/'
augmentation_folder = 'aug/' if sequence_augmentation else 'non_aug/'
if model_test_run:
    augmentation_folder = 'test/aug/' if sequence_augmentation else 'test/non_aug/'

sequence_context_path = f'{base_path}{augmentation_folder}parameters/sequence_context.csv'
parameter_path = f'{base_path}{augmentation_folder}parameters'
train_session_path = f'{base_path}{augmentation_folder}parameters/train_sessions.pkl'
test_session_path = f'{base_path}{augmentation_folder}parameters/test_sessions.pkl'
train_dynamic_context_path = f'{base_path}{augmentation_folder}dynamic_context/train.csv'
test_dynamic_context_path = f'{base_path}{augmentation_folder}dynamic_context/test.csv'
train_static_context_path = f'{base_path}{augmentation_folder}static_context/train.csv'
test_static_context_path = f'{base_path}{augmentation_folder}static_context/test.csv'

In [3]:
all_columns = ['index', 'avg_irradiation', 'steering_speed', 'temperature_out', 'hour',
       'month', 'odometer', 'light_sensor_rear', 'light_sensor_front',
       'temperature_in', 'KBI_speed', 'soc', 'ESP_speed', 'latitude',
       'longitude', 'seatbelt_codriver', 'seatbelt_rear_l', 'seatbelt_rear_m',
       'seatbelt_rear_r', 'CHA_ESP_drive_mode', 'CHA_MO_drive_mode',
       'rain_sensor', 'street_category', 'kickdown', 'altitude',
       'driving_program', 'datetime', 'session', 'Label', 'ID',
       'FunctionValue', 'domain', 'BeginTime', 'time_second',
       'distance_driven', 'ts_normalized', 'weekday']

selected = [ 'avg_irradiation', 'steering_speed', 'temperature_out', 'hour',
       'month', 'light_sensor_rear', 'light_sensor_front',
       'temperature_in', 'KBI_speed', 'soc', 'latitude',
       'longitude', 'seatbelt_codriver', 'seatbelt_rear_l',
       'seatbelt_rear_r', 'street_category', 'altitude',
       'datetime', 'session', 'time_second',
       'distance_driven', 'weekday'
]

bad_quality = ['CHA_ESP_drive_mode', 
             'CHA_MO_drive_mode',
             'rain_sensor',
             'kickdown',
             'ESP_speed',
             'seatbelt_rear_m',
            'driving_program',
            'ts_normalized'
             ]

dynamic_context_var = ['avg_irradiation', 'steering_speed', 'temperature_out', 
                       'light_sensor_rear', 'light_sensor_front', 
                       'temperature_in', 'KBI_speed', 'soc', 'latitude',
                       'longitude',  'street_category', 'altitude','time_second',
                       'distance_driven']
static_context_var = ['car_id', 'month', 'weekday', 'hour', 'seatbelt_codriver', 'seatbelt_rear_l',
                       'seatbelt_rear_r',]
#Todo take average of these value over a window
constant_context_var = ['avg_irradiation','temperature_out'] #to be filled

In [4]:
def load_context(vehicle):
    context_lists = dynamic_context_var + static_context_var + ['session', 'datetime']
    context_lists.remove('car_id')
    df = pd.read_csv(os.path.join(PATH_TO_LOAD, vehicle + "_merged.csv"), parse_dates=['datetime'], usecols=context_lists)
    # df_filt = df[context_lists]
    # df_filt = df_filt.dropna(subset=['KBI_speed'])
    df = df.sort_values(by=['session','datetime'])
    return df

vehicles = ['SEB880','SEB882','SEB883','SEB885','SEB888','SEB889']
context_data_list = []
if regenerate_context_data == True:
    for vehicle in tqdm(vehicles):
        context_curr = load_context(vehicle)
        context_curr['car_id'] = vehicle
        context_data_list.append(context_curr)
    context_data = pd.concat(context_data_list, axis=0)
    context_data.to_csv(combined_context_path)
else:
    context_data = pd.read_csv(combined_context_path, parse_dates=['datetime'], index_col=0)

In [5]:
vehicle_list = context_data.car_id.unique().tolist()
random.shuffle(vehicle_list)
vehicle_dict = {}
used_values = set()
for vehicle in vehicle_list:
    value = random.randint(1, len(vehicle_list))
    while value in used_values:
        value = random.randint(1, len(vehicle_list))
    vehicle_dict[vehicle] = value
    used_values.add(value)

context_data['car_id'] = context_data['car_id'].map(vehicle_dict)
context_data = context_data.sort_values(by=['session','datetime'])

context_data['session'] = context_data['session'].astype(int)
context_data = context_data[dynamic_context_var + static_context_var + ['session', 'datetime']]
context_data = context_data.groupby(['session', 'datetime'])[dynamic_context_var + static_context_var].mean().reset_index()

with open(train_session_path, 'rb') as pickle_file:
    train_sessions = pickle.load(pickle_file)
with open(test_session_path, 'rb') as pickle_file:
    test_sessions = pickle.load(pickle_file)
context_data = context_data[context_data['session'].isin(train_sessions + test_sessions)]

selected_sequence = pd.read_csv(sequence_context_path, parse_dates=['datetime'], index_col=0).reset_index()
selected_sequence['session'] = selected_sequence['session'].astype(int)
min_datetime_indices = selected_sequence.groupby('session')['datetime'].idxmin()
selected_sequence = selected_sequence.drop(min_datetime_indices)
selected_sequence.reset_index(drop=True, inplace=True)

selected_dfs = []
for session in tqdm(selected_sequence['session'].unique().tolist()):
    selected_sequence_curr = selected_sequence[selected_sequence['session']==session]
    context_data_curr = context_data[context_data['session']==session]
    context_data_curr = context_data_curr[context_data_curr['datetime']<=selected_sequence_curr['datetime'].max()]
    selected_dfs.append(context_data_curr)
training_sequence_context = pd.concat(selected_dfs, axis=0)

In [10]:
# testing_sessions = [16, 25]
# selected_sequence = selected_sequence[selected_sequence['session'].isin(testing_sessions)]
# training_sequence_context = training_sequence_context[training_sequence_context['session'].isin(testing_sessions)]

window_id = 0
if sequence_augmentation == True:
    grouped_selected_sequence = selected_sequence.groupby('session')
    augmented_frames = []
    for session, selected_sequence_curr in tqdm(grouped_selected_sequence):
        for i, row in selected_sequence_curr.iloc[::-1].iterrows():
            context_filt_curr = training_sequence_context[
                (training_sequence_context['session'] == session) &
                (training_sequence_context['datetime'] <= row['datetime'])
            ].copy()
            if not whole_session_context:
                context_filt_curr = context_filt_curr.tail(window)
            context_filt_curr['window_id'] = window_id
            # context_filt_curr['session'] = session
            augmented_frames.append(context_filt_curr)
            window_id += 1
    training_sequence_context_augmented = pd.concat(augmented_frames, axis=0)
    context_data = training_sequence_context_augmented.reset_index(drop=True)
else:
    # if sequence_augmentation is set to false
    if not whole_session_context:
        context_data = training_sequence_context.groupby('session').tail(window)
    context_data = training_sequence_context.reset_index(drop=True)
    context_data['window_id'] = context_data.groupby('session').ngroup()

100%|██████████| 1203/1203 [01:54<00:00, 10.48it/s]


In [11]:
dynamic_context = context_data[dynamic_context_var + ['window_id', 'session', 'datetime']]
print('number of dynamic context session', len(dynamic_context.window_id.unique().tolist()))

# function to pad first value to fit the window size
if pad_to_window_size:
    df = dynamic_context.copy()
    session_counts = df.groupby('window_id').size()
    less_than_100 = session_counts[session_counts < window].index.tolist()
    print(f'Number of window with window length less than {window}: ', len(less_than_100))
    window100_dfs = df[~df['window_id'].isin(less_than_100)]
    empty_df = []
    for window_id in tqdm(less_than_100):
        sub_df = df[df['window_id'] == window_id]
        rows_to_pad = window - len(sub_df)
        min_datetime_row = sub_df.loc[sub_df['datetime'].idxmin()]
        pad_df = pd.DataFrame(min_datetime_row, df.columns).transpose()
        pad_df = pd.concat([pad_df] * int(rows_to_pad), ignore_index=True, axis=0)

        padded_df = pd.concat([pad_df, sub_df], axis=0).reset_index(drop=True)
        padded_df['window_id'] = window_id
        empty_df.append(padded_df)
    if empty_df:
        df = pd.concat(empty_df, axis=0).reset_index(drop=True)
        df = pd.concat([df, window100_dfs], axis=0).sort_values(by=['window_id']).reset_index(drop=True)
        session_counts = df.groupby('window_id').size()
        less_than_100 = session_counts[session_counts < window].index.tolist()
        print(f'Number of window with window length less than {window}: ', len(less_than_100))
        dynamic_context = df

static_context = context_data[static_context_var + ['window_id', 'session', 'datetime']]
print('number of windows', len(dynamic_context.window_id.unique().tolist()), len(static_context.window_id.unique().tolist()))
print('number of session', len(dynamic_context.session.unique().tolist()), len(static_context.session.unique().tolist()))

number of dynamic context session 5454
Number of window with window length less than 100:  157


100%|██████████| 157/157 [00:00<00:00, 164.35it/s]


Number of window with window length less than 100:  0
number of windows 5454 5454
number of session 1203 1203


In [14]:
static_context_list = []
grouped_static_context = static_context.groupby('window_id')

for window, static_context_curr in tqdm(grouped_static_context):
    unique_curr = static_context_curr.drop_duplicates(subset=static_context_curr.columns.difference(['datetime']))
    if len(unique_curr) > 1:
        most_repeated_values = static_context_curr.mode().iloc[0]
        result_df = pd.DataFrame(most_repeated_values).transpose()
        result_df = result_df.reset_index(drop=True)
        static_context_list.append(result_df)
    else:
        static_context_list.append(unique_curr)

if static_context_list:
    static_context = pd.concat(static_context_list, axis=0).reset_index(drop=True)

train_dynamic_context = dynamic_context[dynamic_context['session'].isin(train_sessions)]
test_dynamic_context = dynamic_context[dynamic_context['session'].isin(test_sessions)]
train_static_context = static_context[static_context['session'].isin(train_sessions)]
test_static_context = static_context[static_context['session'].isin(test_sessions)]

train_dynamic_context['window_id'] = train_dynamic_context.groupby('window_id').ngroup()
test_dynamic_context['window_id'] = test_dynamic_context.groupby('window_id').ngroup()
train_static_context['window_id'] = train_static_context.groupby('window_id').ngroup()
test_static_context['window_id'] = test_static_context.groupby('window_id').ngroup()
# if data_autoencoder:
#     train_dynamic_context.to_csv('../pretrain/time-series-autoencoder/data/dynamic_context_train.csv', index=False)
#     test_dynamic_context.to_csv('../pretrain/time-series-autoencoder/data/dynamic_context_test.csv', index=False)

print('number of session', len(train_dynamic_context.window_id.unique().tolist()), len(test_dynamic_context.window_id.unique().tolist()),
       len(train_static_context.window_id.unique().tolist()), len(test_static_context.window_id.unique().tolist()))

train_dynamic_context.to_csv(f'{base_path}{augmentation_folder}dynamic_context/unnormal/train.csv', index=False)
test_dynamic_context.to_csv(f'{base_path}{augmentation_folder}dynamic_context/unnormal/test.csv', index=False)

100%|██████████| 5454/5454 [00:09<00:00, 555.69it/s]


number of session 4426 1028 4426 1028


In [19]:
#Normalisation
dynamic_context_to_normalize = [col for col in train_dynamic_context.columns if col not in ['window_id', 'session_ids', 'datetime', 'session_id', 'session']]
scaler_dynamic_context = MinMaxScaler()
scaler_dynamic_context.fit(train_dynamic_context[dynamic_context_to_normalize])
train_dynamic_context[dynamic_context_to_normalize] = scaler_dynamic_context.transform(train_dynamic_context[dynamic_context_to_normalize])
test_dynamic_context[dynamic_context_to_normalize] = scaler_dynamic_context.transform(test_dynamic_context[dynamic_context_to_normalize])

train_dynamic_context.to_csv(train_dynamic_context_path, index=False)
test_dynamic_context.to_csv(test_dynamic_context_path, index=False)

train_static_context.to_csv(train_static_context_path, index=False)
test_static_context.to_csv(test_static_context_path, index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train_dynamic_context[dynamic_context_to_normalize] = scaler_dynamic_context.transform(train_dynamic_context[dynamic_context_to_normalize])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_dynamic_context[dynamic_context_to_normalize] = scaler_dynamic_context.transform(test_dynamic_context[dynamic_context_to_normalize])


In [21]:
test_static_context

Unnamed: 0,car_id,month,weekday,hour,seatbelt_codriver,seatbelt_rear_l,seatbelt_rear_r,window_id,session,datetime
3,2.0,9.0,6.0,10.0,1.0,1.0,1.0,0,25,2022-09-11 10:26:09
13,2.0,12.0,2.0,10.0,0.0,0.0,0.0,1,94,2022-12-07 10:02:41
25,2.0,12.0,4.0,11.0,0.0,0.0,1.0,2,172,2022-12-23 11:37:53
26,2.0,12.0,4.0,11.0,0.0,0.0,1.0,3,172,2022-12-23 11:30:18
27,2.0,12.0,4.0,16.0,0.0,0.0,0.0,4,173,2022-12-23 16:31:26
...,...,...,...,...,...,...,...,...,...,...
5429,3.0,3.0,3.0,9.0,0.0,0.0,0.0,1023,5346,2023-03-16 09:22:49
5430,3.0,3.0,3.0,9.0,0.0,0.0,0.0,1024,5346.0,2023-03-16 08:59:54
5431,3.0,3.0,3.0,8.0,0.0,0.0,0.0,1025,5346,2023-03-16 08:55:25
5452,3.0,3.0,4.0,16.0,1.0,1.0,1.0,1026,5399,2023-03-31 16:08:17


In [22]:
test_dynamic_context

Unnamed: 0,avg_irradiation,steering_speed,temperature_out,light_sensor_rear,light_sensor_front,temperature_in,KBI_speed,soc,latitude,longitude,street_category,altitude,time_second,distance_driven,window_id,session,datetime
300,0.056226,0.000000,0.588889,0.185771,1.000000,0.471154,0.092385,0.542612,0.930608,0.675414,1.000000,0.397644,0.001635,0.256636,0,25,2022-09-11 10:27:23
301,0.050000,0.074447,0.588889,0.189723,0.926217,0.471154,0.049938,0.542612,0.930615,0.675442,0.142857,0.396859,0.001824,0.256636,0,25,2022-09-11 10:27:32
302,0.050000,0.121574,0.588889,0.193676,0.763743,0.471154,0.047988,0.542612,0.930615,0.675439,0.142857,0.397644,0.001803,0.256636,0,25,2022-09-11 10:27:31
303,0.050000,0.042750,0.588889,0.189718,0.805838,0.471154,0.067755,0.542612,0.930614,0.675434,0.142857,0.397644,0.001761,0.256636,0,25,2022-09-11 10:27:29
304,0.050000,0.000000,0.588889,0.180241,0.938509,0.471154,0.087548,0.542612,0.930613,0.675429,1.000000,0.397644,0.001740,0.256636,0,25,2022-09-11 10:27:28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545395,0.037789,0.017697,0.433333,0.094862,0.396200,0.682692,0.283442,0.583964,0.926795,0.703405,0.428571,0.600094,0.034237,0.000716,1027,5399,2023-03-31 16:07:06
545396,0.030000,0.007759,0.433333,0.083270,0.418086,0.682692,0.309689,0.583964,0.926796,0.703386,0.142857,0.598962,0.034216,0.000716,1027,5399,2023-03-31 16:07:05
545397,0.032197,0.005374,0.433333,0.083004,0.444528,0.682692,0.313598,0.583964,0.926797,0.703365,0.428571,0.597830,0.034195,0.000716,1027,5399,2023-03-31 16:07:04
545398,0.030000,0.000000,0.422222,0.103075,0.618479,0.682692,0.235222,0.583459,0.926779,0.703476,0.285714,0.605755,0.034405,0.000716,1027,5399,2023-03-31 16:07:14


In [23]:
train_static_context

Unnamed: 0,car_id,month,weekday,hour,seatbelt_codriver,seatbelt_rear_l,seatbelt_rear_r,window_id,session,datetime
0,2.0,9.0,4.0,18.0,0.0,1.0,0.0,0,16,2022-09-09 18:41:18
1,2.0,9.0,4.0,18.0,0.0,1.0,0.0,1,16,2022-09-09 18:22:09
2,2.0,9.0,4.0,18.0,0.0,1.0,0.0,2,16,2022-09-09 18:00:40
4,2.0,9.0,0.0,17.0,0.0,0.0,0.0,3,33,2022-09-12 17:08:04
5,2.0,9.0,1.0,7.0,1.0,0.0,0.0,4,35,2022-09-13 07:42:01
...,...,...,...,...,...,...,...,...,...,...
5447,3.0,3.0,3.0,8.0,0.0,0.0,0.0,4421,5364,2023-03-23 08:22:33
5448,3.0,3.0,3.0,8.0,0.0,0.0,0.0,4422,5364,2023-03-23 08:22:33
5449,3.0,3.0,1.0,12.0,0.0,0.0,0.0,4423,5387,2023-03-28 12:52:30
5450,3.0,3.0,1.0,14.0,0.0,0.0,0.0,4424,5388,2023-03-28 14:04:02


In [24]:
train_dynamic_context

Unnamed: 0,avg_irradiation,steering_speed,temperature_out,light_sensor_rear,light_sensor_front,temperature_in,KBI_speed,soc,latitude,longitude,street_category,altitude,time_second,distance_driven,window_id,session,datetime
0,0.00,0.0,0.666667,0.027668,0.122429,0.663462,0.0,0.571357,0.930601,0.675377,0.142857,0.393889,0.066776,0.256520,0,16,2022-09-09 18:41:18
1,0.00,0.0,0.666667,0.031621,0.133203,0.663462,0.0,0.571357,0.930601,0.675379,1.000000,0.393115,0.068474,0.256520,0,16,2022-09-09 18:42:39
2,0.00,0.0,0.666667,0.031621,0.133203,0.663462,0.0,0.571357,0.930601,0.675379,1.000000,0.393115,0.068453,0.256520,0,16,2022-09-09 18:42:38
3,0.00,0.0,0.666667,0.031621,0.133203,0.663462,0.0,0.571357,0.930601,0.675379,1.000000,0.393115,0.068432,0.256520,0,16,2022-09-09 18:42:37
4,0.00,0.0,0.666667,0.031621,0.132943,0.663462,0.0,0.571357,0.930601,0.675379,1.000000,0.393115,0.068390,0.256520,0,16,2022-09-09 18:42:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545195,0.01,0.0,0.433333,0.027668,0.509408,0.661372,0.0,0.594049,0.935801,0.679297,0.714286,0.341032,0.128855,0.002931,4425,5389,2023-03-28 18:37:19
545196,0.01,0.0,0.433333,0.027668,0.504374,0.660746,0.0,0.593545,0.935801,0.679297,0.000000,0.341032,0.128834,0.002931,4425,5389,2023-03-28 18:37:18
545197,0.01,0.0,0.433333,0.027668,0.503357,0.655942,0.0,0.593545,0.935801,0.679297,0.000000,0.341032,0.128813,0.002931,4425,5389,2023-03-28 18:37:17
545198,0.01,0.0,0.433333,0.027668,0.503428,0.653846,0.0,0.593041,0.935801,0.679297,1.000000,0.341032,0.128792,0.002931,4425,5389,2023-03-28 18:37:16


In [50]:
def session_window_mapping(df):
    session_window_dict = {}

    for index, row in df.iterrows():
        session = int(row['session'])
        window_id = int(row['window_id'])
        
        # If the session is not already in the dictionary, initialize an empty set
        if session not in session_window_dict:
            session_window_dict[session] = set()
        
        # Add the window_id to the set corresponding to the session
        session_window_dict[session].add(window_id)

    # Convert sets to lists in the resulting dictionary
    session_window_dict = {session: list(window_ids) for session, window_ids in session_window_dict.items()}
    return session_window_dict


train_session_win_id_mapping_dc = session_window_mapping(train_dynamic_context)
test_session_win_id_mapping_dc = session_window_mapping(test_dynamic_context)
train_session_win_id_mapping_sc = session_window_mapping(train_static_context)
test_session_win_id_mapping_sc = session_window_mapping(test_static_context)

with open(os.path.join(parameter_path, 'session_win_id_mapping.pkl'), 'rb') as pickle_file:
    train_session_win_id_mapping = pickle.load(pickle_file)
    test_session_win_id_mapping = pickle.load(pickle_file)

In [51]:
train_session_win_id_mapping_sc

{16: [0, 1, 2],
 33: [3],
 35: [4],
 40: [5],
 42: [6],
 50: [8, 9, 7],
 52: [10, 11],
 150: [12, 13, 14, 15, 16],
 151: [17],
 163: [18, 19],
 164: [20, 21, 22],
 177: [24, 25, 23],
 183: [26, 27, 28],
 186: [32, 29, 30, 31],
 188: [33],
 189: [34, 35, 36, 37],
 197: [38],
 199: [40, 41, 39],
 202: [42],
 203: [43],
 205: [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54],
 206: [56, 57, 58, 55],
 209: [59, 60, 61],
 213: [64, 65, 62, 63],
 253: [66],
 255: [67, 68, 69, 70],
 256: [72, 71],
 258: [73, 74],
 274: [75],
 357: [76],
 387: [77, 78, 79, 80, 81, 82, 83],
 397: [84, 85, 86],
 404: [88, 87],
 405: [89, 90],
 406: [91],
 408: [92, 93, 94],
 411: [96, 95],
 413: [97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122],
 416: [123, 124],
 418: [125, 126],
 420: [128, 127],
 422: [129, 130],
 425: [131],
 426: [132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 1

In [52]:
train_session_win_id_mapping_dc

{16: [0, 1, 2],
 33: [3],
 35: [4],
 40: [5],
 42: [6],
 50: [8, 9, 7],
 52: [10, 11],
 150: [12, 13, 14, 15, 16],
 151: [17],
 163: [18, 19],
 164: [20, 21, 22],
 177: [24, 25, 23],
 183: [26, 27, 28],
 186: [32, 29, 30, 31],
 188: [33],
 189: [34, 35, 36, 37],
 197: [38],
 199: [40, 41, 39],
 202: [42],
 203: [43],
 205: [44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54],
 206: [56, 57, 58, 55],
 209: [59, 60, 61],
 213: [64, 65, 62, 63],
 253: [66],
 255: [67, 68, 69, 70],
 256: [72, 71],
 258: [73, 74],
 274: [75],
 357: [76],
 387: [77, 78, 79, 80, 81, 82, 83],
 397: [84, 85, 86],
 404: [88, 87],
 405: [89, 90],
 406: [91],
 408: [92, 93, 94],
 411: [96, 95],
 413: [97,
  98,
  99,
  100,
  101,
  102,
  103,
  104,
  105,
  106,
  107,
  108,
  109,
  110,
  111,
  112,
  113,
  114,
  115,
  116,
  117,
  118,
  119,
  120,
  121,
  122],
 416: [123, 124],
 418: [125, 126],
 420: [128, 127],
 422: [129, 130],
 425: [131],
 426: [132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 1

In [54]:
if train_session_win_id_mapping_dc == train_session_win_id_mapping_sc == train_session_win_id_mapping:
    print("All training data mapping are exactly identical.")
if test_session_win_id_mapping_dc == test_session_win_id_mapping_sc == test_session_win_id_mapping:
    print("All testing data mapping are exactly identical.")

All training data mapping are exactly identical.
All testing data mapping are exactly identical.


In [20]:
print(len(train_static_context.session.unique().tolist()), len(test_static_context.session.unique().tolist()))
print(len(train_dynamic_context.window_id.unique().tolist()), len(test_dynamic_context.window_id.unique().tolist()))

963 240
4426 1028
