In [1]:
import pandas as pd

#load filtered jump point data
df = pd.read_csv("../data/filtered_jump_points.csv")

#show first few rows
df.head()

Unnamed: 0,patient_id,label,timestamp,event_type,event_value
0,1,0,2025-01-01 08:00:26.600,flow,25.733894
1,1,0,2025-01-01 08:00:31.000,flow,29.680849
2,1,0,2025-01-01 08:00:40.000,flow,9.589884
3,1,0,2025-01-01 08:00:40.200,flow,4.657168
4,1,0,2025-01-01 08:00:40.600,flow,6.713505


In [2]:
#each jump point row becomes a triple: (event_type, event_value, time_gap)
#covert timestamp column to datetime
df["timestamp"] = pd.to_datetime(df["timestamp"])

#sort by timestamp
df = df.sort_values("timestamp").reset_index(drop=True)

#calculate time gap (in seconds) between events
df["time_gap"] = df["timestamp"].diff().dt.total_seconds().fillna(0)

#show first 10 results
df.head(10)

Unnamed: 0,patient_id,label,timestamp,event_type,event_value,time_gap
0,1,0,2025-01-01 08:00:01.200,pressure,13.766793,0.0
1,1,0,2025-01-01 08:00:01.400,pressure,5.55084,0.2
2,1,0,2025-01-01 08:00:02.400,pressure,10.001789,1.0
3,1,0,2025-01-01 08:00:04.000,pressure,12.761725,1.6
4,1,0,2025-01-01 08:00:04.200,pressure,6.026644,0.2
5,1,0,2025-01-01 08:00:04.600,pressure,5.751649,0.4
6,1,0,2025-01-01 08:00:05.600,pressure,9.020591,1.0
7,1,0,2025-01-01 08:00:05.800,pressure,14.415737,0.2
8,1,0,2025-01-01 08:00:06.000,pressure,5.148983,0.2
9,1,0,2025-01-01 08:00:06.600,pressure,7.398954,0.6


In [5]:
from sklearn.preprocessing import LabelEncoder

#Encode event_type as integer IDs
event_type_encoder = LabelEncoder() #object used to transform event_type column
event_type_encoder.fit(df["event_type"]) #find unique event_type values
df["event_type_id"] = event_type_encoder.transform(df["event_type"])#transform event types into IDs


#save mapping - dictionary to map each event type to its encoded integer
event_type_vocab = dict(zip(event_type_encoder.classes_, event_type_encoder.transform(event_type_encoder.classes_)))
print("Event type to ID mapping: ", event_type_vocab)

#shows unique event_type/id combination
df[["event_type", "event_type_id"]].drop_duplicates()

Event type to ID mapping:  {'flow': np.int64(0), 'leak': np.int64(1), 'minutevent': np.int64(2), 'pressure': np.int64(3), 'resrate': np.int64(4)}


Unnamed: 0,event_type,event_type_id
0,flow,0
8,pressure,3
91,resrate,4
97,minutevent,2
103,leak,1
