In [62]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [63]:
# Load the event log from a CSV file in the current working directory
# (relative path, so the notebook must be run from the folder holding the file).
df1 = pd.read_csv("Sepsis_Cases_Log.csv")
# Print column names, non-null counts and dtypes to see which columns are sparse.
df1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15099 entries, 0 to 15098
Data columns (total 34 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Case ID                    15075 non-null  object 
 1   Activity                   15099 non-null  object 
 2   Complete Timestamp         15099 non-null  object 
 3   Variant                    15099 non-null  object 
 4   Variant index              15099 non-null  int64  
 5   lifecycle:transition       15099 non-null  object 
 6   org:group                  15099 non-null  object 
 7   InfectionSuspected         1013 non-null   object 
 8   DiagnosticBlood            1013 non-null   object 
 9   DisfuncOrg                 1013 non-null   object 
 10  SIRSCritTachypnea          1013 non-null   object 
 11  Hypotensie                 1013 non-null   object 
 12  SIRSCritHeartRate          1013 non-null   object 
 13  Infusion                   1013 non-null   obj

In [64]:
# Preview the first 30 rows of the freshly loaded log.
df1.head(30)

Unnamed: 0,Case ID,Activity,Complete Timestamp,Variant,Variant index,lifecycle:transition,org:group,InfectionSuspected,DiagnosticBlood,DisfuncOrg,...,SIRSCritLeucos,Oligurie,DiagnosticLacticAcid,Diagnose,Hypoxie,DiagnosticUrinarySediment,DiagnosticECG,Leucocytes,CRP,LacticAcid
0,A,ER Registration,15:41.0,Variant 61,61,complete,A,True,True,True,...,False,False,True,A,False,True,True,,,
1,A,Leucocytes,27:00.0,Variant 61,61,complete,B,,,,...,,,,,,,,9.6,,
2,A,CRP,27:00.0,Variant 61,61,complete,B,,,,...,,,,,,,,,21.0,
3,A,LacticAcid,27:00.0,Variant 61,61,complete,B,,,,...,,,,,,,,,,2.2
4,A,ER Triage,33:37.0,Variant 61,61,complete,C,,,,...,,,,,,,,,,
5,A,ER Sepsis Triage,34:00.0,Variant 61,61,complete,A,,,,...,,,,,,,,,,
6,A,IV Liquid,03:47.0,Variant 61,61,complete,A,,,,...,,,,,,,,,,
7,A,IV Antibiotics,03:47.0,Variant 61,61,complete,A,,,,...,,,,,,,,,,
8,A,Admission NC,13:19.0,Variant 61,61,complete,D,,,,...,,,,,,,,,,
9,A,CRP,00:00.0,Variant 61,61,complete,B,,,,...,,,,,,,,,109.0,


In [65]:
# Columns removed from the working frame.
columns_to_drop = [
    'org:group',
    'Complete Timestamp',
    'Variant',
    'Variant index',
    'Diagnose',
    'lifecycle:transition',
]

# Capture the numeric summary before trimming. A bare `df1.describe()` that is
# not the last expression of the cell is computed and then thrown away.
summary_before_drop = df1.describe()

# errors="ignore" keeps the cell re-runnable: df1 is rebound to its own trimmed
# copy, so on a second run the columns are already gone and a plain drop()
# would raise KeyError.
df1 = df1.drop(columns=columns_to_drop, errors="ignore")

# Last expression of the cell, so the summary is actually displayed.
summary_before_drop



In [66]:
# Preview the first rows of df1 (default of 5) after the column drop.
df1.head()

Unnamed: 0,Case ID,Activity,InfectionSuspected,DiagnosticBlood,DisfuncOrg,SIRSCritTachypnea,Hypotensie,SIRSCritHeartRate,Infusion,DiagnosticArtAstrup,...,DiagnosticUrinaryCulture,SIRSCritLeucos,Oligurie,DiagnosticLacticAcid,Hypoxie,DiagnosticUrinarySediment,DiagnosticECG,Leucocytes,CRP,LacticAcid
0,A,ER Registration,True,True,True,True,True,True,True,True,...,True,False,False,True,False,True,True,,,
1,A,Leucocytes,,,,,,,,,...,,,,,,,,9.6,,
2,A,CRP,,,,,,,,,...,,,,,,,,,21.0,
3,A,LacticAcid,,,,,,,,,...,,,,,,,,,,2.2
4,A,ER Triage,,,,,,,,,...,,,,,,,,,,


In [67]:

# Build a wide "activity flow" table: one row per case, holding the Case ID
# followed by that case's activities in log order ('Activity 1', 'Activity 2',
# ...). Shorter cases are padded with NaN up to the longest case.

# Events without a Case ID cannot be attributed to a case. Excluding them up
# front avoids a spurious all-NaN row: NaN shows up in .unique(), but filtering
# with `== NaN` selects nothing because NaN never compares equal.
case_events = df1.dropna(subset=['Case ID'])

# 0-based position of every event inside its own case, following file order.
activity_pos = case_events.groupby('Case ID', sort=False).cumcount()

# pivot() sorts its index, so remember the order in which cases first appear.
case_order = pd.Index(case_events['Case ID'].unique(), name='Case ID')

# Reshape once instead of growing the frame with pd.concat inside a loop
# (which re-copies the accumulated frame on every iteration).
df_act_flow = (
    case_events.assign(activity_pos=activity_pos)
    .pivot(index='Case ID', columns='activity_pos', values='Activity')
    .reindex(case_order)
    .reset_index()
)

# Rename columns: first column as 'Case ID', remaining columns as 'Activity 1', 'Activity 2', etc.
columns = ['Case ID'] + [f'Activity {i+1}' for i in range(df_act_flow.shape[1] - 1)]
df_act_flow.columns = columns

# Inspect the resulting DataFrame
print(df_act_flow.head(10))



  Case ID       Activity 1  Activity 2        Activity 3      Activity 4  \
0       A  ER Registration  Leucocytes               CRP      LacticAcid   
1       B  ER Registration   ER Triage               CRP      LacticAcid   
2       C  ER Registration   ER Triage  ER Sepsis Triage      Leucocytes   
3       D  ER Registration   ER Triage  ER Sepsis Triage             CRP   
4       E  ER Registration   ER Triage  ER Sepsis Triage       IV Liquid   
5       F  ER Registration   ER Triage  ER Sepsis Triage      Leucocytes   
6       G  ER Registration   ER Triage  ER Sepsis Triage       IV Liquid   
7       H  ER Registration   ER Triage  ER Sepsis Triage             CRP   
8       I  ER Registration   ER Triage  ER Sepsis Triage  IV Antibiotics   
9       J  ER Registration   ER Triage  ER Sepsis Triage             CRP   

       Activity 5        Activity 6      Activity 7      Activity 8  \
0       ER Triage  ER Sepsis Triage       IV Liquid  IV Antibiotics   
1      Leucocytes  ER

In [70]:
# Filter the DataFrame to include only rows where Activity == 'ER Registration'
df_er_registration = df1[df1['Activity'] == 'ER Registration']

# Reset index for cleanliness (optional)
df_er_registration = df_er_registration.reset_index(drop=True)

# Inspect the resulting DataFrame
print(df_er_registration.head(20))

   Case ID         Activity InfectionSuspected DiagnosticBlood DisfuncOrg  \
0        A  ER Registration               True            True       True   
1        B  ER Registration               True            True      False   
2        C  ER Registration               True            True      False   
3        D  ER Registration               True            True      False   
4        E  ER Registration               True            True      False   
5        F  ER Registration               True            True      False   
6        G  ER Registration               True            True      False   
7        H  ER Registration              False           False      False   
8        I  ER Registration               True            True      False   
9        J  ER Registration               True            True      False   
10       K  ER Registration               True            True      False   
11       L  ER Registration               True            True      False   

In [72]:
# 'Activity' holds the same value ('ER Registration') on every row of
# df_er_registration, so remove it and keep the remaining columns.
df_other_attributes = df_er_registration.drop('Activity', axis=1)
df_other_attributes.head(n=5)

Unnamed: 0,Case ID,InfectionSuspected,DiagnosticBlood,DisfuncOrg,SIRSCritTachypnea,Hypotensie,SIRSCritHeartRate,Infusion,DiagnosticArtAstrup,Age,...,DiagnosticUrinaryCulture,SIRSCritLeucos,Oligurie,DiagnosticLacticAcid,Hypoxie,DiagnosticUrinarySediment,DiagnosticECG,Leucocytes,CRP,LacticAcid
0,A,True,True,True,True,True,True,True,True,85.0,...,True,False,False,True,False,True,True,,,
1,B,True,True,False,True,False,True,True,False,45.0,...,True,False,False,True,False,True,True,,,
2,C,True,True,False,False,False,True,True,True,55.0,...,True,False,False,True,False,True,True,,,
3,D,True,True,False,True,False,True,True,True,70.0,...,False,False,False,True,False,False,True,,,
4,E,True,True,False,True,False,True,True,True,75.0,...,True,False,False,True,False,True,True,,,
