# Building up on Question 2.1.1 , adding min,max,std,skew,mean and tsfresh features

In [1]:
# Core Libraries
import os
import glob
import itertools

# Data Handling
import numpy as np
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Progress Bar
from tqdm import tqdm

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.tree import export_graphviz
from IPython.display import Image
from sklearn.metrics import ConfusionMatrixDisplay, roc_curve, auc

# Statistics
from scipy.stats import skew, kurtosis, randint

# Modeling
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    average_precision_score,
    recall_score,
    confusion_matrix,
    roc_auc_score,
)
from sklearn import linear_model

In [2]:

static_variables = ['RecordID', 'Age', 'Gender', 'Height', 'ICUType', 'Weight']

static_variables.remove('ICUType')

static_variables_we_want = ['Age', 'Gender', 'Height', 'Weight']
all_variables = ['Weight', 'Age', 'TroponinI', 'DiasABP', 'MechVent', 'HCO3', 'Cholesterol', 'HCT', 'SaO2', 'WBC', 'SysABP', 'Urine', 'ICUType', 'Gender', 'ALP', 'Creatinine', 'K', 'AST', 'Glucose', 'RespRate', 'MAP', 'FiO2', 'BUN', 'Na', 'Bilirubin', 'TroponinT', 'PaCO2', 'Height', 'GCS', 'HR', 'pH', 'PaO2', 'Lactate', 'ALT', 'NISysABP', 'RecordID', 'Platelets', 'Temp', 'Mg', 'NIDiasABP', 'Albumin', 'NIMAP']
dyn_variables = [x for x in all_variables if x not in static_variables]
dyn_variables.remove('ICUType')
dyn_variables.append('Weight_VAR')
len(dyn_variables), len(static_variables_we_want)

initial_column_lists = static_variables_we_want + dyn_variables

In [3]:

# import parquet file 
df_a =pd.read_parquet('data/processed_raw_data_set-a_1.parquet', engine='pyarrow')
df_b =pd.read_parquet('data/processed_raw_data_set-b_1.parquet', engine='pyarrow')
df_c =pd.read_parquet('data/processed_raw_data_set-c_1.parquet', engine='pyarrow')

drop_ICUType = True 
if drop_ICUType:
    df_a = df_a.drop(columns=['ICUType'])
    df_b = df_b.drop(columns=['ICUType'])
    df_c = df_c.drop(columns=['ICUType'])


#  drop Time variable in df_a
if 'Time' in df_a.columns:
    df_a = df_a.drop(columns=['Time'])
    df_b = df_b.drop(columns=['Time'])
    df_c = df_c.drop(columns=['Time'])




## Computing features vectors of our patient

Instead of working on the table where the missing values had already been filled, i prefer working on the not filled table because otherwise filled values would be taken into the mean and might flatten patient with lots of missing values. Then I compute the mean of variables for eahc patient over the 49 timestamps. 

Then i compute the median on the resulting table to fill the missing values

In [4]:
len(static_variables_we_want) , len(dyn_variables), len(static_variables_we_want) + len(dyn_variables) 

(4, 37, 41)

In [5]:
# we define aggregation functions
agg_funcs = {col: ['mean','std','max','min','skew'] for col in dyn_variables}  

agg_funcs['RecordID'] = 'first'  # Keep RecordID
for stat_var in static_variables_we_want:
    if stat_var in df_a.columns:
        agg_funcs[stat_var] = 'first'  # Keep static variables

# Compute mean and std in one go
df_a_agg = df_a.groupby('RecordID').agg(agg_funcs)

df_a_agg.columns = ['_'.join(col).strip() for col in df_a_agg.columns.values]


In [6]:
# same for df_b 
agg_funcs = {col: ['mean','std','max','min','skew'] for col in dyn_variables}
agg_funcs['RecordID'] = 'first'  # Keep RecordID
for stat_var in static_variables_we_want:
    if stat_var in df_b.columns:
        agg_funcs[stat_var] = 'first'  # Keep static variables

# Compute mean and std in one go
df_b_agg = df_b.groupby('RecordID').agg(agg_funcs)

df_b_agg.columns = ['_'.join(col).strip() for col in df_b_agg.columns.values]


In [7]:
# same for df_c
agg_funcs = {col: ['mean','std','max','min','skew'] for col in dyn_variables}
agg_funcs['RecordID'] = 'first'  # Keep RecordID
for stat_var in static_variables_we_want:
    if stat_var in df_c.columns:
        agg_funcs[stat_var] = 'first'  # Keep static variables

# Compute mean and std in one go
df_c_agg = df_c.groupby('RecordID').agg(agg_funcs)
df_c_agg.columns = ['_'.join(col).strip() for col in df_c_agg.columns.values]

In [8]:
# compute median of df_a 

df_a_agg_median = df_a_agg.median()

# fill missing values with median
df_a_agg.fillna(df_a_agg_median, inplace=True)

df_b_agg.fillna(df_a_agg_median, inplace=True)
df_c_agg.fillna(df_a_agg_median, inplace=True)

In [9]:
# (df_a_agg.isnull().sum() != 0 ) print where true
# print columns with missing values
missing_values_a = df_a_agg.isnull().sum() != 0
#  print only where true
missing_values_a = missing_values_a[missing_values_a].index.tolist()
print("Missing values in df_a_agg:", missing_values_a)

Missing values in df_a_agg: ['Cholesterol_skew']


In [10]:
missing_values_b = df_b_agg.isnull().sum() != 0
#  print only where true
missing_values_b = missing_values_b[missing_values_b].index.tolist()
print("Missing values in df_b_agg:", missing_values_b)

Missing values in df_b_agg: ['Cholesterol_skew']


In [11]:
missing_values_c = df_c_agg.isnull().sum() != 0
#  print only where true
missing_values_c = missing_values_c[missing_values_c].index.tolist()
print("Missing values in df_c_agg:", missing_values_c)

Missing values in df_c_agg: ['Cholesterol_skew']


In [12]:
# drop Cholesterol_skew in df_a_agg, df_b_agg, df_c_agg
df_a_agg = df_a_agg.drop(columns=['Cholesterol_skew'])
df_b_agg = df_b_agg.drop(columns=['Cholesterol_skew'])
df_c_agg = df_c_agg.drop(columns=['Cholesterol_skew'])

In [13]:
len(df_a_agg['Weight_VAR_mean'].unique()), len(df_a_agg['Weight_VAR_std'].unique()) 

(2110, 1471)

In [14]:
assert df_a_agg.isnull().sum().sum() == 0
assert df_b_agg.isnull().sum().sum() == 0
assert df_c_agg.isnull().sum().sum() == 0

# Tsfresh

In [15]:
# open set-a_no_nan.parquet
df_a_no_nan = pd.read_parquet('data/set-a_no_nan.parquet', engine='pyarrow')
df_b_no_nan = pd.read_parquet('data/set-b_no_nan.parquet', engine='pyarrow')
df_c_no_nan = pd.read_parquet('data/set-c_no_nan.parquet', engine='pyarrow')


In [16]:
df_a_no_nan.columns

Index(['RecordID', 'Time', 'Age', 'Gender', 'Height', 'Weight', 'Albumin',
       'ALP', 'ALT', 'AST', 'Bilirubin', 'BUN', 'Cholesterol', 'Creatinine',
       'DiasABP', 'FiO2', 'GCS', 'Glucose', 'HCO3', 'HCT', 'HR', 'K',
       'Lactate', 'Mg', 'MAP', 'MechVent', 'Na', 'NIDiasABP', 'NIMAP',
       'NISysABP', 'PaCO2', 'PaO2', 'pH', 'Platelets', 'RespRate', 'SaO2',
       'SysABP', 'Temp', 'TroponinI', 'TroponinT', 'Urine', 'WBC',
       'Weight_VAR'],
      dtype='object', name='Parameter')

In [17]:
from tsfresh import extract_features, select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_selection.relevance import calculate_relevance_table


In [18]:
outcomes = pd.read_csv('data/outcomes.csv')

outcomes_a = outcomes.loc[outcomes['RecordID'].isin(df_a_no_nan['RecordID'])]
# outcomes_a set index to RecordID
outcomes_a = outcomes_a.set_index('RecordID')
outcomes_a 

Unnamed: 0_level_0,In-hospital_death
RecordID,Unnamed: 1_level_1
132539,0
132540,0
132541,0
132543,0
132545,0
...,...
142665,0
142667,0
142670,0
142671,1


## To compute Tsfresh features on training set 

In [None]:
from tsfresh import extract_features
from tsfresh.feature_selection.relevance import calculate_relevance_table
from tsfresh.utilities.dataframe_functions import impute
import pandas as pd
from functools import reduce
from pandas import DataFrame
from tqdm import tqdm

we_compute_training_features = False

if we_compute_training_features:
    # 1. Load data
    df_a_no_nan = pd.read_parquet('data/set-a_no_nan.parquet', engine='pyarrow')
    outcomes = pd.read_csv('data/outcomes.csv')
    outcomes_a = outcomes.loc[outcomes['RecordID'].isin(df_a_no_nan['RecordID'])]
    outcomes_a = outcomes_a.set_index('RecordID')

    # 2. Define your dynamic variables (update this list as needed)
    # cf above

    # 3. Empty list to collect features for all variables
    all_feature_sets = []

    # 4. Loop through each variable
    for i, var in tqdm(enumerate(dyn_variables), desc="Processing variables"):
        print(f"Processing {i}, {var}...")
        if (i <26):
            continue
        # if (i==26):
        #     break
        
        df_long = df_a_no_nan[['RecordID', 'Time', var]].copy()
        df_long = df_long.rename(columns={var: 'value'})  # tsfresh expects 'value'
        
        # extract tsfresh features
        features = extract_features(df_long, column_id='RecordID', column_sort='Time', n_jobs=8)
        
        # drop features with NaNs
        features = features.dropna(axis=1, how='any')
        if features.empty:
            continue  # skip if nothing left
        
        # align with labels
        labels = outcomes_a.loc[features.index]['In-hospital_death']
        
        # calculate relevance
        relevance_table = calculate_relevance_table(features, labels, ml_task='classification')
        top_features = relevance_table[relevance_table.relevant].sort_values("p_value")["feature"][:5]
        
        # reduce to top 5 features and rename
        selected = features[top_features]
        selected.columns = [f"{var}__{col}" for col in selected.columns]
        
        # collect
        all_feature_sets.append(selected)

    # 5. Combine all

    tsfresh_final_features = reduce(lambda left, right: left.join(right, how='outer'), all_feature_sets)

    # 6. Final cleanup
    tsfresh_final_features = tsfresh_final_features.fillna(tsfresh_final_features.median())

    print("Final shape:", tsfresh_final_features.shape)
    tsfresh_final_features.head()

    # save tsfresh_final_features
    tsfresh_final_features.to_parquet('data/tsfresh_final_features_26.parquet', engine='pyarrow', index=True)


## Retrieving the same tsfresh features as in the traning set for the test set 

In [21]:
from collections import defaultdict


In [22]:
# tsfresh_final_features_10 = pd.read_parquet('data/tsfresh_final_features_10.parquet', engine='pyarrow') #CHANGE
# tsfresh_final_features_10.columns
# tsfresh_final_features_11_25 = pd.read_parquet('data/tsfresh_final_features_11_25.parquet', engine='pyarrow')
# tsfresh_final_features_11_25.columns
tsfresh_final_features_26 = pd.read_parquet('data/tsfresh_final_features_26.parquet', engine='pyarrow')
tsfresh_final_features_26.columns


Index(['PaO2__value__change_quantiles__f_agg_"mean"__isabs_True__qh_0.6__ql_0.0',
       'PaO2__value__change_quantiles__f_agg_"var"__isabs_False__qh_0.6__ql_0.0',
       'PaO2__value__change_quantiles__f_agg_"var"__isabs_True__qh_0.6__ql_0.0',
       'PaO2__value__count_above_mean',
       'PaO2__value__approximate_entropy__m_2__r_0.7',
       'Lactate__value__sum_of_reoccurring_values',
       'Lactate__value__approximate_entropy__m_2__r_0.3',
       'Lactate__value__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2',
       'Lactate__value__approximate_entropy__m_2__r_0.1',
       'Lactate__value__sample_entropy',
       'ALT__value__ratio_value_number_to_time_series_length',
       'ALT__value__approximate_entropy__m_2__r_0.7',
       'ALT__value__spkt_welch_density__coeff_2',
       'ALT__value__lempel_ziv_complexity__bins_100',
       'ALT__value__fft_coefficient__attr_"abs"__coeff_24',
       'NISysABP__value__agg_linear_trend__attr_"rvalue"__chunk_len_5__f_agg_"var"',


In [23]:
def group_by_first_double_underscore(feature_list):
    grouped = defaultdict(list)
    for item in feature_list:
        if '__' in item:
            key, rest = item.split('__', 1)  # split only at the first occurrence
            grouped[key].append(rest)
    return dict(grouped)


In [24]:
dico_selected = group_by_first_double_underscore(tsfresh_final_features_26.columns.tolist()) #CHANGE
dico_selected

{'PaO2': ['value__change_quantiles__f_agg_"mean"__isabs_True__qh_0.6__ql_0.0',
  'value__change_quantiles__f_agg_"var"__isabs_False__qh_0.6__ql_0.0',
  'value__change_quantiles__f_agg_"var"__isabs_True__qh_0.6__ql_0.0',
  'value__count_above_mean',
  'value__approximate_entropy__m_2__r_0.7'],
 'Lactate': ['value__sum_of_reoccurring_values',
  'value__approximate_entropy__m_2__r_0.3',
  'value__change_quantiles__f_agg_"mean"__isabs_True__qh_0.8__ql_0.2',
  'value__approximate_entropy__m_2__r_0.1',
  'value__sample_entropy'],
 'ALT': ['value__ratio_value_number_to_time_series_length',
  'value__approximate_entropy__m_2__r_0.7',
  'value__spkt_welch_density__coeff_2',
  'value__lempel_ziv_complexity__bins_100',
  'value__fft_coefficient__attr_"abs"__coeff_24'],
 'NISysABP': ['value__agg_linear_trend__attr_"rvalue"__chunk_len_5__f_agg_"var"',
  'value__agg_linear_trend__attr_"rvalue"__chunk_len_10__f_agg_"var"',
  'value__agg_linear_trend__attr_"slope"__chunk_len_10__f_agg_"var"',
  'value

In [25]:
from tsfresh import extract_features
from tsfresh.feature_selection.relevance import calculate_relevance_table
from tsfresh.utilities.dataframe_functions import impute
import pandas as pd
from functools import reduce
from pandas import DataFrame
from tqdm import tqdm


# 1. Load data
df_c_no_nan = pd.read_parquet('data/set-c_no_nan.parquet', engine='pyarrow')
outcomes = pd.read_csv('data/outcomes.csv')

In [26]:
# df_c_no_nan = df_c_no_nan.iloc[:490, :]

In [27]:

# 2. Define your dynamic variables (update this list as needed)
# cf above

# 3. Empty list to collect features for all variables
all_feature_sets = []

# 4. Loop through each variable
for i, var in tqdm(enumerate(dyn_variables), desc="Processing variables"):
    print(f"Processing {i}, {var}...")
    if var not in dico_selected.keys():
        print(var)
        print('not there')
        continue

    # if (i <10): 
    #     continue
    if (i <26): 
        continue
    # if (i==26): # change !
    #     break
    
    df_long = df_c_no_nan[['RecordID', 'Time', var]].copy()
    df_long = df_long.rename(columns={var: 'value'})  # tsfresh expects 'value'
    
    # extract tsfresh features
    features = extract_features(df_long, column_id='RecordID', column_sort='Time', n_jobs=8)


    
    # drop features with NaNs
    # features = features.dropna(axis=1, how='any')
    if features.empty:
        continue  # skip if nothing left
    
    top_features = dico_selected[var]
    selected = features[top_features]
    selected.columns = [f"{var}__{col}" for col in selected.columns]


    
    # collect
    all_feature_sets.append(selected)

# 5. Combine all

tsfresh_final_features = reduce(lambda left, right: left.join(right, how='outer'), all_feature_sets)

# tsfresh_final_features = tsfresh_final_features['TroponinI__value__fourier_entropy__bins_2']

print("Final shape:", tsfresh_final_features.shape)
tsfresh_final_features.head()


Processing variables: 0it [00:00, ?it/s]

Processing 0, TroponinI...
TroponinI
not there
Processing 1, DiasABP...
DiasABP
not there
Processing 2, MechVent...
MechVent
not there
Processing 3, HCO3...
HCO3
not there
Processing 4, Cholesterol...
Cholesterol
not there
Processing 5, HCT...
HCT
not there
Processing 6, SaO2...
SaO2
not there
Processing 7, WBC...
WBC
not there
Processing 8, SysABP...
SysABP
not there
Processing 9, Urine...
Urine
not there
Processing 10, ALP...
ALP
not there
Processing 11, Creatinine...
Creatinine
not there
Processing 12, K...
K
not there
Processing 13, AST...
AST
not there
Processing 14, Glucose...
Glucose
not there
Processing 15, RespRate...
RespRate
not there
Processing 16, MAP...
MAP
not there
Processing 17, FiO2...
FiO2
not there
Processing 18, BUN...
BUN
not there
Processing 19, Na...
Na
not there
Processing 20, Bilirubin...
Bilirubin
not there
Processing 21, TroponinT...
TroponinT
not there
Processing 22, PaCO2...
PaCO2
not there
Processing 23, GCS...
GCS
not there
Processing 24, HR...
HR
not th


[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:15<10:13, 15.73s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:25<07:42, 12.16s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:36<07:07, 11.55s/it]
[Ature Extraction:  10%|█         | 4/40 [00:41<05:24,  9.02s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:47<04:33,  7.82s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:59<05:21,  9.45s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:59<03:33,  6.46s/it]
[Ature Extraction:  20%|██        | 8/40 [01:01<02:32,  4.75s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:01<01:44,  3.36s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:01<01:11,  2.38s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:01<00:51,  1.77s/it]
[Ature Extraction:  30%|███       | 12/40 [01:04<00:57,  2.04s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:21<02:56,  6.53s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:25<02:30,  5.78s/i

Processing 27, Lactate...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:18<11:48, 18.17s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:29<08:53, 14.04s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:32<05:29,  8.89s/it]
[Ature Extraction:  10%|█         | 4/40 [00:36<04:11,  7.00s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:44<04:17,  7.35s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:46<03:15,  5.76s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:51<02:58,  5.40s/it]
[Ature Extraction:  20%|██        | 8/40 [00:53<02:18,  4.34s/it]
[Ature Extraction:  22%|██▎       | 9/40 [00:54<01:44,  3.39s/it]
[Ature Extraction:  25%|██▌       | 10/40 [00:58<01:39,  3.32s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:00<01:31,  3.16s/it]
[Ature Extraction:  30%|███       | 12/40 [01:01<01:05,  2.36s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:09<01:49,  4.04s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:13<01:47,  4.13s/i

Processing 28, ALT...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:11<07:39, 11.78s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:22<06:56, 10.97s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:31<06:18, 10.23s/it]
[Ature Extraction:  10%|█         | 4/40 [00:40<05:51,  9.76s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:50<05:39,  9.71s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:50<03:46,  6.66s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:53<02:55,  5.30s/it]
[Ature Extraction:  20%|██        | 8/40 [00:57<02:32,  4.75s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:00<02:17,  4.44s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:01<01:38,  3.27s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:01<01:08,  2.38s/it]
[Ature Extraction:  30%|███       | 12/40 [01:02<00:49,  1.76s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:03<00:43,  1.62s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:12<01:42,  3.93s/i

Processing 29, NISysABP...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:25<16:35, 25.52s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:27<07:20, 11.59s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:31<05:11,  8.41s/it]
[Ature Extraction:  10%|█         | 4/40 [00:33<03:17,  5.49s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:51<06:02, 10.36s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:53<04:07,  7.28s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:54<02:59,  5.45s/it]
[Ature Extraction:  20%|██        | 8/40 [01:06<03:53,  7.29s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:07<02:51,  5.55s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:08<01:59,  3.97s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:08<01:21,  2.82s/it]
[Ature Extraction:  30%|███       | 12/40 [01:10<01:10,  2.51s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:12<01:04,  2.38s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:18<01:32,  3.55s/i

Processing 30, Platelets...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:18<12:19, 18.97s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:30<09:24, 14.84s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:33<05:42,  9.25s/it]
[Ature Extraction:  10%|█         | 4/40 [00:37<04:22,  7.30s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:47<04:41,  8.06s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:51<03:52,  6.84s/it]
[Ature Extraction:  18%|█▊        | 7/40 [01:01<04:16,  7.77s/it]
[Ature Extraction:  20%|██        | 8/40 [01:02<02:58,  5.58s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:05<02:23,  4.64s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:06<01:48,  3.62s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:06<01:14,  2.56s/it]
[Ature Extraction:  30%|███       | 12/40 [01:07<00:57,  2.04s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:11<01:13,  2.73s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:17<01:35,  3.68s/i

Processing 31, Temp...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:26<17:11, 26.44s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:27<07:22, 11.66s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:28<04:00,  6.51s/it]
[Ature Extraction:  10%|█         | 4/40 [00:49<07:20, 12.23s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:50<04:52,  8.36s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:51<03:20,  5.90s/it]
[Ature Extraction:  18%|█▊        | 7/40 [01:03<04:11,  7.63s/it]
[Ature Extraction:  20%|██        | 8/40 [01:03<02:49,  5.31s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:03<01:57,  3.78s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:04<01:26,  2.89s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:05<01:06,  2.31s/it]
[Ature Extraction:  30%|███       | 12/40 [01:13<01:53,  4.04s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:14<01:27,  3.23s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:15<01:02,  2.42s/i

Processing 32, Mg...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:12<08:08, 12.52s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:22<06:47, 10.74s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:31<06:12, 10.07s/it]
[Ature Extraction:  10%|█         | 4/40 [00:40<05:51,  9.76s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:50<05:44,  9.83s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:57<05:03,  8.91s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:58<03:24,  6.19s/it]
[Ature Extraction:  20%|██        | 8/40 [01:00<02:30,  4.70s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:00<01:43,  3.34s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:02<01:29,  2.99s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:02<01:02,  2.16s/it]
[Ature Extraction:  30%|███       | 12/40 [01:02<00:44,  1.60s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:03<00:32,  1.20s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:11<01:25,  3.30s/i

Processing 33, NIDiasABP...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:18<12:13, 18.80s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:29<08:47, 13.87s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:31<05:13,  8.46s/it]
[Ature Extraction:  10%|█         | 4/40 [00:37<04:37,  7.71s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:45<04:30,  7.72s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:48<03:26,  6.07s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:49<02:31,  4.59s/it]
[Ature Extraction:  20%|██        | 8/40 [00:51<01:55,  3.60s/it]
[Ature Extraction:  22%|██▎       | 9/40 [00:57<02:15,  4.38s/it]
[Ature Extraction:  25%|██▌       | 10/40 [00:58<01:37,  3.25s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:02<01:43,  3.58s/it]
[Ature Extraction:  30%|███       | 12/40 [01:03<01:18,  2.81s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:10<01:49,  4.05s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:19<02:21,  5.45s/i

Processing 34, Albumin...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:13<08:43, 13.43s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:23<07:20, 11.59s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:25<04:25,  7.17s/it]
[Ature Extraction:  10%|█         | 4/40 [00:34<04:44,  7.91s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:43<04:49,  8.27s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:54<04:41,  8.29s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:58<04:17,  7.82s/it]
[Ature Extraction:  20%|██        | 8/40 [00:59<03:01,  5.66s/it]
[Ature Extraction:  22%|██▎       | 9/40 [01:00<02:04,  4.02s/it]
[Ature Extraction:  25%|██▌       | 10/40 [01:00<01:26,  2.88s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:00<01:01,  2.11s/it]
[Ature Extraction:  30%|███       | 12/40 [01:01<00:48,  1.72s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:04<00:51,  1.91s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:04<00:37,  1.43s/i

Processing 35, NIMAP...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:13<08:28, 13.05s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:22<06:55, 10.93s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:30<05:55,  9.61s/it]
[Ature Extraction:  10%|█         | 4/40 [00:41<06:06, 10.18s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:48<05:09,  8.84s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:50<03:42,  6.55s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:51<02:42,  4.93s/it]
[Ature Extraction:  20%|██        | 8/40 [00:53<02:00,  3.76s/it]
[Ature Extraction:  22%|██▎       | 9/40 [00:58<02:11,  4.23s/it]
[Ature Extraction:  25%|██▌       | 10/40 [00:59<01:34,  3.16s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:01<01:28,  3.06s/it]
[Ature Extraction:  30%|███       | 12/40 [01:02<01:02,  2.24s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:09<01:40,  3.71s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:20<02:34,  5.93s/i

Processing 36, Weight_VAR...



[Ature Extraction:   0%|          | 0/40 [00:00<?, ?it/s]
[Ature Extraction:   2%|▎         | 1/40 [00:23<15:11, 23.37s/it]
[Ature Extraction:   5%|▌         | 2/40 [00:23<06:13,  9.82s/it]
[Ature Extraction:   8%|▊         | 3/40 [00:27<04:19,  7.02s/it]
[Ature Extraction:  10%|█         | 4/40 [00:29<03:05,  5.16s/it]
[Ature Extraction:  12%|█▎        | 5/40 [00:46<05:22,  9.22s/it]
[Ature Extraction:  15%|█▌        | 6/40 [00:47<03:44,  6.61s/it]
[Ature Extraction:  18%|█▊        | 7/40 [00:51<03:12,  5.83s/it]
[Ature Extraction:  20%|██        | 8/40 [00:57<03:03,  5.72s/it]
[Ature Extraction:  22%|██▎       | 9/40 [00:59<02:17,  4.44s/it]
[Ature Extraction:  25%|██▌       | 10/40 [00:59<01:33,  3.13s/it]
[Ature Extraction:  28%|██▊       | 11/40 [01:01<01:21,  2.80s/it]
[Ature Extraction:  30%|███       | 12/40 [01:02<01:04,  2.32s/it]
[Ature Extraction:  32%|███▎      | 13/40 [01:09<01:40,  3.72s/it]
[Ature Extraction:  35%|███▌      | 14/40 [01:13<01:37,  3.75s/i

Final shape: (4000, 55)





Unnamed: 0,"PaO2__value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.6__ql_0.0","PaO2__value__change_quantiles__f_agg_""var""__isabs_False__qh_0.6__ql_0.0","PaO2__value__change_quantiles__f_agg_""var""__isabs_True__qh_0.6__ql_0.0",PaO2__value__count_above_mean,PaO2__value__approximate_entropy__m_2__r_0.7,Lactate__value__sum_of_reoccurring_values,Lactate__value__approximate_entropy__m_2__r_0.3,"Lactate__value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.8__ql_0.2",Lactate__value__approximate_entropy__m_2__r_0.1,Lactate__value__sample_entropy,...,"NIMAP__value__agg_linear_trend__attr_""rvalue""__chunk_len_5__f_agg_""var""","NIMAP__value__agg_linear_trend__attr_""rvalue""__chunk_len_10__f_agg_""var""","NIMAP__value__agg_linear_trend__attr_""intercept""__chunk_len_10__f_agg_""min""","NIMAP__value__agg_linear_trend__attr_""intercept""__chunk_len_5__f_agg_""min""","NIMAP__value__agg_linear_trend__attr_""rvalue""__chunk_len_10__f_agg_""max""",Weight_VAR__value__energy_ratio_by_chunks__num_segments_10__segment_focus_2,"Weight_VAR__value__change_quantiles__f_agg_""mean""__isabs_False__qh_0.6__ql_0.2","Weight_VAR__value__change_quantiles__f_agg_""var""__isabs_False__qh_0.6__ql_0.2","Weight_VAR__value__change_quantiles__f_agg_""var""__isabs_True__qh_0.6__ql_0.2","Weight_VAR__value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.6__ql_0.2"
152871.0,2.0,33.683594,29.6875,23.0,0.474895,4.2,0.140442,0.019048,0.140442,0.099967,...,0.0,-0.707107,75.33,75.33,0.0,0.086122,0.262162,2.474244,2.474244,0.262162
152873.0,1.228571,12.404898,11.033469,27.0,0.30459,10.8,0.421972,0.016667,0.421972,0.351761,...,0.359495,0.511459,70.266,70.761636,0.047594,0.086419,0.220455,2.089809,2.089809,0.220455
152875.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.04256,...,-0.556346,-0.624538,70.93,74.005091,-0.838189,0.107422,-0.095238,0.371882,0.371882,0.095238
152878.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.04256,...,0.54219,0.6297,74.398,77.465182,0.428084,0.102041,0.0,0.0,0.0,0.0
152882.0,1.393939,21.702479,20.056933,25.0,0.262517,4.3,0.083417,0.00625,0.083417,0.075476,...,-0.522233,-0.707107,67.53,70.839091,0.0,0.102041,0.0,0.0,0.0,0.0


In [28]:
# save tsfresh_final_features CHANGE
tsfresh_final_features.to_parquet('data/tsfresh_final_features_C_10.parquet', engine='pyarrow', index=True)
# tsfresh_final_features.to_parquet('data/tsfresh_final_features_C_11_25.parquet', engine='pyarrow', index=True)
# tsfresh_final_features.to_parquet('data/tsfresh_final_features_C_26.parquet', engine='pyarrow', index=True)