In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
def tabular_features(dataframe, timeseries=None, static=None):
    """Summarise time-series columns per ``(patient_id, key)`` next to the static columns.

    The key and static columns are de-duplicated, then for every time-series
    column the mean, standard deviation, maximum and minimum are computed per
    ``(patient_id, key)`` group and joined on as ``<column>_mean``,
    ``<column>_std``, ``<column>_max`` and ``<column>_min``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``patient_id``, ``key``, every static column and every
        time-series column.
    timeseries : sequence of str, optional
        Columns to summarise. Defaults to ``xx1`` ... ``xx5``.
    static : sequence of str, optional
        Columns carried through unchanged alongside the keys. Defaults to
        ``gender``, ``age`` and ``x1`` ... ``x6``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct combination of keys and static columns, followed
        by four summary columns per time-series column. The joins are inner
        joins, so a row whose ``(patient_id, key)`` pair has no aggregate is
        dropped; a diagnostic is printed whenever a join changes the row count.
    """
    keys = ['patient_id', 'key']
    stats = ['mean', 'std', 'max', 'min']
    if timeseries is None:
        timeseries = ['xx1', 'xx2', 'xx3', 'xx4', 'xx5']
    if static is None:
        static = ['gender', 'age', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6']

    reduced = dataframe[keys + list(static)].drop_duplicates()

    # The grouping is the same for every summarised column, so build it once
    # instead of re-grouping the whole frame on each iteration.
    grouped = dataframe.groupby(keys, as_index=False)

    for col_name in timeseries:
        features = grouped.aggregate({col_name: stats})

        # Flatten the two-level (column, statistic) header into plain names.
        features.columns = keys + [f"{col_name}_{stat}" for stat in stats]

        row1 = reduced.shape[0]
        reduced = pd.merge(reduced, features, on=keys, how='inner')
        row2 = reduced.shape[0]

        # A changed row count means the join dropped or duplicated rows.
        if row1 != row2:
            print(f"features shape {features.shape}")
            print(f"reduced shape {reduced.shape}")
            print(f'rows are different for {col_name}')
            print(f'row1-row2 = {row1}-{row2}')

    return reduced

In [5]:
def tabular_features_with_key(dataframe, timeseries=None, static=None):
    """Summarise time-series columns per ``(patient_id, key)`` and add ``key_feature``.

    The key and static columns are de-duplicated and a ``key_feature`` column
    is appended: the integer after the last ``'-'`` of ``key``, replaced by the
    largest such integer among that patient's de-duplicated rows. Then, for
    every time-series column, the mean, standard deviation, maximum and
    minimum are computed per ``(patient_id, key)`` group and joined on as
    ``<column>_mean``, ``<column>_std``, ``<column>_max`` and ``<column>_min``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain ``patient_id``, ``key``, every static column and every
        time-series column. ``key`` must hold strings whose part after the
        last ``'-'`` parses as an integer.
    timeseries : sequence of str, optional
        Columns to summarise. Defaults to ``xx1`` ... ``xx5``.
    static : sequence of str, optional
        Columns carried through unchanged alongside the keys. Defaults to
        ``gender``, ``age`` and ``x1`` ... ``x6``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct combination of keys and static columns, with
        ``key_feature`` right after the static columns, followed by four
        summary columns per time-series column. The joins are inner joins, so
        a row whose ``(patient_id, key)`` pair has no aggregate is dropped; a
        diagnostic is printed whenever a join changes the row count.
    """
    keys = ['patient_id', 'key']
    stats = ['mean', 'std', 'max', 'min']
    if timeseries is None:
        timeseries = ['xx1', 'xx2', 'xx3', 'xx4', 'xx5']
    if static is None:
        static = ['gender', 'age', 'x1', 'x2', 'x3', 'x4', 'x5', 'x6']

    reduced = dataframe[keys + list(static)].drop_duplicates()

    # Trailing integer of each key, parsed with the vectorised string accessor.
    key_suffix = reduced['key'].str.rsplit('-', n=1).str[-1].astype(int)
    # Every row of a patient receives that patient's largest suffix.
    reduced = reduced.assign(
        key_feature=key_suffix.groupby(reduced['patient_id']).transform('max')
    )

    # The grouping is the same for every summarised column, so build it once
    # instead of re-grouping the whole frame on each iteration.
    grouped = dataframe.groupby(keys, as_index=False)

    for col_name in timeseries:
        features = grouped.aggregate({col_name: stats})

        # Flatten the two-level (column, statistic) header into plain names.
        features.columns = keys + [f"{col_name}_{stat}" for stat in stats]

        row1 = reduced.shape[0]
        reduced = pd.merge(reduced, features, on=keys, how='inner')
        row2 = reduced.shape[0]

        # A changed row count means the join dropped or duplicated rows.
        if row1 != row2:
            print(f"features shape {features.shape}")
            print(f"reduced shape {reduced.shape}")
            print(f'rows are different for {col_name}')
            print(f'row1-row2 = {row1}-{row2}')

    return reduced