# Imports

In [2]:
import numpy  as np
import pandas as pd

from scipy.stats      import kurtosis
from sklearn.datasets import load_diabetes

## Loading Datasets

In [3]:
# Load the diabetes data as pandas objects: X holds the features, y the target.
# scaled=False keeps the raw feature values instead of the standardized ones.
X, y = load_diabetes(return_X_y=True, as_frame=True, scaled=False)

In [4]:
# Preview the first five rows of the feature frame.
X.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,59.0,2.0,32.1,101.0,157.0,93.2,38.0,4.0,4.8598,87.0
1,48.0,1.0,21.6,87.0,183.0,103.2,70.0,3.0,3.8918,69.0
2,72.0,2.0,30.5,93.0,156.0,93.6,41.0,4.0,4.6728,85.0
3,24.0,1.0,25.3,84.0,198.0,131.4,40.0,5.0,4.8903,89.0
4,50.0,1.0,23.0,101.0,192.0,125.4,52.0,4.0,4.2905,80.0


In [5]:
# Named aggregation: every keyword becomes an output column computed from
# 'age' within each 'sex' group.
(
    X
    .groupby(['sex'])['age']
    .agg(age_max='max', age_min='min', age_mean='mean')
)

Unnamed: 0_level_0,age_max,age_min,age_mean
sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,75.0,19.0,46.382979
2.0,79.0,20.0,50.942029


In [6]:
# Per-sex summary of 'age'. Built-in pandas reducers are referenced by name;
# custom reducers are given as (column_label, callable) pairs so the label,
# not the callable's name, ends up in the column header.
df_aux = X.groupby(['sex']).agg(
    {
        'age': [
            'sum',
            'mean',
            'median',
            'min',
            'max',
            'std',
            'var',
            'skew',
            ('kurtosis', kurtosis),
            ('range', lambda i: np.max(i) - np.min(i)),
            # Plain string labels: nothing is interpolated, so no f-prefix.
            ('quantile_0.25', lambda i: np.quantile(i, q=0.25)),
            ('quantile_0.5', lambda i: np.quantile(i, q=0.5)),
            ('quantile_0.75', lambda i: np.quantile(i, q=0.75)),
        ]
    }
)

In [7]:
# Show the aggregate; its columns carry a two-level (variable, statistic) header.
df_aux

Unnamed: 0_level_0,age,age,age,age,age,age,age,age,age,age,age,age,age
Unnamed: 0_level_1,sum,mean,median,min,max,std,var,skew,kurtosis,range,quantile_0.25,quantile_0.5,quantile_0.75
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
1.0,10900.0,46.382979,48.0,19.0,75.0,13.120665,172.151846,-0.092271,-0.632192,56.0,37.0,48.0,55.0
2.0,10545.0,50.942029,53.0,20.0,79.0,12.697623,161.229633,-0.394162,-0.611248,59.0,41.0,53.0,60.0


In [8]:
# Flatten the two-level (variable, statistic) header into
# 'variable__statistic' names, taking the prefix from the header itself.
# The MultiIndex guard keeps this cell safe to re-run: once the columns are
# flat there is no level left to drop or join.
if isinstance(df_aux.columns, pd.MultiIndex):
    df_aux.columns = ['__'.join(col) for col in df_aux.columns]

In [9]:
# Display the frame with the 'sex' group key moved from the index into a
# regular column; the result is not assigned, so df_aux itself keeps its index.
df_aux.reset_index()

Unnamed: 0,sex,age__sum,age__mean,age__median,age__min,age__max,age__std,age__var,age__skew,age__kurtosis,age__range,age__quantile_0.25,age__quantile_0.5,age__quantile_0.75
0,1.0,10900.0,46.382979,48.0,19.0,75.0,13.120665,172.151846,-0.092271,-0.632192,56.0,37.0,48.0,55.0
1,2.0,10545.0,50.942029,53.0,20.0,79.0,12.697623,161.229633,-0.394162,-0.611248,59.0,41.0,53.0,60.0


In [10]:
# Reusable aggregation spec: pandas reducers by name, followed by custom
# reducers as (column_label, callable) pairs.
list_primitives_func = [
    'sum',
    'mean',
    'median',
    'min',
    'max',
    'std',
    'var',
    'skew',
    ('kurtosis', kurtosis),
    ('range', lambda i: np.max(i) - np.min(i)),
]

In [11]:
# Quantile levels to summarise; each one becomes a ('quantile_<q>', callable) pair.
list_quantiles = [0.05, 0.25, 0.5, 0.75, 0.95]

# `q=q` freezes the current level as a default argument, so every lambda keeps
# its own quantile instead of sharing the loop variable's final value.
list_quantiles_func = [
    (f'quantile_{q}', lambda i, q=q: np.quantile(i, q=q))
    for q in list_quantiles
]

list_quantiles_func

[('quantile_0.05', <function __main__.<listcomp>.<lambda>(i, q=0.05)>),
 ('quantile_0.25', <function __main__.<listcomp>.<lambda>(i, q=0.25)>),
 ('quantile_0.5', <function __main__.<listcomp>.<lambda>(i, q=0.5)>),
 ('quantile_0.75', <function __main__.<listcomp>.<lambda>(i, q=0.75)>),
 ('quantile_0.95', <function __main__.<listcomp>.<lambda>(i, q=0.95)>)]

In [12]:
# Append the quantile reducers to the spec. Labels that are already present
# are skipped so that re-running this cell does not add duplicate entries
# (pandas rejects duplicate aggregation names in a later `.agg` call).
names_present = {
    func[0] for func in list_primitives_func if isinstance(func, tuple)}
list_primitives_func.extend(
    func for func in list_quantiles_func if func[0] not in names_present)

In [13]:
# Inspect the combined spec: named reducers, custom pairs and quantile pairs.
list_primitives_func

['sum',
 'mean',
 'median',
 'min',
 'max',
 'std',
 'var',
 'skew',
 ('kurtosis',
  <function scipy.stats._stats_py.kurtosis(a, axis=0, fisher=True, bias=True, nan_policy='propagate', *, keepdims=False)>),
 ('range', <function __main__.<lambda>(i)>),
 ('quantile_0.05', <function __main__.<listcomp>.<lambda>(i, q=0.05)>),
 ('quantile_0.25', <function __main__.<listcomp>.<lambda>(i, q=0.25)>),
 ('quantile_0.5', <function __main__.<listcomp>.<lambda>(i, q=0.5)>),
 ('quantile_0.75', <function __main__.<listcomp>.<lambda>(i, q=0.75)>),
 ('quantile_0.95', <function __main__.<listcomp>.<lambda>(i, q=0.95)>)]

In [14]:
# Per-sex summary of 'age', driven by the reusable aggregation spec list.
age_spec = {'age': list_primitives_func}
df_aux = X.groupby(['sex']).agg(age_spec)

In [15]:
# Show the per-sex 'age' summary built from the spec list (two-level header).
df_aux

Unnamed: 0_level_0,age,age,age,age,age,age,age,age,age,age,age,age,age,age,age
Unnamed: 0_level_1,sum,mean,median,min,max,std,var,skew,kurtosis,range,quantile_0.05,quantile_0.25,quantile_0.5,quantile_0.75,quantile_0.95
sex,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2
1.0,10900.0,46.382979,48.0,19.0,75.0,13.120665,172.151846,-0.092271,-0.632192,56.0,23.0,37.0,48.0,55.0,68.0
2.0,10545.0,50.942029,53.0,20.0,79.0,12.697623,161.229633,-0.394162,-0.611248,59.0,28.0,41.0,53.0,60.0,68.0


In [19]:
def agg_num(df, groupby, variables, quantiles=(0.25, 0.50, 0.75)):
    """Aggregate numeric columns per group into a flat summary table.

    For every column in ``variables`` the following statistics are computed
    within each group: sum, mean, median, min, max, std, var, skew, kurtosis
    (``scipy.stats.kurtosis`` called with its default arguments), range
    (max - min) and one column per requested quantile.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data.
    groupby : str or list of str
        Column(s) defining the groups; passed straight to ``df.groupby``.
    variables : str or iterable of str
        Column(s) to summarise. A single column may be given as a bare string.
    quantiles : iterable of float, default (0.25, 0.50, 0.75)
        Quantile levels forwarded to ``np.quantile``.

    Returns
    -------
    pandas.DataFrame
        One row per group, with the group keys as regular columns followed by
        columns named ``{variable}_{statistic}`` (quantiles appear as
        ``{variable}_quantile_{q}``).
    """
    # A bare string would otherwise be iterated character by character and
    # each character looked up as a column name.
    if isinstance(variables, str):
        variables = [variables]

    list_funcs = [
        'sum', 'mean', 'median', 'min', 'max', 'std', 'var', 'skew',
        ('kurtosis', kurtosis), ('range', lambda i: np.max(i) - np.min(i))]

    # `q=q` binds the current level at definition time; without it every
    # lambda would see the last value of the loop variable.
    list_funcs.extend(
        (f'quantile_{q}', lambda i, q=q: np.quantile(i, q=q)) for q in quantiles)

    dict_funcs = {var: list_funcs for var in variables}

    df_agg = df.groupby(groupby).agg(dict_funcs)

    # Collapse the (variable, statistic) column MultiIndex into flat names.
    df_agg.columns = [f'{col[0]}_{col[1]}' for col in df_agg.columns]

    return df_agg.reset_index()

In [20]:
# Preview the first five rows of the feature frame before aggregating.
X.head()

Unnamed: 0,age,sex,bmi,bp,s1,s2,s3,s4,s5,s6
0,59.0,2.0,32.1,101.0,157.0,93.2,38.0,4.0,4.8598,87.0
1,48.0,1.0,21.6,87.0,183.0,103.2,70.0,3.0,3.8918,69.0
2,72.0,2.0,30.5,93.0,156.0,93.6,41.0,4.0,4.6728,85.0
3,24.0,1.0,25.3,84.0,198.0,131.4,40.0,5.0,4.8903,89.0
4,50.0,1.0,23.0,101.0,192.0,125.4,52.0,4.0,4.2905,80.0


In [21]:
# Summarise 'age' and 'bmi' per 'sex' using agg_num's default quantile levels.
df_aux = agg_num(X, groupby=['sex'], variables=['age', 'bmi'])

In [22]:
# Preview the flattened result; the frame is wide, so the display elides the
# middle columns.
df_aux.head()

Unnamed: 0,sex,age_sum,age_mean,age_median,age_min,age_max,age_std,age_var,age_skew,age_kurtosis,...,bmi_min,bmi_max,bmi_std,bmi_var,bmi_skew,bmi_kurtosis,bmi_range,bmi_quantile_0.25,bmi_quantile_0.5,bmi_quantile_0.75
0,1.0,10900.0,46.382979,48.0,19.0,75.0,13.120665,172.151846,-0.092271,-0.632192,...,18.5,41.3,4.571242,20.896254,0.490149,-0.38559,22.8,22.2,25.5,29.55
1,2.0,10545.0,50.942029,53.0,20.0,79.0,12.697623,161.229633,-0.394162,-0.611248,...,18.0,42.2,4.210251,17.726217,0.827243,0.690714,24.2,24.0,25.9,29.0
