In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_openml

# Fetch the `credit-g` dataset from OpenML as pandas objects:
# X is the feature DataFrame, y is the label Series (values such as good/bad).
X, y = fetch_openml(name='credit-g', as_frame=True, return_X_y=True)

# Build the working frame as a *new* object. A plain `df = X` would only alias
# the feature frame, so adding the label column would silently modify X too.
df = X.assign(target=y)
df.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,...,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker,target
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,...,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes,good
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,...,real estate,22.0,none,own,1.0,skilled,1.0,none,yes,bad
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,...,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes,good
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,...,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes,good
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,...,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes,bad


#### Use different aggregations for different columns

In [5]:
# Passing a dict to agg() applies a separate aggregation (or list of
# aggregations) to each column: min and max of credit_amount, mean of age,
# computed once per job group.
(
    df[['job', 'credit_amount', 'age']]
    .groupby(['job'])
    .agg({'credit_amount': ['min', 'max'], 'age': 'mean'})
)

Unnamed: 0_level_0,credit_amount,credit_amount,age
Unnamed: 0_level_1,min,max,mean
job,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
unemp/unskilled non res,609.0,14555.0,40.090909
unskilled resident,250.0,11998.0,36.54
skilled,338.0,15945.0,34.253968
high qualif/self emp/mgmt,629.0,18424.0,39.027027


#### Named Aggregations

In [7]:
# Named aggregation: each keyword becomes an output column name, and the
# NamedAgg value states which input column to aggregate and how.
(
    df[['target', 'credit_amount', 'age']]
    .groupby('target')
    .agg(
        min_credit_amount=pd.NamedAgg(column='credit_amount', aggfunc='min'),
        max_credit_amount=pd.NamedAgg(column='credit_amount', aggfunc='max'),
        average_age=pd.NamedAgg(column='age', aggfunc='mean'),
    )
)

Unnamed: 0_level_0,min_credit_amount,max_credit_amount,average_age
target,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
good,250.0,15857.0,36.224286
bad,433.0,18424.0,33.963333


#### Custom Aggregations

In [8]:
# Number of rows for every (job, target) combination, kept as a one-column
# DataFrame named 'target' and indexed by (job, target).
job_count = (
    df[['job', 'target']]
    .groupby(['job', 'target'])
    .agg({'target': 'count'})
)

# Total rows per job, broadcast back onto the (job, target) index.
# transform() keeps the original index, so the division below aligns row by
# row. This avoids groupby.apply (which, from pandas 2.0 on, prepends the group
# key and duplicates the 'job' index level) and avoids float() on a Series
# (deprecated in recent pandas).
job_total = job_count.groupby(level=0).transform('sum')

# Share of each target value within its job, in percent.
job_percent = 100 * job_count / job_total
job_percent

Unnamed: 0_level_0,Unnamed: 1_level_0,target
job,target,Unnamed: 2_level_1
unemp/unskilled non res,good,68.181818
unemp/unskilled non res,bad,31.818182
unskilled resident,good,72.0
unskilled resident,bad,28.0
skilled,good,70.47619
skilled,bad,29.52381
high qualif/self emp/mgmt,good,65.540541
high qualif/self emp/mgmt,bad,34.459459
