## Aggregate, filter, transform, apply

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Toy dataset: a categorical key column ('gruplar') whose three labels each
# appear twice, plus two numeric columns to aggregate over.
df = pd.DataFrame(
    data={
        'gruplar': ['A', 'B', 'C'] * 2,
        'degisken1': [10, 23, 33, 22, 11, 99],
        'degisken2': [100, 253, 333, 262, 111, 969],
    },
    columns=['gruplar', 'degisken1', 'degisken2'],
)
df

Unnamed: 0,gruplar,degisken1,degisken2
0,A,10,100
1,B,23,253
2,C,33,333
3,A,22,262
4,B,11,111
5,C,99,969


In [3]:
# Per-group min, median and max of every numeric column.
# The reductions are given by name: passing callables such as np.median or the
# builtin max is deprecated in recent pandas (FutureWarning), and the string
# names produce the same 'min' / 'median' / 'max' column labels.
df.groupby('gruplar').aggregate(['min', 'median', 'max'])

Unnamed: 0_level_0,degisken1,degisken1,degisken1,degisken2,degisken2,degisken2
Unnamed: 0_level_1,min,median,max,min,median,max
gruplar,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
A,10,16,22,100,181,262
B,11,17,23,111,182,253
C,33,66,99,333,651,969


In [4]:
# Column-specific reductions: the smallest 'degisken1' and the largest
# 'degisken2' within each 'gruplar' group.
df.groupby('gruplar').agg({'degisken1': 'min', 'degisken2': 'max'})

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,10,262
B,11,253
C,33,969


In [5]:
def filter_func(x):
    """Return whether the std of x['degisken1'] is greater than 9.

    Intended as a group predicate for ``groupby(...).filter``: groups for
    which this is true are kept.
    """
    spread = x['degisken1'].std()
    return spread > 9

In [6]:
# Keep only the rows belonging to groups for which filter_func returns True.
df.groupby(by='gruplar').filter(filter_func)

Unnamed: 0,gruplar,degisken1,degisken2
2,C,33,333
5,C,99,969


In [7]:
# Standardise each numeric column within its own group: subtract the group
# mean, then divide by the group standard deviation.
df.groupby('gruplar').transform(lambda col: col.sub(col.mean()).div(col.std()))

Unnamed: 0,degisken1,degisken2
0,-0.707107,-0.707107
1,0.707107,0.707107
2,-0.707107,-0.707107
3,0.707107,0.707107
4,-0.707107,-0.707107
5,0.707107,0.707107


In [8]:
# Original note (translated): "apply runs faster than the other operations."
# NOTE(review): the pandas group-by user guide says the opposite — apply can be
# slower than the more specific agg()/transform() — confirm before relying on it.
# Calls np.sum once per 'gruplar' group.
df.groupby('gruplar').apply(np.sum)

Unnamed: 0_level_0,gruplar,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A,AA,32,362
B,BB,34,364
C,CC,132,1302


In [9]:
# Group by passing the key column as a Series object (instead of its label)
# and sum within each group.
df.groupby(by=df['gruplar']).sum()

Unnamed: 0_level_0,degisken1,degisken2
gruplar,Unnamed: 1_level_1,Unnamed: 2_level_1
A,32,362
B,34,364
C,132,1302


In [10]:
# Split key for external indexing: one group label per row, supplied as a
# plain list.
L = [0, 1, 0, 1, 2, 0]

In [11]:
# Group rows positionally by the labels in L and sum within each label.
df.groupby(by=L).sum()

Unnamed: 0,degisken1,degisken2
0,142,1402
1,45,515
2,11,111
