# 계산된 데이터프레임 반환하기

## agg() 함수

In [2]:
import pandas as pd
import seaborn as sns

# Load the Titanic dataset through seaborn and keep only the columns used
# in this walkthrough.
titanic = sns.load_dataset('titanic')
df = titanic.loc[:, ['age', 'sex', 'class', 'fare', 'survived']]

# Group by passenger class. Passing the key as a scalar (not a one-element
# list) keeps the group keys plain labels such as 'First' when the groups are
# iterated later; recent pandas yields 1-tuples for a list of length one.
grouped = df.groupby('class')

# 'sex' holds strings, so compute the standard deviation on the numeric
# columns only. Recent pandas raises on non-numeric columns instead of
# silently dropping them.
std_all = grouped[['age', 'fare', 'survived']].std()
std_all

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,14.802856,78.380373,0.484026
Second,14.001077,13.417399,0.500623
Third,12.495398,11.778142,0.428949


In [4]:
# Inspect the type of the grouped result; the recorded output is a DataFrame.
type(std_all)

pandas.core.frame.DataFrame

In [5]:
# Standard deviation of the 'fare' column alone, one value per class group.
std_fare = grouped['fare'].std()

In [8]:
# Selecting a single column before aggregating gives a Series (see the
# recorded output), indexed by class.
print(type(std_fare))
std_fare

<class 'pandas.core.series.Series'>


class
First     78.380373
Second    13.417399
Third     11.778142
Name: fare, dtype: float64

### agg를 통해 사용자 정의 함수를 모든 그룹에 적용하기

In [9]:
def min_max(x):
    """Return the spread of ``x``: its maximum minus its minimum.

    ``x`` may be any object exposing ``max()`` and ``min()`` whose results
    support subtraction (for example a pandas Series or DataFrame).
    """
    highest = x.max()
    lowest = x.min()
    return highest - lowest

In [10]:
# min_max subtracts values, which is undefined for the string column 'sex'.
# Aggregate only the numeric columns; recent pandas raises on such columns
# instead of silently dropping them.
agg_minmax = grouped[['age', 'fare', 'survived']].agg(min_max)

In [11]:
# Display the per-class (max - min) range of each aggregated column.
agg_minmax

Unnamed: 0_level_0,age,fare,survived
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
First,79.08,512.3292,1
Second,69.33,73.5,1
Third,73.58,69.55,1


In [13]:
# Apply both 'min' and 'max' to every column of each class group; the result
# has two-level column labels (column name, function name).
agg_all = grouped.aggregate(['min', 'max'])
agg_all.head()

Unnamed: 0_level_0,age,age,sex,sex,fare,fare,survived,survived
Unnamed: 0_level_1,min,max,min,max,min,max,min,max
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
First,0.92,80.0,female,male,0.0,512.3292,0,1
Second,0.67,70.0,female,male,0.0,73.5,0,1
Third,0.42,74.0,female,male,0.0,69.55,0,1


- 각 그룹에 여러 함수를 한 번에 적용할 때도 agg를 사용한다

### 열마다 다른 함수를 한 번에 적용하기

In [14]:
# Map each column to its own aggregation(s): min and max for 'fare',
# mean for 'age'.
agg_sep = grouped.aggregate({
    'fare': ['min', 'max'],
    'age': 'mean',
})

In [15]:
# Display the per-class fare min/max and mean age.
agg_sep

Unnamed: 0_level_0,fare,fare,age
Unnamed: 0_level_1,min,max,mean
class,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
First,0.0,512.3292,38.233441
Second,0.0,73.5,29.87763
Third,0.0,69.55,25.14062


In [18]:
# Iterate over the per-class 'age' groups, printing each group key followed
# by the first rows of that group's ages.
for key, group in grouped['age']:
    print(key, group.head(), sep='\n')

First
1     38.0
3     35.0
6     54.0
11    58.0
23    28.0
Name: age, dtype: float64
Second
9     14.0
15    55.0
17     NaN
20    35.0
21    34.0
Name: age, dtype: float64
Third
0    22.0
2    26.0
4    35.0
5     NaN
7     2.0
Name: age, dtype: float64


In [20]:
# Per-class mean and standard deviation of age, used below to standardize
# each group by hand.
age_mean = grouped['age'].mean()
age_std = grouped['age'].std()

In [21]:
# Standardize ages within each class by hand: subtract that class's mean and
# divide by that class's standard deviation, then show the first three values.
for key, group in grouped['age']:
    group_zscore = (group - age_mean.loc[key]) / age_std.loc[key]
    print('* key', key)
    display(group_zscore.head(3))
    print()

* key First


1   -0.015770
3   -0.218434
6    1.065103
Name: age, dtype: float64


* key Second


9    -1.134029
15    1.794317
17         NaN
Name: age, dtype: float64


* key Third


0   -0.251342
2    0.068776
4    0.789041
Name: age, dtype: float64




## transform 함수

In [22]:
def z_score(x):
    """Standardize ``x``: subtract its mean, then divide by its standard deviation.

    The mean and standard deviation are whatever ``x.mean()`` and ``x.std()``
    return, so missing values propagate as the underlying object defines.
    """
    centered = x - x.mean()
    return centered / x.std()

In [25]:
# z_score needs a mean and standard deviation, which the string column 'sex'
# does not support, so transform only the numeric columns (recent pandas
# raises on such columns instead of silently dropping them).
# Unlike agg, transform returns one row per original row, keeping the
# original index, with each value standardized within its class group.
age_zscore = grouped[['age', 'fare', 'survived']].transform(z_score)
age_zscore.head()

Unnamed: 0,age,fare,survived
0,-0.251342,-0.545549,-0.565014
1,-0.01577,-0.164217,0.765188
2,0.068776,-0.488239,1.766263
3,-0.218434,-0.396205,0.765188
4,0.789041,-0.477626,-0.565014


In [26]:
# Look up rows by index label; rows come back in the order requested (1, 9, 0).
age_zscore.loc[[1,9,0]]

Unnamed: 0,age,fare,survived
1,-0.01577,-0.164217,0.765188
9,-1.134029,0.701225,1.053035
0,-0.251342,-0.545549,-0.565014


In [28]:
# Row count of the transformed frame (891 in the recorded output).
len(age_zscore)

891

In [29]:
# Label-based slice: the recorded output contains rows 0 through 9, both
# endpoints included.
age_zscore.loc[0:9]

Unnamed: 0,age,fare,survived
0,-0.251342,-0.545549,-0.565014
1,-0.01577,-0.164217,0.765188
2,0.068776,-0.488239,1.766263
3,-0.218434,-0.396205,0.765188
4,0.789041,-0.477626,-0.565014
5,,-0.44296,-0.565014
6,1.065103,-0.411993,-1.300819
7,-1.851931,0.628236,-0.565014
8,0.148805,-0.215845,1.766263
9,-1.134029,0.701225,1.053035
