# 09_데이터 그룹화 및 집계
### 1. groupby
- **groupby()** : 데이터를 구분할 수 있는 열(column)의 값을 기준으로 데이터를 그룹화한 뒤, 그룹별로 기초 통계 함수 등을 적용할 수 있도록 함  
        DataFrame.groupby(by=None, axis=<no_default>, level=None,
        as_index=True, sort=True, group_keys=True, observed=<no_default>, dropna=True)  
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.groupby.html

### 2. 단일 열로 그룹화
- groupby 함수의 인수로 그룹화할 열을 지정

In [1]:
import seaborn as sns

In [2]:
# Load seaborn's 'iris' example dataset; the cells below use it as a DataFrame.
iris = sns.load_dataset('iris')

In [3]:
# Group the rows by the values of the 'species' column. The column is passed by
# label (the documented form of `by`) instead of as the Series `iris.species`,
# so the call does not depend on attribute-style column access.
iris_grouped = iris.groupby(by='species')

In [4]:
# Evaluating the groupby object shows only its DataFrameGroupBy repr,
# not the grouped rows.
iris_grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x78be6f8dcf70>

In [5]:
# Mean of each measurement column within each species group.
iris_grouped.mean()

Unnamed: 0_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
setosa,5.006,3.428,1.462,0.246
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


### 3. 다중 열로 그룹화
-  groupby 함수의 인수로 **그룹화할 열을 리스트 형식**으로 지정

In [6]:
import numpy as np

In [7]:
# For the exercise, add a 'num' column to iris holding the values 0 through 5,
# each repeated for 25 consecutive rows (6 * 25 = 150 values in total).
# np.repeat builds this directly instead of flattening a nested Python list.
iris['num'] = np.repeat(np.arange(6), 25)

In [8]:
# Preview the first rows to confirm the new 'num' column was added.
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,num
0,5.1,3.5,1.4,0.2,setosa,0
1,4.9,3.0,1.4,0.2,setosa,0
2,4.7,3.2,1.3,0.2,setosa,0
3,4.6,3.1,1.5,0.2,setosa,0
4,5.0,3.6,1.4,0.2,setosa,0


In [9]:
# Group by two keys at once by passing a list of column labels; each distinct
# (species, num) pair that occurs in the data forms its own group.
iris_grouped2 = iris.groupby(by=['species', 'num'])

In [10]:
# Mean of each measurement column for every (species, num) group;
# the result is indexed by both grouping keys.
iris_grouped2.mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width
species,num,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
setosa,0,5.028,3.48,1.46,0.248
setosa,1,4.984,3.376,1.464,0.244
versicolor,2,6.012,2.776,4.312,1.344
versicolor,3,5.86,2.764,4.208,1.308
virginica,4,6.576,2.928,5.64,2.044
virginica,5,6.6,3.02,5.464,2.008


### 4. 그룹간 반복 처리
- 그룹화된 데이터는 반복문으로 순회할 수 있으며, 각 반복에서 (그룹 키, 해당 그룹의 데이터프레임) 쌍을 얻어 처리 가능

In [11]:
# Iterating a groupby yields (group key, sub-DataFrame) pairs; here the key is
# the species value. The key variable is named `species_name` rather than
# `type` so the builtin `type` is not rebound for the rest of the notebook.
for species_name, group in iris_grouped:
    print(species_name, '\n', group.head())

setosa 
    sepal_length  sepal_width  petal_length  petal_width species  num
0           5.1          3.5           1.4          0.2  setosa    0
1           4.9          3.0           1.4          0.2  setosa    0
2           4.7          3.2           1.3          0.2  setosa    0
3           4.6          3.1           1.5          0.2  setosa    0
4           5.0          3.6           1.4          0.2  setosa    0
versicolor 
     sepal_length  sepal_width  petal_length  petal_width     species  num
50           7.0          3.2           4.7          1.4  versicolor    2
51           6.4          3.2           4.5          1.5  versicolor    2
52           6.9          3.1           4.9          1.5  versicolor    2
53           5.5          2.3           4.0          1.3  versicolor    2
54           6.5          2.8           4.6          1.5  versicolor    2
virginica 
      sepal_length  sepal_width  petal_length  petal_width    species  num
100           6.3          3.3    

### 5. 데이터프레임 그룹 인덱싱
- DataFrameGroupBy 객체에는 `loc` 같은 인덱서가 없으므로, 그룹별로 위치(position)를 기준으로 행을 선택할 때는 **take()** 메서드를 이용

In [12]:
# Display the groupby object again; only the DataFrameGroupBy repr is shown.
iris_grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x78be6f8dcf70>

In [13]:
# Intentional failure for demonstration: a DataFrameGroupBy object has no
# `.loc` attribute, so this raises AttributeError. Use take() to index
# within groups instead.
iris_grouped.loc[0,]

AttributeError: 'DataFrameGroupBy' object has no attribute 'loc'

In [14]:
iris_grouped.take([0,1,2])  # select the rows at positions 0, 1 and 2 within each group

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
setosa,0,5.1,3.5,1.4,0.2
setosa,1,4.9,3.0,1.4,0.2
setosa,2,4.7,3.2,1.3,0.2
versicolor,50,7.0,3.2,4.7,1.4
versicolor,51,6.4,3.2,4.5,1.5
versicolor,52,6.9,3.1,4.9,1.5
virginica,100,6.3,3.3,6.0,2.5
virginica,101,5.8,2.7,5.1,1.9
virginica,102,7.1,3.0,5.9,2.1


In [15]:
# Same per-group positional selection, now for positions 1, 2 and 3.
iris_grouped.take([1,2,3])

Unnamed: 0_level_0,Unnamed: 1_level_0,sepal_length,sepal_width,petal_length,petal_width
species,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
setosa,1,4.9,3.0,1.4,0.2
setosa,2,4.7,3.2,1.3,0.2
setosa,3,4.6,3.1,1.5,0.2
versicolor,51,6.4,3.2,4.5,1.5
versicolor,52,6.9,3.1,4.9,1.5
versicolor,53,5.5,2.3,4.0,1.3
virginica,101,5.8,2.7,5.1,1.9
virginica,102,7.1,3.0,5.9,2.1
virginica,103,6.3,2.9,5.6,1.8
