## 7.2 GroupByメソッド

In [2]:
import pandas as pd
import numpy as np

pd.set_option('display.notebook_repr_html', False)

adams_act = pd.DataFrame(
                [
                  ['20230102', 92, 76],
                  ['20230102', 89, 75], # 新規
                  ['20230102', 81, 69], # 新規
                  ['20230103', 65, 87],
                  ['20230104', 96, 76]
                ],
                columns=['Date', 'Act', 'Breath'])
print(adams_act)

       Date  Act  Breath
0  20230102   92      76
1  20230102   89      75
2  20230102   81      69
3  20230103   65      87
4  20230104   96      76


In [3]:
adams_act.groupby('Date')

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x0000022062CCF610>

In [4]:
adams_act.groupby('Date').describe()

           Act                                                    Breath  \
         count       mean       std   min   25%   50%   75%   max  count   
Date                                                                       
20230102   3.0  87.333333  5.686241  81.0  85.0  89.0  90.5  92.0    3.0   
20230103   1.0  65.000000       NaN  65.0  65.0  65.0  65.0  65.0    1.0   
20230104   1.0  96.000000       NaN  96.0  96.0  96.0  96.0  96.0    1.0   

                                                             
               mean       std   min   25%   50%   75%   max  
Date                                                         
20230102  73.333333  3.785939  69.0  72.0  75.0  75.5  76.0  
20230103  87.000000       NaN  87.0  87.0  87.0  87.0  87.0  
20230104  76.000000       NaN  76.0  76.0  76.0  76.0  76.0  

In [5]:
adams_act.groupby('Date').count()

          Act  Breath
Date                 
20230102    3       3
20230103    1       1
20230104    1       1

In [6]:
adams_act.groupby('Date').size()

Date
20230102    3
20230103    1
20230104    1
dtype: int64

In [7]:
adams_act.groupby('Date').nth(2)

          Act  Breath
Date                 
20230102   81      69

In [8]:
adams_act.groupby('Date')['Act'].sum()

Date
20230102    262
20230103     65
20230104     96
Name: Act, dtype: int64

In [9]:
adams_act.groupby('Date').sum()

          Act  Breath
Date                 
20230102  262     220
20230103   65      87
20230104   96      76

In [10]:
adams_act.groupby('Date').mean()

                Act     Breath
Date                          
20230102  87.333333  73.333333
20230103  65.000000  87.000000
20230104  96.000000  76.000000

In [11]:
adams_act.groupby('Date').median()

           Act  Breath
Date                  
20230102  89.0    75.0
20230103  65.0    87.0
20230104  96.0    76.0

In [12]:
adams_act.groupby('Date').min()

          Act  Breath
Date                 
20230102   81      69
20230103   65      87
20230104   96      76

In [13]:
adams_act.groupby('Date').max()

          Act  Breath
Date                 
20230102   92      76
20230103   65      87
20230104   96      76

### 7.2.1 aggregate/aggメソッド

In [14]:
adams_act.groupby('Date').aggregate([min, max, np.mean, sum])

         Act                     Breath                    
         min max       mean  sum    min max       mean  sum
Date                                                       
20230102  81  92  87.333333  262     69  76  73.333333  220
20230103  65  65  65.000000   65     87  87  87.000000   87
20230104  96  96  96.000000   96     76  76  76.000000   76

### 7.2.2 filterメソッド

In [15]:
def filter_f(a):
  return a['Date'].count() > 1

adams_act.groupby('Date').filter(filter_f)

       Date  Act  Breath
0  20230102   92      76
1  20230102   89      75
2  20230102   81      69

In [16]:
adams_act.filter(items=['Act', 'Breath'])

   Act  Breath
0   92      76
1   89      75
2   81      69
3   65      87
4   96      76

In [17]:
# likeパラメータ
adams_act.filter(like='Ac')

   Act
0   92
1   89
2   81
3   65
4   96

### 7.2.3 applyメソッド

In [18]:
adams_act.groupby('Date').apply(np.sum)

                              Date  Act  Breath
Date                                           
20230102  202301022023010220230102  262     220
20230103                  20230103   65      87
20230104                  20230104   96      76

In [19]:
def apply_func(a, n):
  return a**n

adams_act.groupby('Date').apply(apply_func, n=3)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  adams_act.groupby('Date').apply(apply_func, n=3)


      Act  Breath
0  778688  438976
1  704969  421875
2  531441  328509
3  274625  658503
4  884736  438976

In [20]:
def apply_func(a):
  a['Act2'] = a['Act']**2
  return a

adams_act.groupby('Date').apply(apply_func)

To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  adams_act.groupby('Date').apply(apply_func)


       Date  Act  Breath  Act2
0  20230102   92      76  8464
1  20230102   89      75  7921
2  20230102   81      69  6561
3  20230103   65      87  4225
4  20230104   96      76  9216

### 7.2.5 applymapメソッド

In [21]:
adams_act_wo_date = adams_act.set_index('Date')
adams_act_wo_date.applymap(lambda x: x**2)

           Act  Breath
Date                  
20230102  8464    5776
20230102  7921    5625
20230102  6561    4761
20230103  4225    7569
20230104  9216    5776

### 7.2.6 transformメソッド

In [22]:
adams_act.groupby('Date').transform(np.sum)

   Act  Breath
0  262     220
1  262     220
2  262     220
3   65      87
4   96      76