## 6.5. Index

In [1]:
import pandas as pd
import numpy  as np

# Build an Index holding the integers 0 through 9.
idx1 = pd.Index(np.arange(10))
idx1

Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64')

In [2]:
# Access a single element by position
idx1[2]

2

In [3]:
# Access with a slice (every third element); the result is again an Index
idx1[::3]

Int64Index([0, 3, 6, 9], dtype='int64')

In [4]:
# Boolean masking: keep only the labels smaller than 5
idx1[ idx1 < 5]

Int64Index([0, 1, 2, 3, 4], dtype='int64')

In [5]:
# Shape of the Index: a 1-tuple holding its length
idx1.shape

(10,)

In [6]:
# Set the name of the Index
idx1.name = 'idx1'

In [7]:
# An Index cannot be modified element-wise: item assignment raises TypeError.
# The error is the point of this cell, so catch and print it instead of
# letting it abort a top-to-bottom run of the notebook.
try:
    idx1[0] = 4
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Index does not support mutable operations

### 6.5.1 マルチ（階層的）インデックス

In [8]:
import pandas as pd
import numpy as np

# Print frames with the plain-text repr instead of HTML tables.
pd.set_option('display.notebook_repr_html', False)

# One column per date string, one row per person.
activities_val = {
    '20230102': [80, 70, 75],
    '20230103': [90, 75, 82],
    '20230104': [95, 72, 89],
}
df = pd.DataFrame(activities_val, index=['Adams', 'Moses', 'Miriam'])
df

        20230102  20230103  20230104
Adams         80        90        95
Moses         70        75        72
Miriam        75        82        89

In [9]:
# Passing two parallel label lists (person, date) as the index makes pandas
# build a two-level MultiIndex for the rows.
# The values are fixed literals; np.random.randint(50, 100, 18).reshape(9, 2)
# would produce a random array of the same shape.
multi_idx_df = pd.DataFrame(
    data=[
        [82, 64], [58, 72], [57, 95],
        [87, 67], [64, 98], [96, 90],
        [90, 76], [68, 55], [88, 57],
    ],
    index=[
        ['Adams'] * 3 + ['Moses'] * 3 + ['Miriam'] * 3,
        ['20230102', '20230103', '20230104'] * 3,
    ],
    columns=['Act', 'Breath'],
)
multi_idx_df

                 Act  Breath
Adams  20230102   82      64
       20230103   58      72
       20230104   57      95
Moses  20230102   87      67
       20230103   64      98
       20230104   96      90
Miriam 20230102   90      76
       20230103   68      55
       20230104   88      57

In [10]:
# Summarize the frame: index type and range, column dtypes, memory usage
multi_idx_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('Adams', '20230102') to ('Miriam', '20230104')
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Act     9 non-null      int64
 1   Breath  9 non-null      int64
dtypes: int64(2)
memory usage: 458.0+ bytes


In [11]:
# from_product builds the MultiIndex as the Cartesian product of the two
# label lists: every person paired with every date (3 x 3 = 9 rows).
idx = pd.MultiIndex.from_product(
    [
        ['Adams', 'Moses', 'Miriam'],
        ['20230102', '20230103', '20230104'],
    ]
)

# The values are fixed literals; np.random.randint(50, 100, 18).reshape(9, 2)
# would produce a random array of the same shape.
multi_idx_product_df = pd.DataFrame(
    data=[
        [57, 83], [82, 62], [75, 69],
        [81, 88], [78, 86], [76, 78],
        [68, 99], [99, 87], [63, 62],
    ],
    index=idx,
    columns=['Act', 'Breath'],
)
multi_idx_product_df

                 Act  Breath
Adams  20230102   57      83
       20230103   82      62
       20230104   75      69
Moses  20230102   81      88
       20230103   78      86
       20230104   76      78
Miriam 20230102   68      99
       20230103   99      87
       20230104   63      62

In [12]:
# Explicit (FirstName, Date) pairs, one tuple per row of the frame.
tuple_index = [
    ('Adams', '20230102'), ('Adams', '20230103'), ('Adams', '20230104'),
    ('Moses', '20230102'), ('Moses', '20230103'), ('Moses', '20230104'),
    ('Miriam', '20230102'), ('Miriam', '20230103'), ('Miriam', '20230104'),
]

# Build the MultiIndex from the tuples and name its two levels in one step.
multi_index = pd.MultiIndex.from_tuples(tuple_index, names=['FirstName', 'Date'])

# The values are fixed literals; np.random.randint(50, 100, 18).reshape(9, 2)
# would produce a random array of the same shape.
multi_idx_tuple_df = pd.DataFrame(
    data=np.array([
        [92, 96], [90, 98], [66, 86],
        [87, 58], [57, 82], [98, 98],
        [76, 83], [58, 76], [62, 99],
    ]),
    index=multi_index,
    columns=['Act', 'Breath'],
)
multi_idx_tuple_df

                    Act  Breath
FirstName Date                 
Adams     20230102   92      96
          20230103   90      98
          20230104   66      86
Moses     20230102   87      58
          20230103   57      82
          20230104   98      98
Miriam    20230102   76      83
          20230103   58      76
          20230104   62      99

In [13]:
# Sort the rows by index label (FirstName first, then Date).
# The name is rebound to the sorted copy rather than using inplace=True, so
# the cell is an ordinary assignment and no object is mutated behind the
# reader's back.
multi_idx_tuple_df = multi_idx_tuple_df.sort_index()
multi_idx_tuple_df

                    Act  Breath
FirstName Date                 
Adams     20230102   92      96
          20230103   90      98
          20230104   66      86
Miriam    20230102   76      83
          20230103   58      76
          20230104   62      99
Moses     20230102   87      58
          20230103   57      82
          20230104   98      98

### 6.5.2 インデックスを使用してデータにアクセスする

#### iloc[], loc[] 属性

In [14]:
# Positional (implicit) indexing: first three rows, first column
multi_idx_tuple_df.iloc[:3, :1]

                    Act
FirstName Date         
Adams     20230102   92
          20230103   90
          20230104   66

In [15]:
# Label-based (explicit) indexing: all rows of 'Adams', all columns
multi_idx_tuple_df.loc['Adams',:]

          Act  Breath
Date                 
20230102   92      96
20230103   90      98
20230104   66      86

In [16]:
# Label-based (explicit) indexing with both index levels given:
# the single matching row comes back as a Series
multi_idx_tuple_df.loc['Adams','20230103']

Act       90
Breath    98
Name: (Adams, 20230103), dtype: int32

In [17]:
# Fancy indexing also works: pass a list of first-level labels
multi_idx_tuple_df.loc[['Adams','Miriam']]

                    Act  Breath
FirstName Date                 
Adams     20230102   92      96
          20230103   90      98
          20230104   66      86
Miriam    20230102   76      83
          20230103   58      76
          20230104   62      99

In [18]:
# The second position of .loc selects columns. '20230103' is a row label on
# the second index level, not a column ('Act' / 'Breath'), so this lookup
# raises KeyError. The error is the point of this cell, so catch and print it
# instead of letting it abort a top-to-bottom run of the notebook.
try:
    multi_idx_df.loc[:, '20230103']
except KeyError as exc:
    print(f"{type(exc).__name__}: {exc}")

KeyError: '20230103'

In [19]:
# Select date '20230102' for every person: inside the row tuple,
# slice(None) plays the role of ':' for the first index level
multi_idx_df.loc[(slice(None), '20230102'), :]

                 Act  Breath
Adams  20230102   82      64
Moses  20230102   87      67
Miriam 20230102   90      76

In [20]:
# Search on level 1 ('Date') with pd.IndexSlice; the ':' keeps every
# level-0 ('FirstName') label
idx = pd.IndexSlice
multi_idx_tuple_df.loc[ idx[:, '20230102'], : ]

                    Act  Breath
FirstName Date                 
Adams     20230102   92      96
Miriam    20230102   76      83
Moses     20230102   87      58

In [21]:
# Search on level 1 ('Date') with the query method, referring to the index
# level by its name
multi_idx_tuple_df.query("Date == '20230102'")

                    Act  Breath
FirstName Date                 
Adams     20230102   92      96
Miriam    20230102   76      83
Moses     20230102   87      58

#### xs() メソッド

In [22]:
# Look up 'Act' (activity index) and 'Breath' (breath count) for the date
# '20230103'; the matched 'Date' level is dropped from the result
multi_idx_tuple_df.xs('20230103', level='Date')

           Act  Breath
FirstName             
Adams       90      98
Miriam      58      76
Moses       57      82

In [23]:
# Same cross-section, addressing the level by number; drop_level=False keeps
# the 'Date' level in the result
multi_idx_tuple_df.xs('20230103', level=1, drop_level=False)

                    Act  Breath
FirstName Date                 
Adams     20230103   90      98
Miriam    20230103   58      76
Moses     20230103   57      82

In [24]:
# Cross-section on level 0 ('FirstName'), keeping that level in the result
multi_idx_tuple_df.xs('Adams', level=0, drop_level=False)

                    Act  Breath
FirstName Date                 
Adams     20230102   92      96
          20230103   90      98
          20230104   66      86

In [25]:
# Cross-section with a full (FirstName, Date) key: the single matching row
# comes back as a Series
multi_idx_tuple_df.xs(('Moses','20230102'), drop_level=False)

Act       87
Breath    58
Name: (Moses, 20230102), dtype: int32

### 6.5.3 インデックスの設定と解除

In [26]:
# Pivot index level 1 ('Date') into the columns; rows stay keyed by FirstName
multi_idx_tuple_df.unstack(level=1)

               Act                     Breath                  
Date      20230102 20230103 20230104 20230102 20230103 20230104
FirstName                                                      
Adams           92       90       66       96       98       86
Miriam          76       58       62       83       76       99
Moses           87       57       98       58       82       98

In [27]:
# Pivot index level 0 ('FirstName') into the columns; rows stay keyed by Date
multi_idx_tuple_df.unstack(level=0)

            Act              Breath             
FirstName Adams Miriam Moses  Adams Miriam Moses
Date                                            
20230102     92     76    87     96     83    58
20230103     90     58    57     98     76    82
20230104     66     62    98     86     99    98

In [28]:
# Keep the frame with 'FirstName' unstacked into the columns under a name so
# that it can be inspected further
df2 = multi_idx_tuple_df.unstack(level=0)
df2

            Act              Breath             
FirstName Adams Miriam Moses  Adams Miriam Moses
Date                                            
20230102     92     76    87     96     83    58
20230103     90     58    57     98     76    82
20230104     66     62    98     86     99    98

In [29]:
# After unstacking, the rows carry a plain single-level Index named 'Date'
df2.index

Index(['20230102', '20230103', '20230104'], dtype='object', name='Date')

In [30]:
# Select one date; the result is a Series indexed by (column, FirstName)
df2.loc['20230102']

        FirstName
Act     Adams        92
        Miriam       76
        Moses        87
Breath  Adams        96
        Miriam       83
        Moses        58
Name: 20230102, dtype: int32

In [31]:
# Move both index levels back into ordinary columns; the rows get a default
# integer index
no_index_df = multi_idx_tuple_df.reset_index()
no_index_df

  FirstName      Date  Act  Breath
0     Adams  20230102   92      96
1     Adams  20230103   90      98
2     Adams  20230104   66      86
3    Miriam  20230102   76      83
4    Miriam  20230103   58      76
5    Miriam  20230104   62      99
6     Moses  20230102   87      58
7     Moses  20230103   57      82
8     Moses  20230104   98      98

In [32]:
# Confirm that the index is now a RangeIndex and that FirstName / Date are
# regular columns
no_index_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   FirstName  9 non-null      object
 1   Date       9 non-null      object
 2   Act        9 non-null      int32 
 3   Breath     9 non-null      int32 
dtypes: int32(2), object(2)
memory usage: 344.0+ bytes


In [33]:
# Rebuild the two-level index from the FirstName and Date columns, then
# check the result with info()
set_index_df = no_index_df.set_index(['FirstName', 'Date'])
set_index_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('Adams', '20230102') to ('Moses', '20230104')
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Act     9 non-null      int32
 1   Breath  9 non-null      int32
dtypes: int32(2)
memory usage: 465.0+ bytes


In [34]:
# City names used as row labels.
idx = ['Tokyo', 'Osaka', 'Yokohama']

# Single unnamed column (label 0) of fixed values; a random alternative of
# the same shape would be np.random.randint(50, 100, 3).reshape(3, 1).
df = pd.DataFrame(
    data=np.array([64, 76, 92]),
    index=idx,
)
df

           0
Tokyo     64
Osaka     76
Yokohama  92

In [35]:
# 'Yamagata' is not in the existing index, so its row is filled with 0
# ('Yokohama' is not requested and therefore does not appear in the result)
df.reindex( ['Tokyo','Osaka','Yamagata'] , fill_value=0)

           0
Tokyo     64
Osaka     76
Yamagata   0

In [36]:
# Swap the order of the two index levels so that 'Date' comes first;
# the row order itself is left as it was
changed_level = set_index_df.swaplevel('FirstName', 'Date')
changed_level

                    Act  Breath
Date     FirstName             
20230102 Adams       92      96
20230103 Adams       90      98
20230104 Adams       66      86
20230102 Miriam      76      83
20230103 Miriam      58      76
20230104 Miriam      62      99
20230102 Moses       87      58
20230103 Moses       57      82
20230104 Moses       98      98

In [37]:
# The index entries are now (Date, FirstName) tuples; the columns are unchanged
changed_level.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 9 entries, ('20230102', 'Adams') to ('20230104', 'Moses')
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Act     9 non-null      int32
 1   Breath  9 non-null      int32
dtypes: int32(2)
memory usage: 465.0+ bytes


### 6.5.4 簡単な集計と統計

In [38]:
# Mean of each column per person.
# The `level=` keyword of DataFrame reductions is deprecated and no longer
# exists in pandas 2.x, so group on the index level instead; sort=False keeps
# the groups in order of first appearance.
multi_idx_tuple_df.groupby(level='FirstName', sort=False).mean()


                 Act     Breath
FirstName                      
Adams      82.666667  93.333333
Miriam     65.333333  86.000000
Moses      80.666667  79.333333

In [39]:
# Sum of each column per person.
# The `level=` keyword of DataFrame reductions is deprecated and no longer
# exists in pandas 2.x, so group on the index level instead; sort=False keeps
# the groups in order of first appearance.
multi_idx_tuple_df.groupby(level='FirstName', sort=False).sum()


           Act  Breath
FirstName             
Adams      248     280
Miriam     196     258
Moses      242     238

In [40]:
# Minimum of each column per date.
# The `level=` keyword of DataFrame reductions is deprecated and no longer
# exists in pandas 2.x, so group on the index level instead; sort=False keeps
# the groups in order of first appearance.
multi_idx_tuple_df.groupby(level='Date', sort=False).min()


          Act  Breath
Date                 
20230102   76      58
20230103   57      76
20230104   62      86

In [41]:
# Sum of each column per person, grouping on the 'FirstName' index level
multi_idx_tuple_df.groupby(level='FirstName').sum()

           Act  Breath
FirstName             
Adams      248     280
Miriam     196     258
Moses      242     238

In [42]:
import pandas as pd
import numpy as np
# One column per date string, one row per person.
activities_val = {
    '20230102': [80, 70, 75],
    '20230103': [90, 75, 82],
    '20230104': [95, 72, 89],
}

# Single-level index that carries the name 'FirstName' from the start.
df = pd.DataFrame(
    data=activities_val,
    index=pd.Index(['Adams', 'Moses', 'Miriam'], name='FirstName'),
)
df

           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72
Miriam           75        82        89

In [43]:
# Note: the index here is a plain Index, not a MultiIndex, so grouping on its
# only level puts every row in its own group and the frame comes back
# unchanged.
# The `level=` keyword of DataFrame reductions is deprecated and no longer
# exists in pandas 2.x, hence the groupby form; sort=False keeps the original
# row order.
df.groupby(level="FirstName", sort=False).min()


           20230102  20230103  20230104
FirstName                              
Adams            80        90        95
Moses            70        75        72
Miriam           75        82        89

In [44]:
# Row-wise mean: average over the date columns for each person
df.mean(axis=1)

FirstName
Adams     88.333333
Moses     72.333333
Miriam    82.000000
dtype: float64

In [45]:
# Column-wise mean: average over the people for each date
df.mean(axis=0)

20230102    75.000000
20230103    82.333333
20230104    85.333333
dtype: float64