## 6.3 データフレーム(DataFrame)

In [2]:
import pandas as pd
import numpy as np
from pandas import Series, DataFrame, Index

# Show DataFrames with their plain-text repr instead of HTML tables.
pd.set_option('display.notebook_repr_html', False)

# Draw the demo values from a seeded generator so that "Restart & Run All"
# rebuilds exactly the same 3x3 frame of uniform samples in [0, 1).
rng = np.random.default_rng(0)
df1 = pd.DataFrame(rng.random((3, 3)))
df1

          0         1         2
0  0.385208  0.515737  0.478192
1  0.120191  0.326432  0.049565
2  0.607752  0.936891  0.720915

In [3]:
# Select the second column: on a DataFrame, [] with a label picks a column
# (here the integer column label 1).
df1[1]

0    0.515737
1    0.326432
2    0.936891
Name: 1, dtype: float64

In [4]:
# Select the second row by its index label (explicit, label-based indexing via .loc).
df1.loc[1]

0    0.120191
1    0.326432
2    0.049565
Name: 1, dtype: float64

In [5]:
# Select the second row by integer position (implicit, position-based indexing via .iloc).
# For df1 the labels and positions coincide, so this returns the same row as df1.loc[1].
df1.iloc[1]

0    0.120191
1    0.326432
2    0.049565
Name: 1, dtype: float64

In [6]:
# Attribute-style column access is impossible for integer column labels:
# `df1.0` is not valid Python, so it is rejected with a SyntaxError before any
# column lookup can happen. The expression is compiled from a string so that
# the error can be caught and shown without stopping "Run All".
try:
    compile("df1.0", "<cell>", "eval")
except SyntaxError as err:
    print(f"SyntaxError: {err}")

SyntaxError: invalid syntax (3631459736.py, line 2)

In [8]:
import pandas as pd
import numpy as np

# City names used as index labels; the two lists share no label.
jp_cities = ['tokyo', 'osaka', 'sendai', 'yamagata', 'yonezawa']
kr_cities = ['seoul', 'pusan', 'incheon', 'daejeon', 'daegu']

# One integer Series per country, keyed by city name.
series_jp = pd.Series(np.arange(0, 10, 2), index=jp_cities)
series_kr = pd.Series(np.arange(0, 5, 1), index=kr_cities)

# Each Series becomes a column. Rows are aligned on the union of both indexes,
# and cities missing from a column are filled with NaN.
cities_asia = pd.DataFrame(dict(japan=series_jp, korea=series_kr))
cities_asia

          japan  korea
daegu       NaN    4.0
daejeon     NaN    3.0
incheon     NaN    2.0
osaka       2.0    NaN
pusan       NaN    1.0
sendai      4.0    NaN
seoul       NaN    0.0
tokyo       0.0    NaN
yamagata    6.0    NaN
yonezawa    8.0    NaN

In [9]:
# Access the 'korea' column by its label.
cities_asia['korea'] # same as cities_asia.korea

daegu       4.0
daejeon     3.0
incheon     2.0
osaka       NaN
pusan       1.0
sendai      NaN
seoul       0.0
tokyo       NaN
yamagata    NaN
yonezawa    NaN
Name: korea, dtype: float64

In [10]:
# Attribute-style access to the 'korea' column; returns the same Series as cities_asia['korea'].
cities_asia.korea

daegu       4.0
daejeon     3.0
incheon     2.0
osaka       NaN
pusan       1.0
sendai      NaN
seoul       0.0
tokyo       NaN
yamagata    NaN
yonezawa    NaN
Name: korea, dtype: float64

In [11]:
# Access the 'tokyo' row by its index label.
cities_asia.loc['tokyo']

japan    0.0
korea    NaN
Name: tokyo, dtype: float64

In [12]:
# Access the eighth row (tokyo) by integer position.
cities_asia.iloc[7]

japan    0.0
korea    NaN
Name: tokyo, dtype: float64

In [13]:
# An index label can also be read as an attribute; equivalent to series_kr['daegu'].
series_kr.daegu

4

In [14]:
# Fails: attribute access only works for labels present in the index, and
# series_jp has no 'daegu' label, so the lookup raises AttributeError.
# The error is caught and printed so the demonstration does not stop "Run All".
try:
    series_jp.daegu
except AttributeError as err:
    print(f"AttributeError: {err}")

AttributeError: 'Series' object has no attribute 'daegu'

In [15]:
# Row labels of the DataFrame.
cities_asia.index

Index(['daegu', 'daejeon', 'incheon', 'osaka', 'pusan', 'sendai', 'seoul',
       'tokyo', 'yamagata', 'yonezawa'],
      dtype='object')

In [16]:
# Column labels of the DataFrame.
cities_asia.columns

Index(['japan', 'korea'], dtype='object')

In [17]:
# The underlying data as a 2-D NumPy array (one array row per DataFrame row).
# NOTE(review): the pandas documentation recommends DataFrame.to_numpy() over
# .values — consider switching.
cities_asia.values

array([[nan,  4.],
       [nan,  3.],
       [nan,  2.],
       [ 2., nan],
       [nan,  1.],
       [ 4., nan],
       [nan,  0.],
       [ 0., nan],
       [ 6., nan],
       [ 8., nan]])

In [18]:
# Access the second row of the underlying array (the 'daejeon' row).
cities_asia.values[1]

array([nan,  3.])

### 6.3.1 T(転置)

In [19]:
# Transpose: the row labels become the columns and the column labels become the rows.
cities_asia.T

       daegu  daejeon  incheon  osaka  pusan  sendai  seoul  tokyo  yamagata  \
japan    NaN      NaN      NaN    2.0    NaN     4.0    NaN    0.0       6.0   
korea    4.0      3.0      2.0    NaN    1.0     NaN    0.0    NaN       NaN   

       yonezawa  
japan       8.0  
korea       NaN  

In [20]:
# Display the frame again: cities_asia itself is unchanged after calling .T.
cities_asia

          japan  korea
daegu       NaN    4.0
daejeon     NaN    3.0
incheon     NaN    2.0
osaka       2.0    NaN
pusan       NaN    1.0
sendai      4.0    NaN
seoul       NaN    0.0
tokyo       0.0    NaN
yamagata    6.0    NaN
yonezawa    8.0    NaN

### 6.3.2 スライス

In [21]:
# Slice: the first three rows and the first two columns.
cities_asia.iloc[:3, :2]

         japan  korea
daegu      NaN    4.0
daejeon    NaN    3.0
incheon    NaN    2.0

In [22]:
# Use iloc to set the value at the third row, first column (incheon / japan) to 12.
# This modifies cities_asia in place, so frames displayed by earlier cells show the old value.
cities_asia.iloc[2,0] = 12

In [23]:
# First three rows, all columns; the modified value (12) is visible.
cities_asia.iloc[:3, :]

         japan  korea
daegu      NaN    4.0
daejeon    NaN    3.0
incheon   12.0    2.0

In [24]:
# Same result as cities_asia.iloc[:3, :]; the column slice may be omitted.
cities_asia.iloc[:3]

         japan  korea
daegu      NaN    4.0
daejeon    NaN    3.0
incheon   12.0    2.0

In [25]:
# Every second row, starting from the first row.
cities_asia.iloc[::2]

          japan  korea
daegu       NaN    4.0
incheon    12.0    2.0
pusan       NaN    1.0
seoul       NaN    0.0
yamagata    6.0    NaN

In [26]:
# Every second row in reverse order, starting from the last row.
cities_asia.iloc[::-2]

          japan  korea
yonezawa    8.0    NaN
tokyo       0.0    NaN
sendai      4.0    NaN
osaka       2.0    NaN
daejeon     NaN    3.0

In [27]:
# Start at the third row from the end, i.e. select the last three rows.
cities_asia.iloc[-3:]

          japan  korea
tokyo       0.0    NaN
yamagata    6.0    NaN
yonezawa    8.0    NaN

In [28]:
# Stop three rows before the end, i.e. everything except the last three rows.
# Note: a bare slice inside [] selects rows, whereas a label inside []
# (e.g. cities_asia['korea']) selects a column.
cities_asia[:-3]

         japan  korea
daegu      NaN    4.0
daejeon    NaN    3.0
incheon   12.0    2.0
osaka      2.0    NaN
pusan      NaN    1.0
sendai     4.0    NaN
seoul      NaN    0.0

### 6.3.3 スライスを使用して行を逆順にする

In [29]:
# A step of -1 walks the rows backwards, giving the frame in reverse row order.
# The rows are only reversed, not sorted by value.
cities_asia[::-1]

          japan  korea
yonezawa    8.0    NaN
yamagata    6.0    NaN
tokyo       0.0    NaN
seoul       NaN    0.0
sendai      4.0    NaN
pusan       NaN    1.0
osaka       2.0    NaN
incheon    12.0    2.0
daejeon     NaN    3.0
daegu       NaN    4.0

### 6.3.4 マスキング

In [30]:
# Boolean mask: keep only the rows whose 'japan' value is greater than 10.
# Rows where 'japan' is NaN do not satisfy the comparison and are dropped.
cities_asia[cities_asia.japan > 10]

         japan  korea
incheon   12.0    2.0

In [31]:
# Row labels again: the index is the same as before the slicing and masking cells.
cities_asia.index

Index(['daegu', 'daejeon', 'incheon', 'osaka', 'pusan', 'sendai', 'seoul',
       'tokyo', 'yamagata', 'yonezawa'],
      dtype='object')