# データのマージ

In [1]:
import numpy as np
import pandas as pd
from pandas import DataFrame

In [4]:
dframe1 = DataFrame({'key':['X', 'Z', 'Y', 'Z', 'X', 'X'], 'data_set1':np.arange(6)})
dframe1

Unnamed: 0,data_set1,key
0,0,X
1,1,Z
2,2,Y
3,3,Z
4,4,X
5,5,X


In [5]:
dframe2 = DataFrame({'key':['Q', 'Y', 'Z'], 'data_set2':[1,2,3]})
dframe2

Unnamed: 0,data_set2,key
0,1,Q
1,2,Y
2,3,Z


In [6]:
# merge：keyをkeyにjoin
pd.merge(dframe1, dframe2)

Unnamed: 0,data_set1,key,data_set2
0,1,Z,3
1,3,Z,3
2,2,Y,2


In [8]:
# keyを明示的に指定する場合
pd.merge(dframe1, dframe2, on='key')

Unnamed: 0,data_set1,key,data_set2
0,1,Z,3
1,3,Z,3
2,2,Y,2


In [10]:
# left outer join
pd.merge(dframe1, dframe2, on='key', how='left')

Unnamed: 0,data_set1,key,data_set2
0,0,X,
1,1,Z,3.0
2,2,Y,2.0
3,3,Z,3.0
4,4,X,
5,5,X,


In [12]:
# right outer join
pd.merge(dframe1, dframe2, on='key', how='right')

Unnamed: 0,data_set1,key,data_set2
0,1.0,Z,3
1,3.0,Z,3
2,2.0,Y,2
3,,Q,1


In [15]:
# full outer join
pd.merge(dframe1, dframe2, on='key', how='outer')

Unnamed: 0,data_set1,key,data_set2
0,0.0,X,
1,4.0,X,
2,5.0,X,
3,1.0,Z,3.0
4,3.0,Z,3.0
5,2.0,Y,2.0
6,,Q,1.0


# indexを使ったマージ

In [1]:
import pandas as pd
import numpy as np
from pandas import DataFrame

In [2]:
df_left = DataFrame({'key':['X', 'Y', 'Z', 'X', 'Y'],
                    'data':range(5)})
df_left

Unnamed: 0,data,key
0,0,X
1,1,Y
2,2,Z
3,3,X
4,4,Y


In [4]:
df_right = DataFrame({'group_data':[10,20]},
                    index=['X', 'Y'])
df_right

Unnamed: 0,group_data
X,10
Y,20


In [6]:
# 列とindexのmerge
pd.merge(df_left, df_right, left_on='key', right_index=True)

Unnamed: 0,data,key,group_data
0,0,X,10
3,3,X,10
1,1,Y,20
4,4,Y,20


In [7]:
pd.merge(df_left, df_right, left_on='key', right_index=True, how='outer')

Unnamed: 0,data,key,group_data
0,0,X,10.0
3,3,X,10.0
1,1,Y,20.0
4,4,Y,20.0
2,2,Z,


In [9]:
df_left_hr = DataFrame({'key1':['SF', 'SF', 'SF', 'LA', 'LA'],
                       'key2':[10,20,30,20,30],
                       'data_set':np.arange(5)})
df_left_hr

Unnamed: 0,data_set,key1,key2
0,0,SF,10
1,1,SF,20
2,2,SF,30
3,3,LA,20
4,4,LA,30


In [11]:
df_right_hr = DataFrame(np.arange(10).reshape((5,2)),
                       index=[['LA', 'LA', 'SF', 'SF', 'SF'],
                             [20,10,10,10,20]],
                       columns=['col_1', 'col_2'])
df_right_hr

Unnamed: 0,Unnamed: 1,col_1,col_2
LA,20,0,1
LA,10,2,3
SF,10,4,5
SF,10,6,7
SF,20,8,9


In [12]:
# indexに階層構造があるデータをマージ
pd.merge(df_left_hr, df_right_hr, left_on=['key1', 'key2'], right_index=True)

Unnamed: 0,data_set,key1,key2,col_1,col_2
0,0,SF,10,4,5
0,0,SF,10,6,7
1,1,SF,20,8,9
3,3,LA,20,0,1


In [14]:
# joinメソッドも存在する（実はこっちのほうがより使う）
df_left.join(df_right)

Unnamed: 0,data,key,group_data
0,0,X,
1,1,Y,
2,2,Z,
3,3,X,
4,4,Y,


# データの連結

In [15]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [16]:
arr1 = np.arange(9).reshape((3,3))
arr1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [19]:
# 列方向の連結
np.concatenate([arr1, arr1], axis=1)

array([[0, 1, 2, 0, 1, 2],
       [3, 4, 5, 3, 4, 5],
       [6, 7, 8, 6, 7, 8]])

In [21]:
# 行方向
np.concatenate([arr1, arr1], axis=0)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8],
       [0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [22]:
# 同じことをSeriesで
ser1 = Series([0,1,2], index=['T', 'U', 'V'])
ser2 = Series([3,4], index=['X', 'Y'])

In [23]:
ser1

T    0
U    1
V    2
dtype: int64

In [24]:
ser2

X    3
Y    4
dtype: int64

In [26]:
# 連結
pd.concat([ser1, ser2])

T    0
U    1
V    2
X    3
Y    4
dtype: int64

In [27]:
pd.concat([ser1, ser2, ser1])

T    0
U    1
V    2
X    3
Y    4
T    0
U    1
V    2
dtype: int64

In [29]:
# 列方向の連結 このときDataFrameになる
pd.concat([ser1, ser2], axis=1)

Unnamed: 0,0,1
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [32]:
# keyを指定すると、元のseriesのindexが指定したkeyによって階層構造になって戻ってくる
pd.concat([ser1, ser2], keys=['cat1', 'cat2'])

cat1  T    0
      U    1
      V    2
cat2  X    3
      Y    4
dtype: int64

In [34]:
# ↑にaxis=1を追加すると、keysがカラム名になる
pd.concat([ser1, ser2], keys=['cat1', 'cat2'], axis=1)

Unnamed: 0,cat1,cat2
T,0.0,
U,1.0,
V,2.0,
X,,3.0
Y,,4.0


In [39]:
# DataFrameでも同じことをやる
dframe1 = DataFrame(np.random.randn(4,3), columns=['X', 'Y', 'Z'])
dframe1

Unnamed: 0,X,Y,Z
0,0.528245,0.677301,-0.272111
1,-0.377101,0.803664,1.469076
2,0.186565,-1.450115,-0.655305
3,0.482573,0.047952,-2.197574


In [40]:
dframe2 = DataFrame(np.random.randn(3,3), columns=['Y', 'Q', 'X'])
dframe2

Unnamed: 0,Y,Q,X
0,2.427147,0.425114,-0.139449
1,0.583988,-0.9071,-0.464675
2,0.251465,0.437104,-1.038415


In [41]:
# 連結
pd.concat([dframe1, dframe2])

Unnamed: 0,Q,X,Y,Z
0,,0.528245,0.677301,-0.272111
1,,-0.377101,0.803664,1.469076
2,,0.186565,-1.450115,-0.655305
3,,0.482573,0.047952,-2.197574
0,0.425114,-0.139449,2.427147,
1,-0.9071,-0.464675,0.583988,
2,0.437104,-1.038415,0.251465,


In [42]:
# concat前のindexを無視する
pd.concat([dframe1, dframe2], ignore_index=True)

Unnamed: 0,Q,X,Y,Z
0,,0.528245,0.677301,-0.272111
1,,-0.377101,0.803664,1.469076
2,,0.186565,-1.450115,-0.655305
3,,0.482573,0.047952,-2.197574
4,0.425114,-0.139449,2.427147,
5,-0.9071,-0.464675,0.583988,
6,0.437104,-1.038415,0.251465,


# データを組み合わせる

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [3]:
ser1 = Series([2, np.nan, 4, np.nan, 6, np.nan],
                     index=['Q', 'R', 'S', 'T', 'U', 'V'])
ser1

Q    2.0
R    NaN
S    4.0
T    NaN
U    6.0
V    NaN
dtype: float64

In [6]:
ser2 = Series(np.arange(len(ser1), dtype=np.float64),
                     index=['Q', 'R', 'S', 'T', 'U', 'V'])
ser2

Q    0.0
R    1.0
S    2.0
T    3.0
U    4.0
V    5.0
dtype: float64

In [7]:
# np.whereの挙動
np.where(pd.isnull(ser1))

(array([1, 3, 5]),)

In [9]:
Series(np.where(pd.isnull(ser1), ser2, ser1), index=ser1.index)

Q    2.0
R    1.0
S    4.0
T    3.0
U    6.0
V    5.0
dtype: float64

In [11]:
# これで同じことができる
ser1.combine_first(ser2)

Q    2.0
R    1.0
S    4.0
T    3.0
U    6.0
V    5.0
dtype: float64

In [14]:
# データフレームでも同じことができる
dframe_odds = DataFrame({'X':[1, np.nan, 3, np.nan],
                                          'Y':[np.nan, 5, np.nan, 7],
                                          'Z':[np.nan, 9, np.nan, 11]})
dframe_odds

Unnamed: 0,X,Y,Z
0,1.0,,
1,,5.0,9.0
2,3.0,,
3,,7.0,11.0


In [16]:
dframe_even = DataFrame({'X':[2, 4, np.nan, 6, 8],
                                          'Y':[np.nan, 10, 12, 14, 16]})
dframe_even

Unnamed: 0,X,Y
0,2.0,
1,4.0,10.0
2,,12.0
3,6.0,14.0
4,8.0,16.0


In [17]:
# nullならeven 
dframe_odds.combine_first(dframe_even)

Unnamed: 0,X,Y,Z
0,1.0,,
1,4.0,5.0,9.0
2,3.0,12.0,
3,6.0,7.0,11.0
4,8.0,16.0,


# SeriesとDataFrameの変換

In [18]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

In [24]:
dframe1 = DataFrame(np.arange(8).reshape((2,4)),
                   index=pd.Index(['LA', 'SP'], name='city'),
                   columns=pd.Index(['A', 'B', 'C', 'D'], name='letter'))
dframe1

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SP,4,5,6,7


In [27]:
# DataFrame → Series
dframe_st = dframe1.stack()
dframe_st

city  letter
LA    A         0
      B         1
      C         2
      D         3
SP    A         4
      B         5
      C         6
      D         7
dtype: int64

In [29]:
# Series → DataFrame
dframe_st.unstack()

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SP,4,5,6,7


In [30]:
dframe_st.unstack(0)

city,LA,SP
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [31]:
dframe_st.unstack(1)

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SP,4,5,6,7


In [33]:
dframe_st.unstack('city')

city,LA,SP
letter,Unnamed: 1_level_1,Unnamed: 2_level_1
A,0,4
B,1,5
C,2,6
D,3,7


In [34]:
dframe_st.unstack('letter')

letter,A,B,C,D
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
LA,0,1,2,3
SP,4,5,6,7


In [35]:
ser1 = Series([0, 1, 2], index=['Q', 'X', 'Y'])
ser2 = Series([4, 5, 6], index=['X', 'Y', 'Z'])

In [36]:
ser1

Q    0
X    1
Y    2
dtype: int64

In [37]:
ser2

X    4
Y    5
Z    6
dtype: int64

In [40]:
dframe = pd.concat([ser1, ser2], keys=['Alpha', 'Beta'])
dframe

Alpha  Q    0
       X    1
       Y    2
Beta   X    4
       Y    5
       Z    6
dtype: int64

In [41]:
dframe.unstack()

Unnamed: 0,Q,X,Y,Z
Alpha,0.0,1.0,2.0,
Beta,,4.0,5.0,6.0


In [45]:
# stackはnull値がある場合は取り除く
dframe.unstack().stack()

Alpha  Q    0.0
       X    1.0
       Y    2.0
Beta   X    4.0
       Y    5.0
       Z    6.0
dtype: float64

In [46]:
# dropna=False とするとnullはそのまま保持する
dframe.unstack().stack(dropna=False)

Alpha  Q    0.0
       X    1.0
       Y    2.0
       Z    NaN
Beta   Q    NaN
       X    4.0
       Y    5.0
       Z    6.0
dtype: float64

# ピボットテーブルの作り方

In [47]:
import numpy as np
from pandas import DataFrame

In [48]:
import pandas.util.testing as tm
tm.N = 3

def unpivot(frame):
    N,K = frame.shape
    data = {'value': frame.values.ravel('F'),
                'variable': np.asarray(frame.columns).repeat(N),
                'date': np.tile(np.asarray(frame.index),K)}
    return DataFrame(data, columns=['date', 'variable', 'value'])

dframe = unpivot(tm.makeTimeDataFrame())

In [49]:
dframe

Unnamed: 0,date,variable,value
0,2000-01-03,A,-2.537167
1,2000-01-04,A,0.605186
2,2000-01-05,A,-0.067167
3,2000-01-03,B,0.631994
4,2000-01-04,B,0.348089
5,2000-01-05,B,0.981936
6,2000-01-03,C,0.544217
7,2000-01-04,C,-0.80469
8,2000-01-05,C,-0.168129
9,2000-01-03,D,0.022539


In [51]:
dframe_piv = dframe.pivot('date', 'variable', 'value')
dframe_piv

variable,A,B,C,D
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2000-01-03,-2.537167,0.631994,0.544217,0.022539
2000-01-04,0.605186,0.348089,-0.80469,-1.018647
2000-01-05,-0.067167,0.981936,-0.168129,-1.690444


# 重複したデータの処理

In [52]:
from pandas import DataFrame

In [54]:
dframe = DataFrame({'key1': ['A']*2 + ['B']* 3,
                                 'key2': [2,2,2,3,3]})
dframe

Unnamed: 0,key1,key2
0,A,2
1,A,2
2,B,2
3,B,3
4,B,3


In [58]:
dframe.duplicated()

0    False
1     True
2    False
3    False
4     True
dtype: bool

In [60]:
dframe.drop_duplicates()

Unnamed: 0,key1,key2
0,A,2
2,B,2
3,B,3


In [61]:
dframe.drop_duplicates(['key1'])

Unnamed: 0,key1,key2
0,A,2
2,B,2


In [62]:
dframe.drop_duplicates(['key2'])

Unnamed: 0,key1,key2
0,A,2
3,B,3


In [66]:
# 重複したやつのうちどこを取るか？
dframe.drop_duplicates(['key1'], keep='last')

Unnamed: 0,key1,key2
1,A,2
4,B,3


# マッピングを使った列の追加

In [67]:
from pandas import DataFrame

In [69]:
dframe = DataFrame({'city':['Alma', 'Brian Head', 'Fox Park'],
                   'altitude':[3158, 3000, 2753]})
dframe

Unnamed: 0,altitude,city
0,3158,Alma
1,3000,Brian Head
2,2753,Fox Park


In [70]:
state_map={'Alma':'Colorado', 'Brian Head':'Utah', 'Fox Parkk':'Wyoming'}

In [71]:
dframe['state'] = dframe['city'].map(state_map)

In [72]:
dframe

Unnamed: 0,altitude,city,state
0,3158,Alma,Colorado
1,3000,Brian Head,Utah
2,2753,Fox Park,


In [75]:
# 最も簡単な列の追加方法
dframe['key1'] = [0,1,2]

In [74]:
dframe

Unnamed: 0,altitude,city,state,key1
0,3158,Alma,Colorado,0
1,3000,Brian Head,Utah,1
2,2753,Fox Park,,2


# 置換

In [76]:
import numpy as np
from pandas import Series

In [77]:
ser1 = Series([1,2,3,4,1,2,3,4])
ser1

0    1
1    2
2    3
3    4
4    1
5    2
6    3
7    4
dtype: int64

In [78]:
ser1.replace(1, 10)

0    10
1     2
2     3
3     4
4    10
5     2
6     3
7     4
dtype: int64

In [80]:
# いっぺんに置換することもできる
ser1.replace([1,4], [100, 400])

0    100
1      2
2      3
3    400
4    100
5      2
6      3
7    400
dtype: int64

In [83]:
# DataFrameでも同じことができる？
dframe = DataFrame(np.arange(9).reshape((3,3)))
dframe

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8


In [84]:
dframe.replace(4, np.nan)

Unnamed: 0,0,1,2
0,0,1.0,2
1,3,,5
2,6,7.0,8


# indexの変更

In [85]:
import numpy as np
from pandas import DataFrame

In [86]:
dframe = DataFrame(np.arange(12).reshape((3,4)),
                  index=['NY', 'LA', 'SF'],
                  columns=['A', 'B', 'C', 'D'])
dframe

Unnamed: 0,A,B,C,D
NY,0,1,2,3
LA,4,5,6,7
SF,8,9,10,11


In [87]:
str.lower('A')

'a'

In [89]:
indexname = dframe.index.map(str.lower)

In [90]:
dframe.index = indexname

In [91]:
dframe

Unnamed: 0,A,B,C,D
ny,0,1,2,3
la,4,5,6,7
sf,8,9,10,11


In [92]:
str.title('udemi is good')

'Udemi Is Good'

In [94]:
dframe.rename(index=str.title, columns=str.lower)

Unnamed: 0,a,b,c,d
Ny,0,1,2,3
La,4,5,6,7
Sf,8,9,10,11


In [95]:
dframe.rename(index={'ny':'NEW YORK'},
             columns={'A':'ALPHA'})

Unnamed: 0,ALPHA,B,C,D
NEW YORK,0,1,2,3
la,4,5,6,7
sf,8,9,10,11


# ビニング

In [1]:
import pandas as pd

In [2]:
years = [1990, 1991, 1992, 2008, 2012, 2015, 1987, 2013 , 2008, 1999]

In [4]:
len(years)

10

In [5]:
decade_bins = [1960, 1970, 1980, 1990, 2000, 2010, 2020]

In [6]:
decade_cat = pd.cut(years, decade_bins)
decade_cat

[(1980, 1990], (1990, 2000], (1990, 2000], (2000, 2010], (2010, 2020], (2010, 2020], (1980, 1990], (2010, 2020], (2000, 2010], (1990, 2000]]
Categories (6, object): [(1960, 1970] < (1970, 1980] < (1980, 1990] < (1990, 2000] < (2000, 2010] < (2010, 2020]]

In [7]:
decade_cat.categories

Index(['(1960, 1970]', '(1970, 1980]', '(1980, 1990]', '(1990, 2000]',
       '(2000, 2010]', '(2010, 2020]'],
      dtype='object')

In [8]:
pd.value_counts(decade_cat)

(2010, 2020]    3
(1990, 2000]    3
(2000, 2010]    2
(1980, 1990]    2
(1970, 1980]    0
(1960, 1970]    0
dtype: int64

In [10]:
# 何個に分けるかを指定することもできる
pd.cut(years, 2)

[(1986.972, 2001], (1986.972, 2001], (1986.972, 2001], (2001, 2015], (2001, 2015], (2001, 2015], (1986.972, 2001], (2001, 2015], (2001, 2015], (1986.972, 2001]]
Categories (2, object): [(1986.972, 2001] < (2001, 2015]]

# 外れ値

In [11]:
import numpy as np
from pandas import DataFrame

In [12]:
np.random.seed(12345)

In [14]:
dframe = DataFrame(np.random.randn(1000, 4))

In [15]:
dframe.head()

Unnamed: 0,0,1,2,3
0,-0.204708,0.478943,-0.519439,-0.55573
1,1.965781,1.393406,0.092908,0.281746
2,0.769023,1.246435,1.007189,-1.296221
3,0.274992,0.228913,1.352917,0.886429
4,-2.001637,-0.371843,1.669025,-0.43857


In [16]:
dframe.tail()

Unnamed: 0,0,1,2,3
995,1.089085,0.251232,-1.451985,1.653126
996,-0.478509,-0.010663,-1.060881,-1.50287
997,-1.946267,1.013592,0.037333,0.133304
998,-1.293122,-0.322542,-0.78296,-0.30334
999,0.089987,0.292291,1.177706,0.882755


In [17]:
dframe.describe()

Unnamed: 0,0,1,2,3
count,1000.0,1000.0,1000.0,1000.0
mean,-0.067684,0.067924,0.025598,-0.002298
std,0.998035,0.992106,1.006835,0.996794
min,-3.428254,-3.548824,-3.184377,-3.745356
25%,-0.77489,-0.591841,-0.641675,-0.644144
50%,-0.116401,0.101143,0.002073,-0.013611
75%,0.616366,0.780282,0.680391,0.654328
max,3.366626,2.653656,3.260383,3.927528


In [18]:
col = dframe[0]

In [20]:
col.head()

0   -0.204708
1    1.965781
2    0.769023
3    0.274992
4   -2.001637
Name: 0, dtype: float64

In [22]:
col[np.abs(col) >= 3]

523   -3.428254
900    3.366626
Name: 0, dtype: float64

In [23]:
np.abs(dframe)>3

Unnamed: 0,0,1,2,3
0,False,False,False,False
1,False,False,False,False
2,False,False,False,False
3,False,False,False,False
4,False,False,False,False
5,False,False,True,False
6,False,False,False,False
7,False,False,False,False
8,False,False,False,False
9,False,False,False,False


In [26]:
# any:どれかの
dframe[(np.abs(dframe)>3).any(1)]

Unnamed: 0,0,1,2,3
5,-0.539741,0.476985,3.248944,-1.021228
97,-0.774363,0.552936,0.106061,3.927528
102,-0.655054,-0.56523,3.176873,0.959533
305,-2.315555,0.457246,-0.025907,-3.399312
324,0.050188,1.951312,3.260383,0.963301
400,0.146326,0.508391,-0.196713,-3.745356
499,-0.293333,-0.242459,-3.05699,1.918403
523,-3.428254,-0.296336,-0.439938,-0.867165
586,0.275144,1.179227,-3.184377,1.369891
808,-0.362528,-3.548824,1.553205,-2.186301


In [28]:
# 外れ値を変換する
np.sign(dframe) # 符号を返すめそっど

Unnamed: 0,0,1,2,3
0,-1.0,1.0,-1.0,-1.0
1,1.0,1.0,1.0,1.0
2,1.0,1.0,1.0,-1.0
3,1.0,1.0,1.0,1.0
4,-1.0,-1.0,1.0,-1.0
5,-1.0,1.0,1.0,-1.0
6,-1.0,1.0,1.0,1.0
7,1.0,1.0,-1.0,-1.0
8,-1.0,-1.0,-1.0,1.0
9,-1.0,1.0,-1.0,1.0


In [29]:
dframe[np.abs(dframe)>3] = np.sign(dframe) * 3

In [30]:
dframe.describe()

Unnamed: 0,0,1,2,3
count,1000.0,1000.0,1000.0,1000.0
mean,-0.067623,0.068473,0.025153,-0.002081
std,0.995485,0.990253,1.003977,0.989736
min,-3.0,-3.0,-3.0,-3.0
25%,-0.77489,-0.591841,-0.641675,-0.644144
50%,-0.116401,0.101143,0.002073,-0.013611
75%,0.616366,0.780282,0.680391,0.654328
max,3.0,2.653656,3.0,3.0


# Permutation

In [31]:
import numpy as np
from pandas import DataFrame

In [32]:
dframe = DataFrame(np.arange(4*4).reshape((4,4)))
dframe

Unnamed: 0,0,1,2,3
0,0,1,2,3
1,4,5,6,7
2,8,9,10,11
3,12,13,14,15


In [33]:
blender = np.array([0, 3, 2, 1])
blender

array([0, 3, 2, 1])

In [34]:
dframe.take(blender)

Unnamed: 0,0,1,2,3
0,0,1,2,3
3,12,13,14,15
2,8,9,10,11
1,4,5,6,7


In [35]:
blender = np.random.permutation(4)
blender

array([1, 3, 2, 0])

In [37]:
box = np.array(['A', 'B', 'C'])
box

array(['A', 'B', 'C'], 
      dtype='<U1')

In [40]:
shaker = np.random.randint(0, len(box), size=10)
shaker

array([0, 2, 0, 0, 2, 1, 1, 2, 1, 1])

In [44]:
hand_grabs = box.take(shaker)
hand_grabs

array(['A', 'C', 'A', 'A', 'C', 'B', 'B', 'C', 'B', 'B'], 
      dtype='<U1')

シミュレーションなどランダムな数値がほしいときに有用