# Numpy 101

Numpyを書籍『Pythonによるデータ分析の教科書』に沿って学ぶ。


In [26]:
# Check the prerequisite packages: list installed packages, keep the ones this
# notebook relies on, and print them as "name = version".
# grep matches substrings, so e.g. numpydoc is listed as well.
!pip list | grep -e "numpy" -e "pandas" -e "scipy" -e "scikit-learn" -e "matplotlib" -e "xlrd" -e "openpyxl" -e "lxml" | awk '{print $1 " = " $2}'

lxml = 4.3.1
matplotlib = 3.0.2
numpy = 1.15.4
numpydoc = 0.8.0
openpyxl = 2.6.0
pandas = 0.24.1
scikit-learn = 0.20.3
scipy = 1.2.0
xlrd = 1.2.0


In [6]:
# Import NumPy
import numpy as np

In [17]:
# One-dimensional array: build it from a list, then report its value,
# Python type, element dtype and shape.
a = np.array([1, 2, 3])
a_report = "a={}, type={}, item-data-type={}, shape={}"
print(a_report.format(a, type(a), a.dtype, a.shape))

a=[1 2 3], type=<class 'numpy.ndarray'>, item-data-type=int64, shape=(3,)


In [10]:
# Nested lists produce arrays with two (or more) dimensions
b = np.array([[1, 2, 3], [4, 5, 6]])
b_report = "b={}, shape={}"
print(b_report.format(b, b.shape))

b=[[1 2 3]
 [4 5 6]], shape=(2, 3)


In [16]:
# Reshaping: the target shape must hold exactly the same number of elements,
# otherwise reshape raises ValueError.
c1 = np.array([0, 1, 2, 3, 4, 5])
c2 = c1.reshape(2, 3)  # the six elements laid out as a 2 x 3 matrix
print("c2={}".format(c2))

# Back to one dimension: ravel() returns a view when possible,
# flatten() always returns a copy.
c3 = c2.ravel()
c4 = c2.flatten()
print("c3={}".format(c3))
print("c4={}".format(c4))

c2=[[0 1 2]
 [3 4 5]]
c3=[0 1 2 3 4 5]
c4=[0 1 2 3 4 5]


In [24]:
# Indexing: one index picks a whole row, a (row, column) pair picks one element
a2 = np.array([[1, 2, 3, 4], [6, 7, 8, 9]])
first_row = a2[0]
print(first_row)
single_item = a2[1, 2]
print(single_item)

# Slicing works per axis
last_column = a2[:, 3]  # every row, column 3 only
print(last_column)
row_prefix = a2[1, :3]  # row 1 only, columns 0 to 2
print(row_prefix)

[1 2 3 4]
8
[4 9]
[6 7 8]


In [28]:
# Assigning through an index or a slice modifies the array in place
a3 = np.array([[1, 2, 3], [4, 5, 6]])
a3[:, 1] = 9  # overwrite every entry of column 1
a3[0, 2] = 4  # overwrite a single element
print(a3)

[[1 9 4]
 [4 9 6]]


In [29]:
# Generating sequences with arange: (stop), (start, stop) and
# (start, stop, step); the stop value itself is never included.
for bounds in ((10,), (1, 11), (1, 12, 2)):
    print(np.arange(*bounds))

[0 1 2 3 4 5 6 7 8 9]
[ 1  2  3  4  5  6  7  8  9 10]
[ 1  3  5  7  9 11]


In [38]:
# Random numbers: uniform samples with the requested shape
f = np.random.random((3, 2))
print(f)

# Seeding makes the stream reproducible. Re-seeding with the same value
# restarts it, so both draws below begin with the same numbers.
np.random.seed(123)
seeded_2x3 = np.random.random((2, 3))
print(seeded_2x3)
np.random.seed(123)
seeded_4x2 = np.random.rand(4, 2)
print(seeded_4x2)

[[0.39804426 0.73799541]
 [0.18249173 0.17545176]
 [0.53155137 0.53182759]]
[[0.69646919 0.28613933 0.22685145]
 [0.55131477 0.71946897 0.42310646]]
[[0.69646919 0.28613933]
 [0.22685145 0.55131477]
 [0.71946897 0.42310646]
 [0.9807642  0.68482974]]


In [42]:
# Random numbers, continued
print(np.random.randint(1, 100))  # a single integer from [1, 100)
print(np.random.randint(1, 10, size=(3, 3)))  # 3 x 3 integers from [1, 10)
print(np.random.uniform(low=0.0, high=5.0, size=(2, 3)))  # floats from [0.0, 5.0)

# Samples drawn from the standard normal distribution
np.random.seed(123)
n = np.random.randn(4, 2)
print(n)

81
[[7 5 8]
 [7 8 2]
 [6 8 3]]
[[3.06447263 0.60314333 4.131704  ]
 [3.01530064 2.72534003 1.71381917]]
[[-1.0856306   0.99734545]
 [ 0.2829785  -1.50629471]
 [-0.57860025  1.65143654]
 [-2.42667924 -0.42891263]]


In [49]:
# Arrays filled with a constant value
print(np.zeros(shape=3))
print(np.zeros(shape=(2, 3)))
print(np.ones(shape=5))
print(np.eye(N=3))  # identity matrix
print(np.full(shape=3, fill_value=3.14))
print(np.full(shape=(2, 4), fill_value=np.pi))

[0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[1. 1. 1. 1. 1.]
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[3.14 3.14 3.14]
[[3.14159265 3.14159265 3.14159265 3.14159265]
 [3.14159265 3.14159265 3.14159265 3.14159265]]


In [52]:
# Evenly spaced samples: linspace includes both endpoints, so 5 points
# split the range 0..1 into 4 equal steps.
five_divided = np.linspace(start=0, stop=1, num=5)
print(five_divided)

# 20 evenly spaced points from 0 to pi
pi_divided = np.linspace(start=0, stop=np.pi, num=20)
print(pi_divided)

[0.   0.25 0.5  0.75 1.  ]
[0.         0.16534698 0.33069396 0.49604095 0.66138793 0.82673491
 0.99208189 1.15742887 1.32277585 1.48812284 1.65346982 1.8188168
 1.98416378 2.14951076 2.31485774 2.48020473 2.64555171 2.81089869
 2.97624567 3.14159265]


In [57]:
# Joining arrays
a = np.array([1, 5, 4])
a1 = np.array([2, 3, 7])
print(np.concatenate((a, a1)))  # 1-D arrays are joined end to end

b = np.array([[1, 2, 8], [3, 2, 1]])
b1 = np.array([[10], [20]])
b2 = np.array([30, 40, 50])
print(np.concatenate((b, b1), axis=1))  # append b1 as an extra column
print(np.hstack((b, b1)))  # same result via hstack
print(np.vstack((b, b2)))  # append b2 as an extra row

[1 5 4 2 3 7]
[[ 1  2  8 10]
 [ 3  2  1 20]]
[[ 1  2  8 10]
 [ 3  2  1 20]]
[[ 1  2  8]
 [ 3  2  1]
 [30 40 50]]


In [58]:
# Transpose: .T swaps the rows and columns of b
for matrix in (b, b.T):
    print(matrix)

[[1 2 8]
 [3 2 1]]
[[1 3]
 [2 2]
 [8 1]]


In [61]:
# Adding a dimension with np.newaxis
print(a)
as_row = a[np.newaxis, :]  # new leading axis: the values form a single row
print(as_row)
as_column = a[:, np.newaxis]  # new trailing axis: the values form a single column
print(as_column)

[1 5 4]
[[1 5 4]]
[[1]
 [5]
 [4]]


In [62]:
# Coordinate grids: xx repeats m on every row, yy repeats n down every column
m = np.arange(4)
n = np.arange(4, 7)
xx, yy = np.meshgrid(m, n)
for grid in (xx, yy):
    print(grid)

[[0 1 2 3]
 [0 1 2 3]
 [0 1 2 3]]
[[4 4 4 4]
 [5 5 5 5]
 [6 6 6 6]]


In [64]:
# Arithmetic is applied element-wise: subtract the overall mean from every entry
x = np.array([[0, 1, 2], [3, 5, 7]])  # 2 x 3 matrix
overall_mean = x.mean()
print(x - overall_mean)

[[-3. -2. -1.]
 [ 0.  2.  4.]]


In [67]:
# Dot product (matrix product)
print(x)
y = np.array([[2], [4], [5]])  # 3 x 1 matrix
print(y)
product = np.dot(x, y)  # one row per row of x, one column per column of y: 2 x 1 here
print(product)
print(x @ y)  # the @ operator computes the same product

[[0 1 2]
 [3 5 7]]
[[2]
 [4]
 [5]]
[[14]
 [61]]
[[14]
 [61]]
