# 離散型確率変数

In [None]:
import numpy as np
import matplotlib.pyplot as plt

%precision 3
%matplotlib inline
from matplotlib import rcParams
# Use a sans-serif face that has Japanese glyphs so the Japanese axis labels
# in this notebook render; the candidates are listed in order of preference.
rcParams['font.family'] = 'sans-serif'
# 'Meiryo' is the family name of the Windows font; it was misspelled as
# 'Meirio', which can never match an installed font.
rcParams['font.sans-serif'] = [
    'Hiragino Maru Gothic Pro', 'Yu Gothic', 'Meiryo', 'Takao',
    'IPAexGothic', 'IPAPGothic', 'VL PGothic', 'Noto Sans CJK JP',
]

In [None]:
# Set of values the die can take: the integers 1 through 6
x_set = np.arange(1, 7)

In [None]:
# Probability mass function (PMF) of the loaded die
def f(x):
    """Return P(X = x): x / 21 for x in {1, ..., 6}, and 0 for any other x.

    The support is spelled out here instead of being read from the global
    ``x_set``. That global is rebound later in this notebook to the support
    of a different variable; looking it up at call time would then silently
    change this PMF and every quantity computed from it.
    """
    if x in (1, 2, 3, 4, 5, 6):
        return x / 21
    return 0

In [None]:
# The behaviour of the random variable X is determined by its set of possible
# values (x_set) and its probability function (f), so X is stored as that pair.
X = [x_set, f]

In [None]:
# Probability of each possible value, in the same order as x_set
prob = np.array(list(map(f, x_set)))
# Displayed result: mapping from each possible value to its probability
{x_k: p_k for x_k, p_k in zip(x_set, prob)}

In [None]:
# Bar chart pairing each possible value with its probability
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(x_set, prob)
ax.set_xlabel('とりうる値')
ax.set_ylabel('確率')

plt.show()

## 確率の性質

確率は必ず0以上  
すべての確率を足すと1にならなければならない

In [None]:
# First property: every probability is greater than or equal to 0
(prob >= 0).all()

In [None]:
# Second property: the probabilities must add up to 1
prob.sum()

## 累積分布関数(Cumulative distribution function)

確率変数Xがx以下になるときの確率

In [None]:
def F(x):
    """Cumulative distribution function: the probability that X is at most x.

    Adds up f(x_k) over every x_k in the global x_set with x_k <= x.
    """
    masses = []
    for x_k in x_set:
        if x_k <= x:
            masses.append(f(x_k))
    return np.sum(masses)

In [None]:
# P(X <= 3), rounded to three decimal places
round(F(3), 3)

## 確率変数の変換

In [None]:
# Y = 2X + 3 takes the value 2 * x_k + 3 with the probability X takes x_k,
# so only the set of values changes; the probabilities are those of X.
y_set = 2 * x_set + 3
prob = np.array(list(map(f, x_set)))
# Displayed result: mapping from each value of Y to its probability
{y_k: p_k for y_k, p_k in zip(y_set, prob)}

## 1次元の離散型確率変数の指標

## 確率変数の期待値

In [None]:
# Expected value (mean) of the random variable: the sum of x_k * f(x_k)
# over all possible values, rounded to three decimal places
round(np.sum([x_k * f(x_k) for x_k in x_set]), 3)

In [None]:
# Draw one million values from x_set with probabilities prob and report the
# sample mean, rounded to three decimal places
sample = np.random.choice(x_set, size=1_000_000, p=prob)
round(sample.mean(), 3)

In [None]:
# A die whose six faces each have probability 1/6
dice = np.arange(1, 7)
# Its expected value: the sum of face * (1/6), rounded to three decimals
round(np.sum([face * (1/6) for face in dice]), 3)

In [None]:
# Draw one million rolls of the equal-probability die and report the sample
# mean, rounded to three decimal places
sample2 = np.random.choice(dice, size=1_000_000, p=[1/6] * len(dice))
round(sample2.mean(), 3)

In [None]:
# Expected value of a discrete random variable.
# When g is not given, this is the expected value of X itself.
def E(X, g=lambda x: x):
    """Return the expected value of g(X).

    X is a pair ``[possible values, probability function]``; the result is
    the sum of g(x_k) * f(x_k) over every possible value x_k.
    """
    support, pmf = X
    weighted = []
    for value in support:
        weighted.append(g(value) * pmf(value))
    return np.sum(weighted)

In [None]:
# Expected value of X (g defaults to the identity function)
E(X)

In [None]:
# Expected value of the transformed variable 2X + 3
E(X, g=lambda x: 2*x + 3)

In [None]:
# Linearity of expectation: E(2X + 3) = 2 E(X) + 3,
# so this gives the same answer as the cell above.
2 * E(X) + 3

## 確率変数の分散

In [None]:
# Variance computed directly: the sum over all possible values of the
# squared deviation from the mean, weighted by f(x_k)
mean = E(X)
np.sum([((x_k - mean) ** 2) * f(x_k) for x_k in x_set])

In [None]:
# Variance of a discrete random variable
def V(X, g=lambda x: x):
    """Return the variance of g(X).

    X is a pair ``[possible values, probability function]``. The mean of
    g(X) is obtained from ``E``; the result is the sum of
    (g(x_k) - mean) ** 2 * f(x_k) over every possible value x_k.
    """
    support, pmf = X
    mu = E(X, g)
    squared_dev = []
    for value in support:
        squared_dev.append(((g(value) - mu) ** 2) * pmf(value))
    return np.sum(squared_dev)

In [None]:
# Variance of X (g defaults to the identity function)
V(X)

In [None]:
# Variance of the random variable Y = 2X + 3
V(X, lambda x: 2 * x + 3)

In [None]:
# Same quantity via the variance formula V(aX + b) = a^2 V(X), with a = 2
2**2 * V(X)

## 2次元の離散型確率変数

### 2次元の離散型確率変数の定義

#### 同時確率分布

In [None]:
# Possible values of the two-dimensional variable: X in 2..12, Y in 1..6.
# NOTE: this rebinds the globals x_set and y_set used earlier in the notebook;
# any earlier function that reads the global x_set at call time (e.g. F) will
# see these new values if it is called after this cell has run.
x_set = np.arange(2, 13)
y_set = np.arange(1, 7)

In [None]:
# Joint probability function of (X, Y)
def f_XY(x, y):
    """Return the joint probability P(X = x, Y = y).

    The value is y * (x - y) / 441 when both y and x - y lie in [1, 6],
    and 0 otherwise.
    """
    if not 1 <= y <= 6:
        return 0
    rest = x - y
    if not 1 <= rest <= 6:
        return 0
    return y * rest / 441

In [None]:
# The two-dimensional variable is stored as its two sets of possible values
# together with the joint probability function.
XY = [x_set, y_set, f_XY]

In [None]:
# Joint probability table: one row per value in x_set, one column per value
# in y_set.
prob = np.array([[f_XY(x_i, y_j) for y_j in y_set] for x_i in x_set])

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111)
c = ax.pcolor(prob)
# Cell k of the heatmap spans k..k+1 on each axis, so +0.5 centres the ticks.
ax.set_xticks(np.arange(prob.shape[1]) + 0.5, minor=False)
ax.set_yticks(np.arange(prob.shape[0]) + 0.5, minor=False)
# Label the ticks from the arrays the table was built from (columns follow
# y_set, rows follow x_set) rather than from separately hard-coded ranges,
# so the labels cannot drift out of step with the data.
ax.set_xticklabels(y_set, minor=False)
ax.set_yticklabels(x_set, minor=False)

# Show the first row at the top with the column labels above it, like a table.
ax.invert_yaxis()
ax.xaxis.tick_top()
fig.colorbar(c, ax=ax)
plt.show()

In [None]:
# Every joint probability is greater than or equal to 0
(prob >= 0).all()

In [None]:
# Total of all entries in the joint probability table
prob.sum()