In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%precision 3
%matplotlib inline

In [None]:
# Read the score table and keep the '点数' column as an independent NumPy array.
df = pd.read_csv('./data/ch4_scores400.csv')
scores = df['点数'].to_numpy(copy=True)
# Preview only the first ten entries.
scores[:10]

In [None]:
# Draw 3 values from [1, 2, 3]. np.random.choice samples with replacement by
# default, so the same value may appear more than once.
np.random.choice([1, 2, 3], 3)

In [None]:
# Same draw with replace=False: sampling without replacement, so drawing 3 of
# 3 values yields each value exactly once, in random order.
np.random.choice([1, 2, 3], 3, replace=False)

In [None]:
# Fix the seed of NumPy's global RNG so the draw below is reproducible.
np.random.seed(0)
np.random.choice([1, 2, 3], size=3)

In [None]:
# Reseed, draw 20 scores (with replacement, the default) and show their mean.
np.random.seed(0)
sample = np.random.choice(scores, size=20)
np.mean(sample)

In [None]:
# Mean over all of `scores`, for comparison with the sample means.
scores.mean()

In [None]:
# Draw five separate samples of 20 scores and print each sample's mean to show
# how the value changes from draw to draw.
for i in range(5):
    sample = np.random.choice(scores, size=20)
    print(f'{i+1}番目の無作為抽出で得た標本平均', np.mean(sample))

In [None]:
# Faces of a loaded die. Each face's probability is proportional to its value;
# the faces sum to 21, so the probabilities are 1/21, 2/21, ..., 6/21.
dice = list(range(1, 7))
prob = [face / 21 for face in dice]

In [None]:
# One roll: draw a single face from `dice`, weighted by `prob`.
np.random.choice(dice, p=prob)

In [None]:
# Roll the loaded die num_trial times; faces are weighted by `prob`.
num_trial = 100
sample = np.random.choice(dice, size=num_trial, p=prob)
sample

In [None]:
# Count how often each face 1..6 occurs in `sample`. Only the counts are
# needed, so index the result rather than unpacking the bin edges into `_`:
# assigning `_` in a notebook shadows IPython's last-output variable and stops
# IPython from updating it.
freq = np.histogram(sample, bins=6, range=(1, 7))[0]
# Tabulate the absolute and relative frequency of each face.
pd.DataFrame({'度数': freq, '相対度数': freq / num_trial},
             index=pd.Index(np.arange(1, 7), name='出目'))

In [None]:
# Relative frequency of each face in `sample` (bars) compared with the
# probabilities in `prob` (gray segments).
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(sample, bins=6, range=(1, 7), density=True, rwidth=0.8)
# One horizontal segment per face, spanning that face's bin from k to k+1.
ax.hlines(prob, np.arange(1, 7), np.arange(2, 8), colors='gray')
# Place each face's tick at the centre of its bin.
ax.set_xticks(np.arange(1, 7) + 0.5)
ax.set_xticklabels(np.arange(1, 7))
ax.set(xlabel='出目', ylabel='相対度数')
plt.show()

In [None]:
# Repeat the experiment with 10,000 rolls and plot it the same way.
num_trial2 = 10000
sample2 = np.random.choice(dice, size=num_trial2, p=prob)

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(sample2, bins=6, range=(1, 7), density=True, rwidth=0.8)
# Gray segments mark the probability of each face across its bin.
ax.hlines(prob, np.arange(1, 7), np.arange(2, 8), colors='gray')
# Place each face's tick at the centre of its bin.
ax.set_xticks(np.arange(1, 7) + 0.5)
ax.set_xticklabels(np.arange(1, 7))
ax.set(xlabel='出目', ylabel='相対度数')
plt.show()

In [None]:
import matplotlib.animation as ani
from IPython.display import HTML

%precision 3
%matplotlib inline

# Frame schedule: num_frame trial counts spaced logarithmically between 10**2
# and 10**5, truncated to integers.
num_frame = 50
space = np.logspace(2, 5, num=num_frame).astype(int)
# Draw every roll up front; each frame then plots a prefix of this array.
sample3 = np.random.choice(dice, p=prob, size=100000)

def animate(nframe):
    """Redraw the shared axes for animation frame `nframe`.

    Looks up this frame's trial count in `space`, plots the relative
    frequencies of that many leading rolls of `sample3`, and overlays the
    probabilities in `prob` as gray segments. Draws on the module-level `ax`
    and returns nothing.
    """
    n_rolls = space[nframe]
    # Start from an empty axes so earlier frames do not accumulate.
    ax.clear()
    ax.hist(sample3[:n_rolls], bins=6, range=(1, 7), density=True, rwidth=0.8)
    ax.hlines(prob, np.arange(1, 7), np.arange(2, 8), colors='gray')
    # Fixed y-range so the bars are comparable from frame to frame.
    ax.set_ylim(0, 0.3)
    # Ticks sit at bin centres and are labelled with the face values.
    ax.set_xticks(np.arange(1, 7) + 0.5)
    ax.set_xticklabels(np.arange(1, 7))
    ax.set_title(f'試行回数:{n_rolls}')
    ax.set_xlabel('出目', fontsize=12)
    ax.set_ylabel('相対度数', fontsize=12)
    
# Build the animation on a fresh figure and render it to an HTML/JS player.
fig, ax = plt.subplots(figsize=(10, 6))
anim = ani.FuncAnimation(fig, animate, frames=num_frame)
js_anim = HTML(anim.to_jshtml())
# Close the figure once the player has been rendered from it.
plt.close(fig)

js_anim

In [None]:
# Display the `scores` array.
scores

In [None]:
# Wrap the array in a Series to get pandas' summary statistics for it.
pd.Series(scores).describe()

In [None]:
# Density histogram of all scores, using 100 unit-width bins over 0..100 and
# showing only the 20..100 range.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(scores, bins=100, range=(0, 100), density=True)
ax.set(xlim=(20, 100), ylim=(0, 0.042), xlabel='点数', ylabel='相対度数 ')
plt.show()

In [None]:
# Draw a single element of `scores` (np.random.choice is uniform by default).
np.random.choice(scores)

In [None]:
# Draw 10,000 scores with replacement and plot them with the same bins and
# axis limits as the histogram of `scores`, so the two can be compared.
sample = np.random.choice(scores, size=10000)

fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(sample, bins=100, range=(0, 100), density=True)
ax.set(xlim=(20, 100), ylim=(0, 0.042), xlabel='点数', ylabel='相対度数 ')
plt.show()

In [None]:
# Ten sample means, each computed from its own draw of 20 scores.
sample_means = [np.mean(np.random.choice(scores, size=20)) for _ in range(10)]
sample_means

In [None]:
# 10,000 sample means, each computed from its own draw of 20 scores.
sample_means = [np.random.choice(scores, 20).mean() for _ in range(10000)]

fig = plt.figure(figsize=(10, 6))
ax = fig.add_subplot(111)
ax.hist(sample_means, bins=100, range=(0, 100), density=True)
# Mark the mean of all scores. The line is drawn up to y=1 and is cut off by
# the y-limit set below.
ax.vlines(np.mean(scores), 0, 1, 'gray')
ax.set_xlim(50, 90)
ax.set_ylim(0, 0.13)
ax.set_xlabel('点数')
ax.set_ylabel('相対度数 ')
# Render explicitly, as the other plotting cells do. Without this the cell's
# last expression (the Text object returned by set_ylabel) is echoed as output.
plt.show()