In [None]:
import numpy as np
import pandas as pd

# Show floating-point results with 3 decimal places in IPython's text output.
%precision 3
# Disabled alternative spelling of the pandas precision setting (presumably
# superseded by the assignment below -- TODO confirm before deleting).
#pd.set_option('precision', 3)
# Show 3 decimal places when pandas objects are displayed.
pd.options.display.precision = 3

In [None]:
# Read the score table and use the student-number column as the row index.
df = pd.read_csv(
    './data/ch2_scores_em.csv',
    index_col='生徒番号',
)

In [None]:
# Marks of the first ten rows of df, as plain NumPy arrays.
en_scores = np.array(df['英語'].iloc[:10])
ma_scores = np.array(df['数学'].iloc[:10])

# Small table of those marks with the rows labelled A through J.
scores_df = pd.DataFrame(
    {'英語': en_scores, '数学': ma_scores},
    index=pd.Index(list('ABCDEFGHIJ'), name='生徒'),
)
scores_df

In [None]:
# Display the score table that was read from the CSV file.
df

In [None]:
# Work on an independent copy so that columns added to summary_df never
# appear in scores_df (deep=True is the pandas default, spelled out here).
summary_df = scores_df.copy(deep=True)

In [None]:
# Display the working table in its current state.
summary_df

In [None]:
# Deviation of each mark from its subject mean (value minus column mean).
summary_df['英語の偏差'] = summary_df['英語'].sub(summary_df['英語'].mean())
summary_df['数学の偏差'] = summary_df['数学'].sub(summary_df['数学'].mean())

In [None]:
# Display the working table in its current state.
summary_df

In [None]:
# Row-wise product of the two deviation columns.
summary_df['偏差同士の積'] = summary_df['英語の偏差'].mul(summary_df['数学の偏差'])
summary_df

In [None]:
# Mean of the deviation products.
summary_df.loc[:, '偏差同士の積'].mean()

In [None]:
# 2x2 covariance matrix of the two score vectors; ddof=0 divides by N.
cov_mat = np.cov(np.vstack((en_scores, ma_scores)), ddof=0)

In [None]:
# Display the covariance matrix.
cov_mat

In [None]:
# Off-diagonal entries of the covariance matrix.
(cov_mat[0, 1], cov_mat[1, 0])

In [None]:
# Diagonal entries of the covariance matrix.
(cov_mat[0, 0], cov_mat[1, 1])

In [None]:
# Variance of each score vector with ddof=0 (divide by N).
en_scores.var(ddof=0), ma_scores.var(ddof=0)

In [None]:
# Correlation coefficient by hand: the ddof=0 covariance divided by the
# product of the two standard deviations (std() also defaults to ddof=0).
np.cov(en_scores, ma_scores, ddof=0)[0, 1] / (en_scores.std() * ma_scores.std())

In [None]:
# Correlation-coefficient matrix of the two score vectors.
np.corrcoef(x=en_scores, y=ma_scores)

In [None]:
# Pairwise correlation of the table's columns ('pearson' is the pandas default).
scores_df.corr(method='pearson')

In [None]:
# Plotting library used by the figure cells below.
import matplotlib.pyplot as plt
# Render figures inline in the notebook output.
%matplotlib inline

In [None]:
# Both score columns of df, each converted to its own NumPy array.
english_scores, math_scores = (np.array(df[subject]) for subject in ('英語', '数学'))

In [None]:
# Display the English score array.
english_scores

In [None]:
# Display the math score array.
math_scores

In [None]:
# Scatter plot of English (x) against math (y) on an 8x8-inch figure.
fig, ax = plt.subplots(figsize=(8, 8))
ax.scatter(english_scores, math_scores)
ax.set(xlabel='英語', ylabel='数学')

plt.show()

In [None]:
# Least-squares straight line through the (English, math) points.
# np.polyfit returns coefficients highest degree first: [slope, intercept].
poly_fit = np.polyfit(english_scores, math_scores, 1)
poly_1d = np.poly1d(poly_fit)

# Evaluate the fitted line over the observed range of English scores.
xs = np.linspace(english_scores.min(), english_scores.max())
ys = poly_1d(xs)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)
ax.set_xlabel('英語')
ax.set_ylabel('数学')
ax.scatter(english_scores, math_scores, label='点数')
# The '+' format flag always writes the slope's own sign, so a negative
# slope is shown as "a-b.bbx" rather than the malformed "a+-b.bbx".
ax.plot(xs, ys, color='gray', label=f'{poly_fit[1]:.2f}{poly_fit[0]:+.2f}x')

ax.legend(loc='upper left')

plt.show()

In [None]:
fig, ax = plt.subplots(figsize=(10, 8))

# 2-D histogram with 9 x-bins over 35-80 and 8 y-bins over 55-95.
# c holds hist2d's return tuple: (counts, x edges, y edges, image).
c = ax.hist2d(
    english_scores,
    math_scores,
    bins=[9, 8],
    range=[(35, 80), (55, 95)],
)

# Label the axes and place a tick on every bin edge.
ax.set(xlabel='英語', ylabel='数学', xticks=c[1], yticks=c[2])

# Colour scale for the histogram image.
fig.colorbar(c[3], ax=ax)

plt.show()

In [None]:
# Load the array stored in ch3_anscombe.npy.
anscombe_data = np.load('./data/ch3_anscombe.npy')
# Print the array's shape, then show its first entry as the cell result.
# (A bare `anscombe_data` in the middle of the cell displayed nothing,
# because only the last expression of a cell is echoed, so it was removed.)
print(anscombe_data.shape)
anscombe_data[0]

In [None]:
# Empty table whose row labels name the statistics to be filled in later.
stats_df = pd.DataFrame(
    index=[
        'Xの平均',
        'Xの分散',
        'Yの平均',
        'Yの分散',
        'XとYの相関係数',
        'XとYの回帰直線',
    ]
)
stats_df

In [None]:
# Add one column of formatted summary statistics per dataset.
for i, data in enumerate(anscombe_data):
    # Column 0 holds the x values, column 1 the y values.
    dataX = data[:, 0]
    dataY = data[:, 1]
    # Straight-line fit; coefficients come back as [slope, intercept].
    poly_fit = np.polyfit(dataX, dataY, 1)
    # Six entries: mean/variance of X, mean/variance of Y, correlation, fitted
    # line. np.var is called with its default ddof=0 (divide by N).
    stats_df[f'data{i+1}'] = [
        f'{np.mean(dataX):.2f}',
        f'{np.var(dataX):.2f}',
        f'{np.mean(dataY):.2f}',
        f'{np.var(dataY):.2f}',
        f'{np.corrcoef(dataX, dataY)[0, 1]:.2f}',
        # The '+' format flag writes the slope's own sign, so a negative slope
        # reads "a-b.bbx" instead of the malformed "a+-b.bbx".
        f'{poly_fit[1]:.2f}{poly_fit[0]:+.2f}x',
    ]
stats_df

In [None]:
# 2x2 grid of panels that share both axes, one panel per dataset.
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(10, 10), sharex=True, sharey=True)

# Common x grid on which every fitted line is evaluated.
xs = np.linspace(0, 30, 100)
for i, data in enumerate(anscombe_data):
    # Straight-line fit of column 1 on column 0 for this dataset.
    poly_fit = np.polyfit(data[:, 0], data[:, 1], 1)
    poly_1d = np.poly1d(poly_fit)
    ys = poly_1d(xs)

    # Row-major position i in the grid is the same panel as axes[i//2, i%2].
    ax = axes.flat[i]
    ax.set(xlim=(4, 20), ylim=(3, 13), title=f'data{i+1}')

    ax.scatter(data[:, 0], data[:, 1])
    ax.plot(xs, ys, color='gray')

plt.tight_layout()
plt.show()

In [None]:
# Display the English score array.
english_scores

In [None]:
# Display the math score array.
math_scores

In [None]:
# Scatter plot of English (x) against math (y) on an 8x8-inch figure.
fig, ax = plt.subplots(figsize=(8, 8))

ax.scatter(english_scores, math_scores)
ax.set(xlabel='英語', ylabel='数学')

plt.show()

In [None]:
fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111)

# Noisy samples of the cubic x^3 + 2x^2 + 3x + 10 at 20 points on [-10, 10].
# A locally seeded generator keeps the noise, and therefore the printed
# coefficients and the figure, identical on every run without touching
# NumPy's global random state.
rng = np.random.default_rng(0)
x = np.linspace(-10, 10, 20)
y = x**3 + 2*x**2 + 3*x + 10 + rng.standard_normal(x.size)*50

# Coefficients of the fitted polynomials (highest degree first)
poly_fit1d = np.polyfit(x, y, 1)
poly_fit2d = np.polyfit(x, y, 2)
poly_fit3d = np.polyfit(x, y, 3)

print(poly_fit1d)
print(poly_fit2d)
print(poly_fit3d)

# Evaluate each fitted polynomial at the sample points
y1 = np.poly1d(poly_fit1d)(x)  # degree 1
y2 = np.poly1d(poly_fit2d)(x)  # degree 2
y3 = np.poly1d(poly_fit3d)(x)  # degree 3

# Draw on the axes created above rather than through pyplot's implicit
# "current axes", so the figure this cell builds is the one it draws on.
ax.scatter(x, y, label='元データ')
ax.plot(x, y1, label='1次')
ax.plot(x, y2, label='2次')
ax.plot(x, y3, label='3次')
ax.legend()
plt.show()

In [None]:
# A plain for loop: visit each element of the list and print it

l = ['Alice', 'Bob', 'Charlie']

for name in l:
    print(name)

In [None]:
# for loop using enumerate(): each step yields (counter, element), counting from 0

for i, name in enumerate(l):
    print(i, name)

In [None]:
# for loop using enumerate() with an explicit start value: the counter begins at 12

for i, name in enumerate(l, 12):
    print(i, name)