<a href="https://colab.research.google.com/github/naomori/codexa_LinearRegression_Introduction/blob/master/Chapter24.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 重回帰分析のコスト関数


In [0]:
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

In [0]:
# Load the CSV file, skipping the first row
data = np.loadtxt('007-02/007-02.csv', delimiter=',', skiprows=1)
# The first three columns become X, the fourth column becomes y
X, y = data[:, :3], data[:, 3]
# Number of rows in the data set
m = y.shape[0]

In [0]:
# Z-score normalization

def norm(X):
  """Standardize every column of X to zero mean and unit variance.

  Args:
    X: 2-D array-like of shape (m, n), one column per feature.

  Returns:
    X_norm: float array of shape (m, n) holding the standardized columns.
    mean: array of shape (1, n) with the mean of each column.
    std: array of shape (1, n) with the population standard deviation
      (ddof=0) of each column, reported exactly as computed from X.
  """
  X = np.asarray(X, dtype=float)
  # Column-wise statistics in one vectorized call; keepdims keeps the
  # (1, n) shape and avoids converting 1-element arrays with float().
  mean = np.mean(X, axis=0, keepdims=True)
  std = np.std(X, axis=0, keepdims=True)
  # A column whose std is exactly 0 would divide by zero and produce NaN.
  # Divide by 1 there instead, so such a column is mapped to all zeros.
  safe_std = np.where(std == 0, 1.0, std)
  X_norm = (X - mean) / safe_std
  return X_norm, mean, std

In [0]:
# Standardize the features; mean and std receive the per-column statistics returned by norm()
X_norm, mean, std = norm(X)

$\hat{y} = XW = w_0x_0 + w_1x_1 + w_2x_2 + \dots + w_nx_n \quad (x_0 = 1)$

ここでは特徴量の数は $n = 3$

$X = \begin{pmatrix}
x_0^{(1)} & x_1^{(1)} & x_2^{(1)} &x_3^{(1)} \\
x_0^{(2)} & x_1^{(2)} & x_2^{(2)} &x_3^{(2)} \\
x_0^{(3)} & x_1^{(3)} & x_2^{(3)} &x_3^{(3)} \\
x_0^{(4)} & x_1^{(4)} & x_2^{(4)} &x_3^{(4)}
\end{pmatrix}$

$W = \begin{pmatrix}
w_0 \\
w_1 \\
w_2 \\
w_3 \end{pmatrix}$

$\hat{y} = XW = \begin{pmatrix}
\hat{y}^{(1)} \\
\hat{y}^{(2)} \\
\hat{y}^{(3)} \\
\hat{y}^{(4)}
\end{pmatrix}$

## パラメータの初期化と特徴量の前処理(正規化済みデータを使用)

In [59]:
# Initial values of the model parameters: a 4x1 column of zeros
weight_init = np.zeros(shape=(4, 1), dtype=np.float64)
print(weight_init)

[[0.]
 [0.]
 [0.]
 [0.]]


In [60]:
# Feature preprocessing: put a column of ones in front of the normalized features
X_padded = np.hstack([np.ones((m, 1)), X_norm])
X_padded.shape

(99, 4)

In [61]:
# Show the first 10 rows of the padded feature matrix
X_padded[:10]

array([[ 1.        , -1.28095057, -0.76202794, -0.45626449],
       [ 1.        , -0.10238081, -0.05223579, -1.002678  ],
       [ 1.        , -1.75237847, -1.57321897, -1.73122936],
       [ 1.        ,  1.07618895,  1.36734852,  1.36511391],
       [ 1.        ,  0.60476105,  1.36734852,  0.36335579],
       [ 1.        ,  0.01547617, -0.66062906, -0.54733341],
       [ 1.        , -1.04523661, -1.57321897, -1.73122936],
       [ 1.        ,  0.25119012, -0.35643242, -0.72947124],
       [ 1.        , -0.57380871, -0.35643242, -1.27588476],
       [ 1.        ,  1.66547383,  0.96175301,  0.54549363]])

## コスト関数

In [0]:
# Cost function

def cost(X, y, weight):
  """Squared-error cost J = 1/(2m) * sum((X.weight - y)^2).

  Args:
    X: (m, n) design matrix.
    y: m target values, given either as a flat sequence or as an (m, 1)
      column.
    weight: model parameters, either 1-D of length n or 2-D of shape
      (n, k). With k columns, each column is treated as its own weight
      vector.

  Returns:
    1-D array with one cost per weight column (shape (1,) for a single
    weight vector).
  """
  m = len(y)
  # Force the targets into an (m, 1) column so they line up with y_hat.
  # Wrapping an (m, 1) input in a list and transposing it would give a
  # 3-D array and per-sample values instead of a single cost.
  y_col = np.asarray(y).reshape(-1, 1)
  weight = np.asarray(weight)
  if weight.ndim == 1:
    # A flat weight vector makes the prediction 1-D; subtracting the
    # (m, 1) targets would then broadcast to an (m, m) matrix.
    weight = weight.reshape(-1, 1)
  y_hat = np.dot(X, weight)
  squared_error = np.power(y_hat - y_col, 2)
  J = (1.0/(2*m)) * squared_error.sum(axis=0)
  return J

In [63]:
# Check the cost function with a hand-picked weight vector
weight_test = np.array([20, 10, 5, -1]).reshape(-1, 1)
cost(X_padded, y, weight_test)

array([1541.34609465])

コスト関数を行列演算で計算してみる

$J(W) = \frac{1}{2m}(XW-y)^{T}(XW-y)$

In [0]:
def cost_2(X, y, weight):
  """Squared-error cost in matrix form: J = (XW - y)^T (XW - y) / (2m).

  Args:
    X: (m, n) design matrix.
    y: m target values, given as a flat sequence or an (m, 1) column.
    weight: model parameters, either 1-D of length n or 2-D of shape
      (n, k).

  Returns:
    2-D array: shape (1, 1) for a single weight vector, (k, k) when
    weight has k columns.
  """
  m = len(y)
  # asarray lets plain lists through; reshape gives the (m, 1) column.
  y_shaped = np.asarray(y).reshape(m, 1)
  weight = np.asarray(weight)
  if weight.ndim == 1:
    # A flat weight vector makes np.dot(X, weight) 1-D; subtracting the
    # (m, 1) targets would then broadcast to an (m, m) matrix.
    weight = weight.reshape(-1, 1)
  residual = np.dot(X, weight) - y_shaped
  J = np.dot(residual.T, residual) / (m * 2)
  return J

In [67]:
# Evaluate the matrix-form cost on the same inputs as the cost() check
cost_2(X=X_padded, y=y, weight=weight_test)

array([[1541.34609465]])