### 제곱오차를 사용하는 이유

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl 
import math
import scipy
from scipy.stats.stats import pearsonr

import sys
sys.path.append("../")
from credit_tools.tools import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [19]:
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 — confirm the pinned scikit-learn version before re-running this cell.
from sklearn.datasets import load_boston
boston = load_boston()
# One column per feature name, plus the regression target appended as 'target'.
dataset = pd.DataFrame(boston.data, columns=boston.feature_names)
dataset['target'] = boston.target
dataset

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [2]:
# Small sample that the cost functions below are minimised against.
x = np.array([9.5, 8.5, 8.0, 7.0, 6.0])

In [3]:
def squared_cost(v, e):
    """Return the sum of squared deviations between ``v`` and ``e``.

    Parameters
    ----------
    v : scalar or array
        Candidate value(s); combined with ``e`` by ordinary subtraction, so
        NumPy broadcasting rules apply when arrays are passed.
    e : scalar or array
        Reference value(s).

    Returns
    -------
    The total of ``(v - e) ** 2`` over all elements.
    """
    deviations = v - e
    return np.sum(deviations ** 2)

In [4]:
from scipy.optimize import fmin

In [8]:
# Minimise squared_cost starting from 0; x is forwarded through `args`, so it
# arrives as the `e` parameter while fmin varies `v`.
xopt = fmin(squared_cost, x0=0, xtol=1e-8, args=(x,))
xopt

Optimization terminated successfully.
         Current function value: 7.300000
         Iterations: 44
         Function evaluations: 88


array([7.8])

In [9]:
# Show the squared-cost minimiser next to the sample mean for comparison.
print('The result of optmization is %0.1f' % xopt[0])
print('The mean is %0.1f' % (np.mean(x)))

The result of optmization is 7.8
The mean is 7.8


In [13]:
def absolute_cost(v, e):
    """Return the sum of absolute deviations between ``v`` and ``e``.

    Parameters
    ----------
    v : scalar or array
        Candidate value(s); combined with ``e`` by ordinary subtraction, so
        NumPy broadcasting rules apply when arrays are passed.
    e : scalar or array
        Reference value(s).

    Returns
    -------
    The total of ``|v - e|`` over all elements.
    """
    deviations = v - e
    return np.sum(np.absolute(deviations))

In [14]:
# Same search as for squared_cost, but minimising the absolute-deviation cost.
xopt = fmin(absolute_cost, x0=0, xtol=1e-8, args=(x,))
xopt

Optimization terminated successfully.
         Current function value: 5.000000
         Iterations: 44
         Function evaluations: 88


array([8.])

In [16]:
# Show the absolute-cost minimiser next to the sample median for comparison.
print('The result of optmization is %0.1f' % xopt[0])
print('The medain is %0.1f' % (np.median(x)))

The result of optmization is 8.0
The medain is 8.0


### 의사 역행렬 및 기타 최적화 방법

In [20]:
# Row count of the dataset; reused below to shape the design matrix.
observations = len(dataset)
observations

506

In [24]:
# Single predictor 'RM' reshaped into an (observations, 1) column.
X = dataset['RM'].values.reshape((observations, 1))
# Append a column of ones so the second fitted coefficient is the intercept.
Xb = np.column_stack((X, np.ones(observations)))
y = dataset['target'].values

In [28]:
def matrix_inverse(X, y, pseudo=False):
    """Solve least squares by explicitly inverting ``X.T @ X``.

    Parameters
    ----------
    X : array
        Design matrix (include a column of ones if an intercept is wanted).
    y : array
        Target values.
    pseudo : bool, default False
        When truthy, use ``np.linalg.pinv`` (Moore-Penrose pseudo-inverse)
        instead of ``np.linalg.inv``.

    Returns
    -------
    The coefficient vector ``inverse(X.T X) . (X.T y)``.
    """
    invert = np.linalg.pinv if pseudo else np.linalg.inv
    gram = np.dot(X.T, X)
    moment = np.dot(X.T, y)
    return np.dot(invert(gram), moment)

def normal_equations(X, y):
    """Solve the normal equations ``(X.T X) w = X.T y`` for ``w``.

    Uses ``np.linalg.solve`` on the system directly rather than forming an
    explicit inverse.

    Parameters
    ----------
    X : array
        Design matrix (include a column of ones if an intercept is wanted).
    y : array
        Target values.

    Returns
    -------
    The coefficient vector ``w``.
    """
    lhs = np.dot(X.T, X)
    rhs = np.dot(X.T, y)
    return np.linalg.solve(lhs, rhs)

# Run the three closed-form solvers on the same (Xb, y) so their coefficients
# can be compared side by side.
print(matrix_inverse(Xb, y))
print(matrix_inverse(Xb, y, pseudo=True))
print(normal_equations(Xb, y))

[  9.10210898 -34.67062078]
[  9.10210898 -34.67062078]
[  9.10210898 -34.67062078]


### 경사 하강법의 실제 적용

In [29]:
# Rebuild the design matrix for gradient descent. Unlike the earlier cell,
# X itself (not a separate Xb) now carries the trailing column of ones.
observations = len(dataset)
X = dataset['RM'].values.reshape((observations, 1))
X = np.column_stack((X, np.ones(observations)))
y = dataset['target'].values

In [97]:
import random

In [98]:
def random_w(n):
    """Return ``n`` independent draws from the standard normal distribution.

    The values come from NumPy's global random state, so call
    ``np.random.seed(...)`` beforehand if a reproducible start is needed.

    Parameters
    ----------
    n : int
        Number of values to draw.

    Returns
    -------
    numpy.ndarray
        One-dimensional float array of length ``n``.
    """
    # A single vectorised draw replaces a Python-level loop of scalar draws.
    return np.random.normal(size=n)

In [99]:
def hypothesis(X, w):
    """Return the linear model's predictions, i.e. the product ``X . w``.

    Parameters
    ----------
    X : array
        Design matrix, one row per observation.
    w : array or sequence
        Coefficient vector, one entry per column of ``X``.
    """
    predictions = np.dot(X, w)
    return predictions

In [106]:
def loss(X, w, y):
    """Return the signed residuals: model predictions minus targets ``y``.

    Predictions are obtained from ``hypothesis(X, w)``.
    """
    predicted = hypothesis(X, w)
    return predicted - y

In [107]:
def squared_loss(X, w, y):
    """Return the element-wise square of the residuals given by ``loss``.

    The result is per observation; callers sum it to get the total squared
    loss.
    """
    residuals = loss(X, w, y)
    return residuals ** 2

In [108]:
def gradient(X, w, y):
    """Return the per-coefficient gradient of the averaged squared loss.

    For each coefficient ``j`` this is ``sum(residual * X[:, j]) / n``, where
    the residuals come from ``loss(X, w, y)`` and ``n`` is ``len(y)``.

    Parameters
    ----------
    X : array
        Design matrix, indexed as ``X[:, j]`` per coefficient.
    w : sequence
        Current coefficients; its length sets the number of gradient entries.
    y : array
        Target values.

    Returns
    -------
    list
        One gradient value per coefficient, in coefficient order.
    """
    n = float(len(y))
    # The residuals do not depend on j, so compute them once instead of once
    # per coefficient.
    residuals = loss(X, w, y)
    return [np.sum(residuals * X[:, j]) / n for j in range(len(w))]

In [109]:
def update(X, w, y, alpha=0.01):
    """Take one gradient-descent step and return the new coefficients.

    Each coefficient is moved against its gradient entry, scaled by the
    learning rate ``alpha``.

    Returns
    -------
    list
        Updated coefficients, in the same order as ``w``.
    """
    slopes = gradient(X, w, y)
    stepped = []
    for weight, slope in zip(w, slopes):
        stepped.append(weight - alpha * slope)
    return stepped

In [110]:
def optimize(X, y, alpha=0.01, eta=10**-12, iterations=1000):
    """Fit linear-model coefficients by batch gradient descent.

    Starts from random coefficients (``random_w``) and repeatedly applies
    ``update``. Stops early once the sum of squared losses changes by at most
    ``eta`` between two consecutive steps; this check is only made from the
    sixth step (``k >= 5``) onward.

    Parameters
    ----------
    X : array
        Design matrix; ``X.shape[1]`` sets the number of coefficients.
    y : array
        Target values.
    alpha : float, default 0.01
        Learning rate passed to ``update``.
    eta : float, default 1e-12
        Convergence tolerance on the absolute change of the summed squared
        loss.
    iterations : int, default 1000
        Maximum number of gradient steps.

    Returns
    -------
    (w, path)
        ``w`` is the final coefficients. ``path`` is the summed squared loss
        recorded every ``max(1, iterations // 20)`` steps, plus the final
        value when the run stops early on convergence.
    """
    w = random_w(X.shape[1])
    path = list()
    # Integer sampling interval. The float result of ``iterations / 20`` made
    # ``k % interval == 0`` fire irregularly (or almost never) whenever
    # ``iterations`` was not a multiple of 20.
    sample_every = max(1, iterations // 20)
    # The loss before a step equals the loss after the previous step, so it
    # is computed once here and then carried forward.
    SSL = np.sum(squared_loss(X, w, y))
    for k in range(iterations):
        w = update(X, w, y, alpha=alpha)
        new_SSL = np.sum(squared_loss(X, w, y))

        # Converged: record the final loss and stop.
        if k >= 5 and abs(new_SSL - SSL) <= eta:
            path.append(new_SSL)
            return w, path
        # Periodically record the loss to trace the descent.
        if k % sample_every == 0:
            path.append(new_SSL)
        SSL = new_SSL
    return w, path

In [112]:
# Run gradient descent on (X, y) with learning rate 0.048 for up to 25000 steps.
# NOTE(review): optimize() starts from random_w() and no seed is set in the
# cells shown, so the recorded loss path is presumably not reproducible from
# run to run — consider calling np.random.seed(...) first.
alpha = 0.048
w, path = optimize(X, y, alpha, eta=10**-12, iterations=25000)
print("These are our final coefficents: %s" % w)
print("Obtained walking on this path of squared loss %s" % path)

These are our final coefficents: [9.102103335618773, -34.670584864355405]
Obtained walking on this path of squared loss [525749.3732441342, 23925.36501520338, 22501.958089443782, 22165.807774736262, 22086.422862359457, 22067.67540315045, 22063.248022477343, 22062.20245681396, 22061.955537102927, 22061.897224798082, 22061.88345382418, 22061.880201685155, 22061.87943366335, 22061.879252288112, 22061.879209454724, 22061.879199339237, 22061.87919695037, 22061.87919638622, 22061.879196252987, 22061.87919622153, 22061.879196219845]
