# 회귀분석

## 독립변수가 1개인 선형회귀

In [15]:
import numpy as np

# Training set: five samples of one feature, stored as (5, 1) columns.
x_data = np.array([[1], [2], [3], [4], [5]])  # independent variable
t_data = np.array([[2], [3], [4], [5], [6]])  # dependent variable (target)

# Random starting parameters; W is drawn before b.
W = np.random.rand(1, 1)
b = np.random.rand(1)

In [16]:
def loss_func(x, t):
    """Return the mean squared error of the linear model on ``(x, t)``.

    The prediction is ``np.dot(x, W) + b``, using the module-level
    parameters ``W`` and ``b`` as they are at call time.  The summed
    squared error is divided by ``len(x)``.
    """
    prediction = np.dot(x, W) + b
    residual = t - prediction  # t holds the target values
    return np.sum(np.square(residual)) / len(x)  # MSE

In [17]:
def numerical_derivative(f, x, delta_x=1e-4):
    """Estimate the gradient of ``f`` at ``x`` by central differences.

    Each element of ``x`` is perturbed in place by ``+delta_x`` and then
    ``-delta_x``, ``f(x)`` is evaluated for both, and the element is
    restored.  Because the perturbation is written into ``x`` itself, ``f``
    may either use its argument or read the same array through another
    reference.

    Args:
        f: Callable invoked as ``f(x)`` that returns a scalar.
        x: Floating-point NumPy array to differentiate with respect to.
        delta_x: Half-width of the finite-difference step.

    Returns:
        Array shaped like ``x`` holding
        ``(f(x + delta_x) - f(x - delta_x)) / (2 * delta_x)`` per element.

    Raises:
        TypeError: If ``x`` does not have a floating-point dtype.  Writing
            ``value + delta_x`` into such an array would drop the
            perturbation and silently produce an all-zero gradient.
    """
    if not np.issubdtype(x.dtype, np.floating):
        raise TypeError(
            "numerical_derivative requires a floating-point array, got dtype "
            + str(x.dtype)
        )

    grad = np.zeros_like(x)
    # np.ndindex visits every index tuple and simply yields nothing for a
    # zero-size array, so empty input returns an empty gradient.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + delta_x
            fx1 = f(x)  # f(x + delta_x)

            x[idx] = float(tmp_val) - delta_x
            fx2 = f(x)  # f(x - delta_x)
        finally:
            # Always restore the element, even if f raised.
            x[idx] = tmp_val
        grad[idx] = (fx1 - fx2) / (2 * delta_x)

    return grad

In [18]:
# Gradient descent on the MSE loss for the single-feature model.
learning_rate = 1e-2
# f ignores its argument: the loss is always computed from the current global
# W and b, which numerical_derivative perturbs in place while it calls f.
f = lambda x : loss_func(x_data, t_data)
print('Initial loss value = ', loss_func(x_data, t_data), 'Initial W = ', W, '\n',
      ', b = ', b)
for step in range(6001):
    # W is updated first, so the gradient for b is taken at the new W.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    # Report progress every 300 steps.
    if (step % 300 == 0):
        print('step = ', step, 'loss value = ', loss_func(x_data, t_data), 'W = ', W,
              'b = ', b)

Initial loss value =  1.0168471317572076 Initial W =  [[0.83689448]] 
 , b =  [0.50766472]
step =  0 loss value =  0.6114819942128054 W =  [[0.90231781]] b =  [0.52337236]
step =  300 loss value =  0.0042685472196382765 W =  [[1.04242646]] b =  [0.84686451]
step =  600 loss value =  0.0005419082719424395 W =  [[1.01511679]] b =  [0.94543694]
step =  900 loss value =  6.879731208045901e-05 W =  [[1.0053862]] b =  [0.98055887]
step =  1200 loss value =  8.73407990716295e-06 W =  [[1.00191913]] b =  [0.99307301]
step =  1500 loss value =  1.108824596743887e-06 W =  [[1.0006838]] b =  [0.99753187]
step =  1800 loss value =  1.4076949139607273e-07 W =  [[1.00024364]] b =  [0.99912059]
step =  2100 loss value =  1.7871221260884625e-08 W =  [[1.00008681]] b =  [0.99968666]
step =  2400 loss value =  2.2688193740723693e-09 W =  [[1.00003093]] b =  [0.99988836]
step =  2700 loss value =  2.880352314404119e-10 W =  [[1.00001102]] b =  [0.99996022]
step =  3000 loss value =  3.656716594906456e-11

In [28]:
def predict(x):
    """Return the linear model's output ``np.dot(x, W) + b`` for input ``x``.

    ``W`` and ``b`` are the module-level parameters as they are at call time.
    """
    return np.dot(x, W) + b

In [29]:
# Predict for one single-feature sample; np.dot needs W to have exactly one row.
# NOTE(review): the error recorded below reports W as (3, 1), so this cell was
# presumably run after W had been re-created for the three-feature model.
predict(np.array([43]))

ValueError: shapes (1,) and (3,1) not aligned: 1 (dim 0) != 3 (dim 0)

## 독립변수가 2개 이상인 선형회귀

In [23]:
import numpy as np

# Read the CSV as float32.  Every column except the last is a feature; the
# last column is the target, kept two-dimensional by indexing with a list.
loaded_data = np.loadtxt(
    '../AI데이터/data_01.csv',
    dtype=np.float32,
    delimiter=',',
    skiprows=0,
)
x_data = loaded_data[:, :-1]
t_data = loaded_data[:, [-1]]

# Show the rank and shape of both arrays.
print('x_data.ndim = ', x_data.ndim,
      ', x_data.shape = ', x_data.shape)
print('t_data.ndim = ', t_data.ndim,
      ', t_data.shape = ', t_data.shape)

x_data.ndim =  2 , x_data.shape =  (9, 3)
t_data.ndim =  2 , t_data.shape =  (9, 1)


In [24]:
# Seed the global NumPy RNG so the initial parameters are reproducible.
np.random.seed(42)
# A (3, 1) weight column and a single bias; W is drawn before b.
W = np.random.rand(3, 1)
b = np.random.rand(1)

print('W = ', W, 'W.shape = ', W.shape, ', b = ', b, 'b.shape = ', b.shape)

W =  [[0.37454012]
 [0.95071431]
 [0.73199394]] W.shape =  (3, 1) , b =  [0.59865848] b.shape =  (1,)


In [25]:
# Gradient descent on the MSE loss for the multi-feature model.
learning_rate = 1e-5
# f ignores its argument: the loss is always computed from the current global
# W and b, which numerical_derivative perturbs in place while it calls f.
f = lambda x : loss_func(x_data, t_data)
print('Initial loss value = ', loss_func(x_data, t_data))

for step in range(30001):
    # W is updated first, so the gradient for b is taken at the new W.
    W -= learning_rate * numerical_derivative(f, W)
    b -= learning_rate * numerical_derivative(f, b)

    # Report progress every 1000 steps.
    if(step % 1000 == 0):
        print('step = ', step, 'loss value = ', loss_func(x_data, t_data),
              'W = ', W, 'b = ', b)

Initial loss value =  18.912018958218844
step =  0 loss value =  11.964774630423404 W =  [[0.36933621]
 [0.94535252]
 [0.72658915]] b =  [0.59861768]
step =  1000 loss value =  5.7262478709054525 W =  [[0.40606364]
 [0.83350263]
 [0.77156443]] b =  [0.59992002]
step =  2000 loss value =  4.88666185645906 W =  [[0.43237927]
 [0.75918827]
 [0.8174874 ]] b =  [0.60100567]
step =  3000 loss value =  4.418931926580752 W =  [[0.44618706]
 [0.70489027]
 [0.85626088]] b =  [0.60182856]
step =  4000 loss value =  4.145376760270418 W =  [[0.45212945]
 [0.66476697]
 [0.88901644]] b =  [0.60241728]
step =  5000 loss value =  3.977693643264539 W =  [[0.45326082]
 [0.63476395]
 [0.91670093]] b =  [0.60279985]
step =  6000 loss value =  3.870565094708016 W =  [[0.45157271]
 [0.61205328]
 [0.94010838]] b =  [0.60300253]
step =  7000 loss value =  3.7997825461522883 W =  [[0.44834804]
 [0.59465101]
 [0.95990588]] b =  [0.60304931]
step =  8000 loss value =  3.7518023643534186 W =  [[0.44439943]
 [0.581

In [30]:
# One sample with three feature values, evaluated with the current W and b.
test_data = np.array([100, 98, 81])
predict(test_data)

array([179.13680055])

In [31]:
from sklearn.linear_model import LinearRegression

# Reload the CSV as float32: all columns but the last are features, and the
# last column is the target, kept two-dimensional by indexing with a list.
loaded_data = np.loadtxt(
    '../AI데이터/data_01.csv',
    dtype=np.float32,
    delimiter=',',
    skiprows=0,
)
x_data = loaded_data[:, :-1]
t_data = loaded_data[:, [-1]]

# Fit scikit-learn's linear regression to the same data and print the learned
# coefficients and intercept.
model = LinearRegression()
model.fit(x_data, t_data)
print(model.coef_, model.intercept_)

[[0.38179067 0.5078318  1.127576  ]] [-1.0562897]


In [34]:
# A single sample given as a 2-D (1, 3) array for the fitted model.
test_data = np.array([[100, 98, 81]])
pred_data = model.predict(test_data)
# Bare expression so the notebook displays the prediction.
pred_data

array([[178.22395015]])