# Linear Regression
###     (preliminary: Linear Model)
## 1. Library and Data, Preprocessing
## 2. Compute W and b with 3 methods 

## 3. [Advanced] Regularization: Ridge, Lasso

# Library

In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression
from matplotlib import pyplot as plt

import torch
import torch.nn as nn # for neural network models
import torch.optim as optim # for optimization 
import torch.nn.init as init # for initialization 

ModuleNotFoundError: No module named 'sklearn'

# simple data

In [None]:
# Build and visualise a toy regression data set: a sine curve plus noise.
plt.figure()

# Ground truth: N evenly spaced samples of sin(x) on [-pi/2, pi/2].
N = 40
X = np.linspace(-np.pi/2, np.pi/2, N).reshape(N, 1) # 40 x 1
y_true = np.sin(X)
plt.plot(X, y_true)

# Noisy observations: Gaussian noise with standard deviation 0.1.
# A new array is created (instead of `y += ...`) so the array already handed
# to plt.plot for the clean curve is never modified in place.
y = y_true + 0.1*np.random.randn(N, 1)

plt.scatter(X, y)
plt.title('sine wave data')
plt.show()

# Make feature

In [None]:
# Number of polynomial features, counting the constant term.
n_feature = 2

# Design matrix PHI = [1, X, X^2, ..., X^(n_feature-1)], one column per power.
# The leading column of ones lets the first weight act as the bias.
columns = [np.ones((X.shape[0], 1))]  # 40 x 1
columns.extend(np.power(X, degree) for degree in range(1, n_feature))
PHI = np.hstack(columns)
print('PHI.shape: ', PHI.shape) # 40 x 2
print('PHI[:3] \n', PHI[:3])

# Shuffle: one random permutation is applied to inputs, features and targets
# so the rows stay aligned.
idx = np.random.permutation(N)
shuffle_X, shuffle_PHI, shuffle_y = X[idx], PHI[idx], y[idx]

# Split train / test dataset

In [None]:
# Number of training samples; the remaining N - n shuffled samples are held out.
n = N-20

# train data: the first n shuffled samples
X_train = shuffle_X[:n]
PHI_train = shuffle_PHI[:n]
y_train = shuffle_y[:n]

print(PHI_train)
print(PHI_train.shape)

# test data: the held-out remainder.
# Slicing from n (not from 0) keeps the test set disjoint from the training
# set, so test plots/errors are not computed on samples the model was fit on.
X_test = shuffle_X[n:]
PHI_test = shuffle_PHI[n:]
y_test = shuffle_y[n:]

print(PHI_test)
print(PHI_test.shape)

In [None]:
# Scatter-plot each split in its own figure.
for xs, ys, split in ((X_train, y_train, 'train'), (X_test, y_test, 'test')):
    plt.figure()
    plt.scatter(xs, ys, label=split)
    plt.legend()
    plt.title(f'sine wave {split} data')
    plt.show()

# Linear Regression (Formula)
## method 1: compute W and b with numpy

In [None]:
# Least-squares fit of W_ on PHI_train via the normal equations
#     (PHI^T PHI) W_ = PHI^T y
# Solving this linear system yields the same W_ as the textbook expression
# inv(PHI^T PHI) @ PHI^T @ y, but without explicitly forming the inverse.
gram = PHI_train.T @ PHI_train
W_ = np.linalg.solve(gram, PHI_train.T @ y_train)
# The first entry multiplies the column of ones in PHI (bias b), the rest are W.
print('[b, W] = W_')
print('W_.shape: ', W_.shape)
print('W_ \n', W_)

In [None]:
# Compare closed-form predictions (PHI @ W_) against the targets.
# NOTE: uses the numpy PHI_train / PHI_test; run before the PyTorch cell
# rebinds them to tensors.

# train split
plt.figure()
plt.scatter(X_train, PHI_train @ W_, label='pred')
plt.scatter(X_train, y_train, label='target')
plt.legend()  # without this the 'pred'/'target' labels are never displayed
plt.title('train data')
plt.show()

# test split
plt.figure()
plt.scatter(X_test, PHI_test @ W_, label='pred')
plt.scatter(X_test, y_test, label='target')
plt.legend()
plt.title('test data')
plt.show()

# Linear Regression (Formula)
## method 2: compute W and b with sklearn library

In [None]:
# Fit ordinary least squares on the raw inputs; fit() returns the estimator
# itself, so construction and fitting can be chained.
reg = LinearRegression().fit(X_train, y_train)

# Report the learned intercept (b) and slope (W).
print('b: ', reg.intercept_)
print('W: ', reg.coef_)

In [None]:
# Compare sklearn predictions against the targets, one figure per split.
for xs, ys, caption in ((X_train, y_train, 'train data'),
                        (X_test, y_test, 'test data')):
    plt.figure()
    plt.scatter(xs, reg.predict(xs), label='pred')
    plt.scatter(xs, ys, label='target')
    plt.legend()
    plt.title(caption)
    plt.show()

# Linear Regression (Formula)
## method 3: compute W and b with pytorch library (Gradient Descent)

In [None]:
# Fit the linear model with PyTorch using plain gradient descent on the MSE.

# prepare data
# torch.as_tensor accepts numpy arrays as well as tensors, so this cell can be
# re-run after PHI_train has already been converted.
PHI_train = torch.as_tensor(PHI_train, dtype=torch.float)
label = torch.as_tensor(y_train, dtype=torch.float)

# prepare model
# No separate bias term: PHI already contains a column of ones, so the first
# weight plays the role of the bias.
model = nn.Linear(n_feature, 1, bias=False)

# prepare optimization and loss function
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# train the model (full-batch: every step uses all training rows)
for i in range(100):
    optimizer.zero_grad()  # reset gradients to zero (must always be done before backward)
    output = model(PHI_train)

    loss = loss_function(output, label)

    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(loss.data)

# The parameter objects are updated in place during training, so the list
# only needs to be read once after the loop.
param_list = list(model.parameters())

# param_list[0] is the weight matrix: column 0 pairs with the ones column of
# PHI (bias b), column 1 with X (slope W).
print('b \n', param_list[0][0][0])
print('W_ \n', param_list[0][0][1])

PHI_test = torch.as_tensor(PHI_test, dtype=torch.float)

# Inference only: no autograd graph is needed for the predictions.
with torch.no_grad():
    y_train_pred = model(PHI_train).numpy()
    y_test_pred = model(PHI_test).numpy()

=> We can confirm that the loss is gradually decreasing.

In [None]:
# Compare gradient-descent predictions against the targets, one figure per split.
for xs, pred, target, caption in ((X_train, y_train_pred, y_train, 'train data'),
                                  (X_test, y_test_pred, y_test, 'test data')):
    plt.figure()
    plt.scatter(xs, pred, label='pred')
    plt.scatter(xs, target, label='target')
    plt.legend()
    plt.title(caption)
    plt.show()

# Advanced: Regularization (Ridge, Lasso)

In [None]:
# Linear model trained by gradient descent with an L1 (Lasso) penalty.

# prepare data
# torch.as_tensor accepts numpy arrays as well as tensors, so this cell works
# whether or not PHI_train was already converted, and builds `label` itself.
PHI_train = torch.as_tensor(PHI_train, dtype=torch.float)
label = torch.as_tensor(y_train, dtype=torch.float)

# prepare model
# No separate bias term: PHI already contains a column of ones, so the first
# weight plays the role of the bias.
model = nn.Linear(n_feature, 1, bias=False)

# prepare optimization and loss function
loss_function = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

# Strength of the L1 penalty; constant across iterations.
regularization_coefficient = 1e-3

# train model
for i in range(100):
    optimizer.zero_grad()
    output = model(PHI_train)

    loss = loss_function(output, label)

    # L1 regularization: sum of absolute values of all parameters.
    # Because the bias is folded into the weight matrix, it is penalised too.
    regularization_loss = 0
    for param in model.parameters():
        regularization_loss += torch.sum(torch.abs(param))

    # Scale the accumulated penalty (the original referenced an undefined
    # name `regularization` here, which raised NameError).
    regularization_loss = regularization_loss * regularization_coefficient
    loss = regularization_loss + loss

    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(loss.data)  # MSE plus the L1 penalty

# The parameter objects are updated in place during training, so the list
# only needs to be read once after the loop.
param_list = list(model.parameters())

print('b \n', param_list[0][0][0])
print('W_ \n', param_list[0][0][1])

PHI_test = torch.as_tensor(PHI_test, dtype=torch.float)

# Inference only: no autograd graph is needed for the predictions.
with torch.no_grad():
    y_train_pred = model(PHI_train).numpy()
    y_test_pred = model(PHI_test).numpy()

In [None]:
# Compare L1-regularised predictions against the targets, one figure per split.
for xs, pred, target, caption in ((X_train, y_train_pred, y_train, 'train data'),
                                  (X_test, y_test_pred, y_test, 'test data')):
    plt.figure()
    plt.scatter(xs, pred, label='pred')
    plt.scatter(xs, target, label='target')
    plt.legend()
    plt.title(caption)
    plt.show()

In [None]:
# Linear model trained by gradient descent with an L2 (Ridge) penalty.

# prepare data
# torch.as_tensor accepts numpy arrays as well as tensors, so this cell works
# whether or not PHI_train was already converted, and builds `label` itself.
PHI_train = torch.as_tensor(PHI_train, dtype=torch.float)
label = torch.as_tensor(y_train, dtype=torch.float)

# prepare model
# No separate bias term: PHI already contains a column of ones, so the first
# weight plays the role of the bias.
model = nn.Linear(n_feature, 1, bias=False)

# prepare loss function
loss_function = nn.MSELoss()

# L2 regularization: handled by the optimizer rather than added to the loss.
# weight_decay is the L2 penalty coefficient (lambda).
optimizer = optim.SGD(model.parameters(), lr=0.1, weight_decay=0.01)

# train the model
for i in range(100):
    optimizer.zero_grad()
    output = model(PHI_train)

    loss = loss_function(output, label)

    loss.backward()
    optimizer.step()

    if i % 10 == 0:
        print(loss.data)  # plain MSE; the weight-decay term is not part of `loss`

# The parameter objects are updated in place during training, so the list
# only needs to be read once after the loop.
param_list = list(model.parameters())

print('b \n', param_list[0][0][0])
print('W_ \n', param_list[0][0][1])

PHI_test = torch.as_tensor(PHI_test, dtype=torch.float)

# Inference only: no autograd graph is needed for the predictions.
with torch.no_grad():
    y_train_pred = model(PHI_train).numpy()
    y_test_pred = model(PHI_test).numpy()

In [None]:
# Compare L2-regularised predictions against the targets, one figure per split.
for xs, pred, target, caption in ((X_train, y_train_pred, y_train, 'train data'),
                                  (X_test, y_test_pred, y_test, 'test data')):
    plt.figure()
    plt.scatter(xs, pred, label='pred')
    plt.scatter(xs, target, label='target')
    plt.legend()
    plt.title(caption)
    plt.show()