In [None]:
# Basic setting
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

# Optimization
## [문제 1] 선형 회귀 모델

In [None]:
# Read the (X, y) pair stored in the linear-regression pickle.
# NOTE(review): pickle.load can execute arbitrary code; only use with trusted files.
with open('./data/linear_regression.pickle', 'rb') as pickle_file:
    X, y = pickle.load(pickle_file)

In [None]:
# Scatter-plot the raw samples to inspect how y varies with X
fig, ax = plt.subplots()
ax.scatter(X, y)
plt.show()

## [P1.1] 경사 하강법을 이용해 최적의 선형 회귀 모델을 찾아주세요. 단, <font color=red>Tensorflow</font>는 사용하지 마세요.

### 최적의 모델이란, 모든 데이터에 대해서 실제값과 예측값의 차이가 제일 작은 모델을 말합니다.
### 선형 회귀 모델은 일반적으로 <font color=red>MSE(Mean Squared Error)</font> 손실을 최소화하도록 학습합니다.
### 손실 함수와 모델 파라미터의 gradient에 관한 빈칸 부분을 채워넣어주세요.

**MSE 손실 함수**
\begin{equation*}
L = \frac{1}{n} \sum_{i=1}^n (y_i - (wX_i + b))^2
\end{equation*}

**Gradients 계산**
\begin{equation*}
\frac{\partial L}{\partial w} = -2 * \frac{1}{n} \sum_{i=1}^n (y_i - (wX_i + b)) * X_i
\end{equation*}

\begin{equation*}
\frac{\partial L}{\partial b} = -2 * \frac{1}{n} \sum_{i=1}^n (y_i - (wX_i + b))
\end{equation*}

In [None]:
# Training setting
# Exercise cell: the right-hand sides of `loss`, `dw` and `db` below are intentionally
# left blank and must be filled in before this cell can run.
epochs = 1000
learning_rate = 1e-7

# Model weights and bias parameters
# f(x) = w * X + b
w = 0.0
b = 0.0

# Store model parameters and loss for visualization
# (one entry is appended per epoch, after that epoch's parameter update)
w_list, b_list, loss_list = [], [], []

# Perform Gradient Descent: one parameter update per epoch
for i in range(epochs):
    
    
#################################################
######## Hint: use +, -, *, /, np.mean() ########
    # MSE loss
    # a**2 = a * a = square of a, a**3 = a * a * a
    loss = 
    
    # derivative of the loss w.r.t. w
    dw = 
    # derivative of the loss w.r.t. b
    db =
#################################################
    
    # update w and b by stepping against the gradient
    w = w - learning_rate * dw
    b = b - learning_rate * db
    
    # snapshot the updated parameters and this epoch's loss
    w_list.append(w)
    b_list.append(b)
    loss_list.append(loss)

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### 학습 결과를 그려보면 다음과 같습니다.

In [None]:
# Overlay the fitted line on the training data
fig, ax = plt.subplots()
ax.scatter(X, y)                     # original samples
y_pred = w * X + b                   # model prediction for every sample
ax.plot(X, y_pred, color='red')
plt.show()

In [None]:
# Visualize the model as it looked at selected epochs during training.
# The snapshots are read into their own names so the trained `w` / `b`
# used by other cells are not overwritten by this loop.
epochs_list = [1, 200, 400, 600, 800, 1000]

for epoch_number in epochs_list:
    # Epoch numbers are 1-based, list indices are 0-based
    snapshot_w = w_list[epoch_number - 1]
    snapshot_b = b_list[epoch_number - 1]

    plt.scatter(X, y)   # scatter the original data
    plt.plot(X, snapshot_w * X + snapshot_b, color='red')
    plt.title('Epoch %d' % epoch_number)   # tell the otherwise identical-looking figures apart
    plt.show()

In [None]:
# Plot the recorded loss values in the order they were appended (one per epoch)
fig, ax = plt.subplots()
ax.plot(loss_list)
plt.show()

### [P1.2] 이번엔 <font color=red>Tensorflow</font>를 활용해, 경사 하강법으로 최적의 선형 회귀 모델을 찾아주세요.

In [None]:
# Training setting
# Exercise cell: the `???` placeholder and the unfinished statements below are
# intentionally left blank and must be completed before this cell can run.
epochs = 1000
learning_rate = 1e-7

# Model weights and bias parameters (TensorFlow variables, both starting at 0.0)
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################

    # Define MSE loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        loss = 
        
    # Get gradients of parameters (Hint: tape.gradient())
    dw, db =    # dloss_dw, dloss_db
    
    # Update model weights and bias (Hint : assign_sub())
    w.
    b.
    
#################################################


# Convert parameters type from tensor to numpy
# (this rebinds `w` and `b`: from here on they are NumPy values, not tf.Variable)
w = w.numpy()
b = b.numpy()

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### 학습 결과를 그려보면 다음과 같습니다.

In [None]:
# Overlay the line learned with TensorFlow on the training data
fig, ax = plt.subplots()
ax.scatter(X, y)                     # original samples
y_pred = w * X + b                   # model prediction for every sample
ax.plot(X, y_pred, color='red')
plt.show()

### 학습된 모델의 파라미터가 정확히 일치합니다.
### 이처럼 Tensorflow를 사용하시면, 직접 파라미터의 gradient 값을 계산할 필요가 없어 매우 편리합니다.

## [문제 2] 비선형 회귀 모델

### 실제로는 선형적으론 표현하기 힘든 복잡한 데이터들이 많습니다.

In [None]:
# Read the (X, y) pair stored in the polynomial-regression pickle.
# NOTE(review): pickle.load can execute arbitrary code; only use with trusted files.
with open('./data/polynomial_regression.pickle', 'rb') as pickle_file:
    X, y = pickle.load(pickle_file)

In [None]:
# Scatter-plot the raw samples to inspect how y varies with X
fig, ax = plt.subplots()
ax.scatter(X, y)
plt.show()

### [P2.1] <font color=red>Tensorflow</font>를 통한 경사 하강법으로, 최적의 비선형 회귀 모델을 찾아주세요.

In [None]:
# Training setting
# Exercise cell: the `???` placeholder and the unfinished statements below are
# intentionally left blank and must be completed before this cell can run.
epochs = 1000
learning_rate = 1e-7

# Model weights and bias parameters (TensorFlow variables, both starting at 0.0)
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################
############# Hint : Same as before #############

    # Define MSE loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        loss = 
    
    # Get gradients of parameters (Hint: tape.gradient())
    dw, db =    # dloss_dw, dloss_db
    
    # Update model weights and bias (Hint : assign_sub())
    w.
    b.
    
#################################################


# Convert parameters type from tensor to numpy
# (this rebinds `w` and `b`: from here on they are NumPy values, not tf.Variable)
w = w.numpy()
b = b.numpy()

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### 학습 결과를 그려보면 다음과 같습니다.

In [None]:
# Overlay the fitted straight line on the polynomial dataset
fig, ax = plt.subplots()
ax.scatter(X, y)                     # original samples
y_pred = w * X + b                   # model prediction for every sample
ax.plot(X, y_pred, color='red')
plt.show()

### 선형 회귀 모델의 경우, 위의 데이터를 잘 표현하지 못하는 것 같습니다.
### 이럴 경우, 좀 더 복잡한 회귀 모델을 이용해 데이터를 표현할 수 있습니다.

## [P2.2] 이번엔 <font color=red>Tensorflow</font>를 이용한 경사 하강법으로, 최적의 3차 회귀 모델을 찾아주세요.

### Cubic regression model. (3차 함수)
\begin{equation*}
f(x) = w_1X^3 + w_2X^2 + w_3X + b
\end{equation*}

In [None]:
# Training setting
# Exercise cell: the parameter definitions, the `???` placeholder and the unfinished
# statements below are intentionally left blank and must be completed before this cell can run.
epochs = 1000
# NOTE(review): far smaller than the 1e-7 used in the earlier cells — presumably because
# the X**3 term makes the gradients much larger; confirm against the scale of the data.
learning_rate = 1e-14


#################################################
### Define model weights and bias parameters ###
w1 = 
w2 = 
w3 = 
b = 

# Perform Gradient Descent
for i in range(epochs):
    
####### Hint : Consider multiple weights  #######

    # Define MSE loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        loss = 
    
    # Get gradients of parameters (Hint: tape.gradient())
    # dloss_dw1, dloss_dw2, dloss_dw3, dloss_db
    dw1, dw2, dw3, db = 
    
    # Update model weights and bias (Hint : assign_sub())
    w1.
    w2.
    w3.
    b.
    
#################################################

# Convert parameters type from tensor to numpy
# (this rebinds w1, w2, w3 and b to the values returned by .numpy())
w1 = w1.numpy()
w2 = w2.numpy()
w3 = w3.numpy()
b = b.numpy()

# Print the trained parameters value
print('Trained model weights 1 : %.4f' % w1)
print('Trained model weights 2 : %.4f' % w2)
print('Trained model weights 3 : %.4f' % w3)
print('Trained model bias : %.4f' % b)

### 학습 결과를 그려보면 다음과 같습니다.

In [None]:
# Visualize the trained cubic regression model
plt.scatter(X, y)   # scatter the original data
y_pred = w1*X**3 + w2*X**2 + w3*X + b

# plt.plot joins points in the order they are given. The dataset is not guaranteed
# to be stored in ascending-X order, and a curve drawn over unsorted X would zig-zag,
# so draw the prediction sorted by X (a no-op when the data is already sorted).
order = np.argsort(np.ravel(X))
plt.plot(np.ravel(X)[order], np.ravel(y_pred)[order], color='red')
plt.show()

## [문제 3] Logistic 회귀 (분류 문제)

In [None]:
# Read the pickled logistic-regression dataset.
# NOTE(review): pickle.load can execute arbitrary code; only use with trusted files.
with open('./data/logistic_regression.pickle', 'rb') as pickle_file:
    data = pickle.load(pickle_file)

data.head()   # preview the first rows of the dataset

### '나이' 정보에 근거해 유저가 구매를 할지, 안할지를 구별하는 logistic 회귀 모델을 학습할 것입니다.

In [None]:
# Pull the feature ('Age') and the label ('Purchased') out as NumPy arrays
age_column = data['Age']
purchased_column = data['Purchased']
X = age_column.to_numpy(dtype=np.float32)
y = purchased_column.to_numpy()

# Center the 'Age' values
# Shifting X so that its mean becomes 0 is a way to make training more stable.
def normalize(X):
    """Return ``X`` shifted so that its mean is zero.

    Only the mean is subtracted; the values are not rescaled.
    """
    mean_value = X.mean()
    return X - mean_value
X = normalize(X)   # replace the raw ages with their mean-centered values

# Scatter the centered ages against the purchase labels
fig, ax = plt.subplots()
ax.scatter(X, y)
plt.show()

### 보시는 바와 같이 어릴수록 구매를 하지 않고, 나이가 많을수록 구매를 하는 경향성을 확인할 수 있습니다.

## [P3.1] <font color=red>Tensorflow</font>를 이용하지 않고, 최적의 logistic 회귀 모델을 경사 하강법으로 찾아주세요.

### Logistic 함수의 모양은 S 모양으로, 입력을 0부터 1 사이의 값으로 변환시켜줍니다.
### 따라서 확률의 관점에서, Logistic 회귀 모델의 output이 0.5보다 크냐, 작냐를 기준으로 데이터를 분류할 수 있습니다.
### Logistic 회귀 모델은 음의 로그 가능도(negative log-likelihood) 손실 함수를 최소화하는 방향으로 학습합니다.

**Logistic 회귀 모델**
\begin{equation*}
P(y_i=1|X_i) = \frac{1}{1 + e^{-(wX_i + b)}}
\end{equation*}

**음의 로그 가능도(negative log-likelihood) 손실 함수**
\begin{equation*}
L = -\frac{1}{n} \sum_{i=1}^n (y_i * \log(P(y_i=1|X_i)) + (1 - y_i) * \log(1 - P(y_i=1|X_i)))
\end{equation*}

**Gradients 계산**
\begin{equation*}
\frac{\partial L}{\partial w} = \frac{1}{n} \sum_{i=1}^n ((P(y_i=1|X_i) - y_i) * X_i)
\end{equation*}

\begin{equation*}
\frac{\partial L}{\partial b} = \frac{1}{n} \sum_{i=1}^n (P(y_i=1|X_i) - y_i)
\end{equation*}

In [None]:
# Training setting
# Exercise cell: the `?` / `???` placeholders and the empty right-hand sides below are
# intentionally left blank and must be filled in before this cell can run.
epochs = 1000
learning_rate = 1e-3

# Model weights and bias parameters
w = 0.0
b = 0.0

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################
######## Hint: use +, -, *, /, **, np.mean(), #######
########           np.exp(), np.log() ###########
    y_pred = 1 ? (1 ? ???(-(w ? X ? b)))  # Logistic regression: predicted P(y=1 | X)
    loss =   # Negative log-likelihood loss
    
    dw =   # gradient of the loss w.r.t. w
    db =   # gradient of the loss w.r.t. b
    
    # Update model weights and bias
    w = 
    b = 
    
#################################################


print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### 학습 결과를 그려보면 다음과 같습니다.

In [None]:
# Visualize the trained logistic regression model
fig, ax = plt.subplots()
ax.scatter(X, y)   # original data
y_pred = 1 / (1 + np.exp(-(w*X + b)))   # predicted probability for every sample
ax.scatter(X, y_pred, color='red')
ax.axhline(y=0.5, color='orange', linestyle='--')   # 0.5 decision threshold

# Samples whose label agrees with the thresholded prediction are re-drawn in orange
index1 = (y == 0) & (y_pred < 0.5)
index2 = (y == 1) & (y_pred > 0.5)
index = index1 | index2
ax.scatter(X[index], y[index], color='orange')

plt.show()

## [P3.2] <font color=red>Tensorflow</font>를 이용한 경사 하강법을 통해 최적의 logistic 회귀 모델을 찾아주세요.

In [None]:
# Training setting
# Exercise cell: the `?` / `???` placeholders and the unfinished statements below are
# intentionally left blank and must be completed before this cell can run.
epochs = 1000
learning_rate = 1e-3

# Model weights and bias parameters (TensorFlow variables, both starting at 0.0)
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################

    # Define negative log-likelihood loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        # Hint: use tf.exp()
        y_pred = 1 ? (1 ? ???(-(w ? X ? b)))
        # Hint: use tf.math.log
        loss = 
    
    
    # Get gradients of parameters (Hint: tape.gradient())
    dw, db =    # dloss_dw, dloss_db
    
    
    # Update model weights and bias (Hint : assign_sub())
    w.
    b.
    
    
#################################################


# Convert parameters type from tensor to numpy
# (this rebinds `w` and `b`: from here on they are NumPy values, not tf.Variable)
w = w.numpy()
b = b.numpy()

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### 학습 결과를 그려보면 다음과 같습니다.

In [None]:
# Visualize the trained logistic regression model
fig, ax = plt.subplots()
ax.scatter(X, y)   # original data
y_pred = 1 / (1 + tf.exp(-(w*X + b)))   # TensorFlow tensor of predicted probabilities
ax.scatter(X, y_pred, color='red')
ax.axhline(y=0.5, color='orange', linestyle='--')   # 0.5 decision threshold

# Samples whose label agrees with the thresholded prediction are re-drawn in orange
index1 = (y == 0) & (y_pred.numpy() < 0.5)
index2 = (y == 1) & (y_pred.numpy() > 0.5)
index = index1 | index2
ax.scatter(X[index], y[index], color='orange')

plt.show()