In [None]:
# Basic setting
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
%matplotlib inline

# Optimization Section
## [Problem 1] Linear Regression

In [None]:
# Load the dataset
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open pickle files that come from a trusted source.
# The unpickled object is unpacked into two names: inputs X and targets y.
with open('./data/linear_regression.pickle', 'rb') as f:
    X, y = pickle.load(f)

In [None]:
# Check the dataset distribution: one point per (X, y) sample
plt.scatter(X, y)
plt.show()

## [P1.1] You should find an optimal linear regression model using Gradient Descent <font color=red>without Tensorflow.</font>

### The best fit model should have the smallest difference between the predicted and real values for all data.
### Therefore, you should train the model to <font color=red>minimize the MSE(Mean Squared Error) loss.</font>
### Fill in the blanks below for the loss function and the gradients of the model parameters.

**MSE loss**
\begin{equation*}
\left( \frac{1}{n} \sum_{i=1}^n (y_i - (wX_i + b))^2 \right)
\end{equation*}

**Gradients calculation**
\begin{equation*}
\frac{\partial L}{\partial w} = -2 * \frac{1}{n} \sum_{i=1}^n (y_i - (wX_i + b)) * X_i
\end{equation*}

\begin{equation*}
\frac{\partial L}{\partial b} = -2 * \frac{1}{n} \sum_{i=1}^n (y_i - (wX_i + b))
\end{equation*}

In [None]:
# Training setting
epochs = 1000
learning_rate = 1e-7

# Model weights and bias parameters
# f(x) = w * X + b
w = 0.0
b = 0.0

# Store model parameters and loss for visualization
w_list, b_list, loss_list = [], [], []

# Perform Gradient Descent
for i in range(epochs):
    n = float(len(X))   # number of elements in X
    
    
#################################################
######## Hint: use +, -, *, /, sum() ########
    # MSE loss
    # a**2 = a * a = square of a, a**3 = a * a * a
    loss =  (1 ? n) ? ???(y ? (w ? X ? b)**2)
    
    # derivative w.r.t to w
    dw = -2 ? (1 ? n) ? ???((y ? (w ? X ? b)) ? X)
    # derivative w.r.t to b
    db = -2 ? (1 ? n) ? ???(y ? (w ? X ? b))
#################################################
    
    # update w and b
    w = w - learning_rate * dw
    b = b - learning_rate * db
    
    w_list.append(w)
    b_list.append(b)
    loss_list.append(loss)

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### Let's visualize the trained results.

In [None]:
# Overlay the fitted line on the original samples
y_pred = w * X + b   # model prediction for every sample in X
plt.scatter(X, y)   # original data
plt.plot(X, y_pred, color='red')   # fitted line
plt.show()

In [None]:
# Visualize the intermediate trained model
epochs_list = [1, 200, 400, 600, 800, 1000]

for epoch in epochs_list:
    # Parameters recorded after this epoch. Epoch numbers are 1-based,
    # while list indexes start from 0.
    # Dedicated names are used so the final trained `w` and `b` are not
    # overwritten by intermediate values.
    w_epoch = w_list[epoch - 1]
    b_epoch = b_list[epoch - 1]

    plt.scatter(X, y)   # scatter the original data
    plt.plot(X, w_epoch * X + b_epoch, color='red')
    plt.title('Epoch %d' % epoch)   # tell the otherwise similar figures apart
    plt.show()

In [None]:
# Visualize the change of loss
# loss_list holds one value per epoch, so the x-axis is the epoch index.
plt.plot(loss_list)
plt.show()

### [P1.2] You should find an optimal linear regression model using Gradient Descent <font color=red>with Tensorflow.</font>

In [None]:
# Training setting
# Same linear model as before, but the gradients are obtained from
# tf.GradientTape instead of the hand-derived formulas.
epochs = 1000
learning_rate = 1e-7

# Model weights and bias parameters
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################

    # Define MSE loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        loss = ???((y ? (w ? X ? b))**2)
        
    # Get gradients of parameters (Hint: tape.gradient())
    dw, db = ???(?, [?, ?])   # dloss_dw, dloss_db
    
    # Update model weights and bias (Hint : assign_sub())
    w.???(? * ?)
    b.???(? * ?)
    
#################################################


# Convert parameters type from tensor to numpy
# (this rebinds w and b, so re-run the whole cell to train again)
w = w.numpy()
b = b.numpy()

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### Let's visualize the trained results.

In [None]:
# Overlay the TensorFlow-trained line on the original samples
y_pred = w * X + b   # w and b are NumPy values after the .numpy() conversion
plt.scatter(X, y)   # original data
plt.plot(X, y_pred, color='red')   # fitted line
plt.show()

### The trained model parameters are the same as those obtained in [P1.1].
### If you use Tensorflow, you don't have to calculate gradients by hand!

## [Problem 2] Polynomial Regression

### In the real world, a lot of data is too complex to be expressed by a linear model.

In [None]:
# Load the dataset
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open pickle files that come from a trusted source.
# This rebinds X and y, replacing the linear-regression data loaded earlier.
with open('./data/polynomial_regression.pickle', 'rb') as f:
    X, y = pickle.load(f)

In [None]:
# Check the dataset distribution: one point per (X, y) sample
plt.scatter(X, y)
plt.show()

### [P2.1] First, you need to find an optimal linear regression model using Gradient Descent <font color=red>with Tensorflow.</font>

In [None]:
# Training setting
# Baseline: fit a straight line f(x) = w * X + b to the polynomial dataset.
epochs = 1000
learning_rate = 1e-7

# Model weights and bias parameters
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################
############# Hint : Same as before #############

    # Define MSE loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        loss = ???((y ? (w ? X ? b))**2)
    
    # Get gradients of parameters (Hint: tape.gradient())
    dw, db = ???(?, [?, ?])   # dloss_dw, dloss_db
    
    # Update model weights and bias (Hint : assign_sub())
    w.???(? * ?)
    b.???(? * ?)
    
#################################################


# Convert parameters type from tensor to numpy
# (this rebinds w and b, so re-run the whole cell to train again)
w = w.numpy()
b = b.numpy()

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### Let's visualize the trained results.

In [None]:
# Overlay the straight-line fit on the polynomial dataset
y_pred = w * X + b   # linear prediction for every sample in X
plt.scatter(X, y)   # original data
plt.plot(X, y_pred, color='red')   # fitted line
plt.show()

### The linear regression model does not seem to fit well.
### Maybe, a more complex model can better represent the given dataset.

## [P2.2] Next, you need to find an optimal <font color=red>polynomial regression model</font> using Gradient Descent <font color=red>with Tensorflow.</font>

### Learn a cubic regression model (a third-degree polynomial).
\begin{equation*}
f(x) = w_1X^3 + w_2X^2 + w_3X + b
\end{equation*}

In [None]:
# Training setting
epochs = 1000
# NOTE(review): far smaller than the 1e-7 used for the linear models;
# presumably needed because the X**3 term makes the gradients much larger.
# Confirm against the data.
learning_rate = 1e-14


#################################################
### Define model weights and bias parameters ###
# (Hint: tf.Variable(), as in the previous cells; the updates below use
#  assign_sub() and the results are converted with .numpy())
w1 = 
w2 = 
w3 = 
b = 

# Perform Gradient Descent
for i in range(epochs):
    
####### Hint : Consider multiple weights  #######

    # Define MSE loss function (Hint: tf.GradientTape(), tf.reduce_mean())
    # Model: f(x) = w1 * X**3 + w2 * X**2 + w3 * X + b
    with ??? as tape:
        loss = ???((y ? (w1 ? X**3 ? w2 ? X**2 ? w3 ? X ? b))**2)
    
    # Get gradients of parameters (Hint: tape.gradient())
    # dloss_dw1, dloss_dw2, dloss_dw3, dloss_db
    dw1, dw2, dw3, db = ???(?, [?, ?, ?, ?])   
    
    # Update model weights and bias (Hint : assign_sub())
    w1.???(? * ?)
    w2.???(? * ?)
    w3.???(? * ?)
    b.???(? * ?)
    
#################################################

# Convert parameters type from tensor to numpy
# (this rebinds the names, so re-run the whole cell to train again)
w1 = w1.numpy()
w2 = w2.numpy()
w3 = w3.numpy()
b = b.numpy()

# Print the trained parameters value
print('Trained model weights 1 : %.4f' % w1)
print('Trained model weights 2 : %.4f' % w2)
print('Trained model weights 3 : %.4f' % w3)
print('Trained model bias : %.4f' % b)

### Let's visualize the trained results.

In [None]:
# Overlay the trained cubic regression curve on the original samples
# NOTE(review): plt.plot joins points in array order, so the curve is only
# drawn cleanly if X is sorted. Presumably it is; confirm against the data.
cubic_term = w1 * X**3
quadratic_term = w2 * X**2
linear_term = w3 * X
y_pred = cubic_term + quadratic_term + linear_term + b
plt.scatter(X, y)   # original data
plt.plot(X, y_pred, color='red')   # fitted cubic curve
plt.show()

## [Problem 3] Logistic Regression (Classification)

In [None]:
# Load the dataset
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only open pickle files that come from a trusted source.
# Presumably the pickle holds a pandas DataFrame (it is used via .head()
# and column lookups below) - confirm.
with open('./data/logistic_regression.pickle', 'rb') as f:
    data = pickle.load(f)
    
data.head()   # show the first 5 rows

### Now, we are going to train logistic regression model to classify whether a user purchased or not, based on the 'Age' information.

In [None]:
# Extract 'Age' and 'Purchased' data
# 'Age' is converted to float32; 'Purchased' keeps the dtype stored in the column.
X = data['Age'].to_numpy(dtype=np.float32)
y = data['Purchased'].to_numpy()

# Center the 'Age' values
# Shifting X so that its mean becomes 0 makes training more stable.
def normalize(X):
    """Return X shifted by its own mean, so the result has zero mean.

    Only the offset is removed; the values are not rescaled.
    """
    mean_value = X.mean()
    return X - mean_value
X = normalize(X)   # X now holds mean-centered ages

# Visualizing the dataset
plt.scatter(X, y)
plt.show()

### You can see the tendency <font color=red> not to purchase(y=0)</font> for the younger and <font color=red>to purchase(y=1)</font> for the older.

## [P3.1] You should find an optimal logistic regression model using Gradient Descent <font color=red>without Tensorflow.</font>

### The shape of Logistic function is S-shaped curve and it maps to a value between 0 and 1.
### Logistic regression model can classify the data based on 0.5 value from the perspective of probability.
### You can train the model to <font color=red>minimize the negative log-likelihood (= maximize the likelihood).</font>

**Logistic regression**
\begin{equation*}
P(y_i=1|X_i) = \frac{1}{1 + e^{-(wX_i + b)}}
\end{equation*}

**Likelihood loss**
\begin{equation*}
-\frac{1}{n} \sum_{i=1}^n (y_i * log(P(y_i=1|X_i)) + (1 - y_i) * log(1 - P(y_i=1|X_i)))
\end{equation*}

**Gradients calculation**
\begin{equation*}
\frac{\partial L}{\partial w} = \frac{1}{n} \sum_{i=1}^n ((P(y_i=1|X_i) - y_i) * X_i)
\end{equation*}

\begin{equation*}
\frac{\partial L}{\partial b} = \frac{1}{n} \sum_{i=1}^n (P(y_i=1|X_i) - y_i)
\end{equation*}



In [None]:
# Training setting
epochs = 1000
learning_rate = 1e-3

# Model weights and bias parameters
# P(y=1|X) = 1 / (1 + exp(-(w * X + b)))
w = 0.0
b = 0.0

# Perform Gradient Descent
for i in range(epochs):
    n = float(len(X))   # number of elements in X
    
    
#################################################
######## Hint: use +, -, *, /, **, sum(), #######
########           np.exp(), np.log() ###########
    y_pred = 1 ? (1 ? ???(-(w ? X ? b)))  # Logistic regression
    loss = -(1 ? n) ? ???(y ? ???(y_pred) ? (1 - y) ? ???(1 - y_pred))  # Negative log-likelihood loss
    
    dw = (1 ? n) ? ???((y_pred ? y) ? X)  # gradient w.r.t. w
    db = (1 ? n) ? ???(y_pred ? y)  # gradient w.r.t. b
    
    # Update model weights and bias
    w = w - ??? * ?
    b = b - ??? * ?
    
#################################################


print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### Let's visualize the trained results.

In [None]:
# Visualize the trained logistic regression model
logit = w*X + b
y_pred = 1 / (1 + np.exp(-logit))   # predicted P(y=1|X) for every sample

plt.scatter(X, y)   # scatter the original data
plt.scatter(X, y_pred, color='red')   # predicted probabilities
plt.axhline(y=0.5, color='orange', linestyle='--')   # show 0.5 threshold line

# Correctly classified samples are re-drawn in orange:
# label 0 with probability below 0.5, or label 1 with probability above 0.5.
index1 = (y == 0) & (y_pred < 0.5)
index2 = (y == 1) & (y_pred > 0.5)
index = index1 | index2
plt.scatter(X[index], y[index], color='orange')

plt.show()

## [P3.2] You should find an optimal logistic regression model using Gradient Descent <font color=red>with Tensorflow.</font>

In [None]:
# Training setting
# Same logistic model as before, but the gradients are obtained from
# tf.GradientTape instead of the hand-derived formulas.
epochs = 1000
learning_rate = 1e-3

# Model weights and bias parameters
w = tf.Variable(0.0)
b = tf.Variable(0.0)

# Perform Gradient Descent
for i in range(epochs):
    
    
#################################################

    # Define the negative log-likelihood loss (Hint: tf.GradientTape(), tf.reduce_mean())
    with ??? as tape:
        # Hint: use tf.exp()
        y_pred = 1 ? (1 ? ???(-(w ? X ? b)))
        # Hint: use tf.math.log
        loss = -???(y ? ???(y_pred) + (1 - y) ? ???(1 - y_pred))
    
    
    # Get gradients of parameters (Hint: tape.gradient())
    dw, db = ???(?, [?, ?])   # dloss_dw, dloss_db
    
    
    # Update model weights and bias (Hint : assign_sub())
    w.???(? * ?)
    b.???(? * ?)
    
    
#################################################


# Convert parameters type from tensor to numpy
# (this rebinds w and b, so re-run the whole cell to train again)
w = w.numpy()
b = b.numpy()

print('Trained model weights : %.4f' % w)
print('Trained model bias : %.4f' % b)

### Let's visualize the trained results.

In [None]:
# Visualize the trained logistic regression model
y_pred = 1 / (1 + tf.exp(-(w*X + b)))   # predicted P(y=1|X), as a tensor
y_prob = y_pred.numpy()   # convert once for the boolean masks below

plt.scatter(X, y)   # scatter the original data
plt.scatter(X, y_pred, color='red')   # predicted probabilities
plt.axhline(y=0.5, color='orange', linestyle='--')   # show 0.5 threshold line

# Correctly classified samples are re-drawn in orange:
# label 0 with probability below 0.5, or label 1 with probability above 0.5.
index1 = (y == 0) & (y_prob < 0.5)
index2 = (y == 1) & (y_prob > 0.5)
index = index1 | index2
plt.scatter(X[index], y[index], color='orange')

plt.show()