In [21]:
# Render matplotlib figures inline, directly in the browser that runs the notebook
%matplotlib inline
import random # used for 1) generation of synthetic data or 2) initializations of model parameters
import numpy as np
import tensorflow as tf
from IPython import display # d2l에 들어있다.
from d2l import tensorflow as d2l

In [22]:
# Ground-truth parameters of the synthetic model.
# These are the *true* values used to generate the data, not the learned
# parameters `w` / `b` -- do not confuse the two.
true_w = tf.constant([2, -3.4])
true_b = 4.2

# Build a synthetic dataset of 1000 examples from the true parameters.
# NOTE(review): presumably labels = features @ true_w + true_b + noise, as the
# name `d2l.synthetic_data` suggests -- confirm against the d2l implementation.
features, labels = d2l.synthetic_data(true_w, true_b, num_examples=1000)

In [23]:
# Prototype of Partitioning into Minibatches
def data_iter(batch_size, features, labels):
    """Yield randomly ordered minibatches of ``(features, labels)``.

    The example indices are shuffled once with ``random.shuffle`` and then
    walked in strides of ``batch_size``. Because this is a generator, only one
    minibatch is materialized at a time, which keeps memory usage low.

    Args:
        batch_size: Number of examples per minibatch (the stride of the walk).
        features: Collection of examples; ``len(features)`` determines the
            number of examples.
        labels: Collection gathered with the same indices as ``features``.

    Yields:
        A tuple ``(tf.gather(features, idx), tf.gather(labels, idx))`` for each
        slice of the shuffled indices. The last minibatch is shorter when
        ``batch_size`` does not evenly divide the number of examples.
    """
    total = len(features)
    # Shuffled positions [0, 1, ..., total - 1]: examples are read in random order.
    order = list(range(total))
    random.shuffle(order)
    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so the final (possibly
        # shorter) minibatch needs no explicit bound check.
        batch_idx = tf.constant(order[start:start + batch_size])
        batch_features = tf.gather(features, batch_idx)
        batch_labels = tf.gather(labels, batch_idx)
        yield batch_features, batch_labels

In [24]:
# Initializing Model Parameters
# Note: 'tf.GradientTape()' watches 'trainable=True' variables by default
# (when Keras is used properly, this does not need to be handled by hand).
# Weights and bias(es) should therefore always be created as trainable variables.
w = tf.Variable(tf.random.normal(shape=(2, 1), mean=0, stddev=0.01), trainable=True) # a 'variable' in the sense that its value can be changed
b = tf.Variable(tf.zeros(1), trainable=True)

In [25]:
# Training
# This part is also replaced by a user-defined function later on; the
# replacement will be made when that section is reached.
lr = 0.03 # learning rate
num_epochs = 3 # number of epochs
batch_size = 10 # size of the batch
net = d2l.linreg # model (forward pass), called below as net(X, w, b) -- presumably linear regression; confirm against d2l
loss = d2l.squared_loss # loss function to optimize w.r.t parameters

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
    # Alternative iterator: for X, y in d2l.load_array((features, labels), batch_size):
        with tf.GradientTape() as g:
            l = loss(net(X, w, b), y) # Minibatch loss in 'X' and 'y'
        # Compute gradient on l w.r.t ['w', 'b']
        dw, db = g.gradient(l, [w, b])
        # Update parameters using their gradient
        # NOTE(review): presumably an in-place minibatch SGD step scaled by lr / batch_size -- confirm against d2l.sgd
        d2l.sgd([w, b], [dw, db], lr, batch_size)
    # After each epoch, evaluate the loss on the full dataset with the current w, b
    train_l = loss(net(features, w, b), labels) # Recall that the loss is given as a vector, hence the reduce_mean below
    print(f'epoch {epoch + 1}, loss {float(tf.reduce_mean(train_l)):f}')

# Discrepancy between the true parameter values and the learned estimates
print(f'error in estimating w: {true_w - tf.reshape(w, true_w.shape)}')
print(f'error in estimating b: {true_b - b}')

epoch 1, loss 0.040907
epoch 2, loss 0.000152
epoch 3, loss 0.000050
error in estimating w: [-6.7472458e-05 -1.8668175e-04]
error in estimating b: [-0.00020838]
