In [227]:
# Render figures inline, directly in the browser that is running the notebook.
%matplotlib inline
#import math
import random # used for 1) generation of synthetic data or 2) initialization of model parameters 
import time # d2l에 들어있다.
import numpy as np
import tensorflow as tf
from IPython import display # d2l에 들어있다.
from d2l import tensorflow as d2l

In [228]:
# Vectorization for Speed
"""
n = 10000
a = tf.ones(n)
b = tf.ones(n)
"""

class Timer: #@save
    """Record multiple running times.

    Each `stop()` appends the seconds elapsed since the most recent `start()`
    to `self.times`. Constructing a `Timer` starts it immediately.
    """
    def __init__(self):
        # Elapsed durations in seconds, one entry per call to stop().
        self.times = []
        self.start()

    def start(self):
        """Start the timer."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer, record the elapsed time, and return it."""
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        """Return the average time."""
        # tf.reduce_sum turns the Python list into a tensor, so the result is a
        # TensorFlow scalar rather than a plain float.
        return tf.reduce_sum(self.times) / len(self.times)

    def sum(self):
        """Return the sum of time."""
        return tf.reduce_sum(self.times)

    def cumsum(self):
        """Return the accumulated time."""
        # Python lists have no cumulative-sum operation, hence the round trip
        # through a NumPy array and back to a list.
        return np.array(self.times).cumsum().tolist()



In [229]:
"""
c = tf.Variable(tf.zeros(n)) # 'tf.Variable()' is a resource variable. I think it is for memory efficiency.
timer = Timer()
for i in range(n):
    c[i].assign(a[i] + b[i]) # ResourceVariable object does not support item assignment
f'{timer.stop():.5f} sec'
""";

In [230]:
"""
timer.start()
d = a + b
f'{timer.stop():.5f} sec'
""";

In [231]:
"""
def normal(x, mu, sigma):
    p = 1 / math.sqrt(2 * math.pi * sigma**2)
    return p * np.exp(-0.5 / sigma**2 * (x - mu)**2)

# Use numpy again for visualization
x = np.arange(-7, 7, 0.01)

# Mean and standard deviation pairs
params = [(0, 1), (0, 2), (3, 1)]
d2l.plot(x, [normal(x, mu, sigma) for mu, sigma in params], xlabel='x',
ylabel='p(x)', figsize=(4.5, 2.5),
legend=[f'mean {mu}, std {sigma}' for mu, sigma in params])
""";

In [232]:
# User-defined functions for plotting
def use_svg_display(): #@save
    """Switch the figure format used for inline matplotlib output to SVG."""
    # NOTE(review): IPython.display.set_matplotlib_formats is reported as
    # deprecated since IPython 7.23 in favour of
    # matplotlib_inline.backend_inline.set_matplotlib_formats — confirm against
    # the installed IPython version before upgrading.
    figure_format = 'svg'
    display.set_matplotlib_formats(figure_format)

def set_figsize(figsize=(3.5, 2.5)): #@save
    """Select SVG output and set matplotlib's default figure size.

    Args:
        figsize: value stored under the 'figure.figsize' rcParams key.
    """
    use_svg_display()
    rc_params = d2l.plt.rcParams
    rc_params['figure.figsize'] = figsize

#@save
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure labels, scales, limits, legend and grid on an axes object.

    Args:
        axes: object exposing the matplotlib Axes setter methods used below.
        xlabel, ylabel: axis labels.
        xlim, ylim: axis limits.
        xscale, yscale: axis scale names.
        legend: legend entries; the legend is only drawn when this is truthy.
    """
    # Applied in this fixed order: labels, then scales, then limits.
    axis_settings = (
        ('set_xlabel', xlabel),
        ('set_ylabel', ylabel),
        ('set_xscale', xscale),
        ('set_yscale', yscale),
        ('set_xlim', xlim),
        ('set_ylim', ylim),
    )
    for setter_name, value in axis_settings:
        getattr(axes, setter_name)(value)

    if legend:
        axes.legend(legend)
    # Turn on the grid lines.
    axes.grid()

#@save
def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot one or more curves on a single axes.

    Args:
        X: x-values, either one 1-D sequence or a collection of them. When Y
            is None, X is treated as the y-values instead.
        Y: y-values, either one 1-D sequence or a collection of them.
        xlabel, ylabel, xlim, ylim, xscale, yscale, legend: forwarded to
            set_axes; a legend of None is replaced by an empty list.
        fmts: format strings paired with the curves in order. Curves beyond
            the number of formats are not drawn, because the pairing uses zip.
        figsize: figure size forwarded to set_figsize.
        axes: axes to draw on; the current axes are used when this is falsy.
    """
    legend = [] if legend is None else legend

    set_figsize(figsize)
    if not axes:
        # d2l.plt.gca() returns the current axes object.
        axes = d2l.plt.gca()

    def has_one_axis(data):
        """Return a truthy value if `data` (tensor or list) has one axis."""
        if hasattr(data, "ndim") and data.ndim == 1:
            return True
        return isinstance(data, list) and not hasattr(data[0], "__len__")

    # Normalise both inputs to collections of curves.
    x_series = [X] if has_one_axis(X) else X
    if Y is None:
        # Only one argument was given: it holds the y-values, and each curve
        # gets an empty x placeholder.
        x_series, y_series = [[]] * len(x_series), x_series
    elif has_one_axis(Y):
        y_series = [Y]
    else:
        y_series = Y
    if len(x_series) != len(y_series):
        # Repeat the x-values so every y-curve has a partner.
        x_series = x_series * len(y_series)

    # Clear the current axes before drawing.
    axes.cla()
    for xs, ys, fmt in zip(x_series, y_series, fmts):
        # With no x-values, let matplotlib plot the y-values against their index.
        plot_args = (xs, ys, fmt) if len(xs) else (ys, fmt)
        axes.plot(*plot_args)
    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)

In [233]:
"""
d2l.set_figsize()
# The semicolon is for displaying the plot only.
d2l.plt.scatter(features[:, 1].numpy(), labels.numpy(), 1); # The last argument determines the size of respective pts.
""";

In [235]:
# Generating the Dataset
def synthetic_data(w, b, num_examples): #@save
    """Generate y = Xw + b + noise.

    Args:
        w: weight tensor; its first dimension sets the number of features.
        b: bias added to every example.
        num_examples: number of rows to generate.

    Returns:
        A pair (X, y): X has shape (num_examples, w.shape[0]) and is drawn
        with tf.random.normal using its default parameters; y is reshaped to
        a single column and includes normal noise with stddev 0.01.
    """
    feature_shape = (num_examples, w.shape[0])
    X = tf.zeros(feature_shape)
    X += tf.random.normal(shape=X.shape)
    # Reshape w into a column so it can be matrix-multiplied with X.
    weight_column = tf.reshape(w, (-1, 1))
    y = tf.matmul(X, weight_column) + b  # Regression function.
    noise = tf.random.normal(shape=y.shape, stddev=0.01)
    y = tf.reshape(y + noise, (-1, 1))
    return X, y

# Parameter Initialization
# Ground-truth weights and bias from which the synthetic dataset is generated.
true_w = tf.constant([2, -3.4])
true_b = 4.2
# The call to this notebook's own synthetic_data() is disabled; the d2l
# library implementation is used to build the 1000-example dataset instead.
#features, labels = synthetic_data(true_w, true_b, num_examples=1000)
features, labels = d2l.synthetic_data(true_w, true_b, num_examples=1000)

#print('features:', features[0], '\nlabel:', labels[0]) # Can be understood as part of EDA.

# Reading the Dataset
# Partitioning the dataset into minibatches
def load_array(data_arrays, batch_size, is_train=True, shuffle_buffer_size=1000): #@save
    """Construct a TensorFlow data iterator.

    Serves a similar purpose to a generator function that yields minibatches.

    Args:
        data_arrays: tensors (or a tuple of tensors) sliced along their first
            dimension by tf.data.Dataset.from_tensor_slices.
        batch_size: number of examples per minibatch.
        is_train: when true, the dataset is shuffled before batching.
        shuffle_buffer_size: buffer size passed to Dataset.shuffle. Defaults
            to 1000, the value that was previously hard-coded. Per the
            TensorFlow documentation, a buffer at least as large as the
            dataset is needed for a perfect shuffle.

    Returns:
        The batched (and, for training, shuffled) tf.data.Dataset.
    """
    dataset = tf.data.Dataset.from_tensor_slices(data_arrays)
    if is_train:
        dataset = dataset.shuffle(buffer_size=shuffle_buffer_size)
    dataset = dataset.batch(batch_size=batch_size)
    return dataset

# Number of examples per minibatch.
batch_size = 10
# Batched dataset over (features, labels); is_train is left at its default of
# True, so load_array shuffles before batching.
data_iter = load_array((features, labels), batch_size)
#next(iter(data_iter))

"""
# Prototype of partitioning into minibatches
def data_iter(batch_size, features, labels):
    num_examples = len(features) # size of the sample size
    indices = list(range(num_examples)) # form of [0, 1, ..., (n-1)]
    # The examples are read at random, in no particular order
    random.shuffle(indices) # shuffled list of indices of length 'num_examples'
    for i in range(0, num_examples, batch_size):
        j = tf.constant(indices[i:min(i + batch_size, num_examples)]) # 'min()' is used for the last batch.
        yield tf.gather(features, j), tf.gather(labels, j) # generator를 반환합니다. Memory-efficient하다는 점에서 강점이 있습니다.
"""

"""
batch_size = 10

for X, y in data_iter(batch_size, features, labels):
    print(X, '\n', y)
    break
"""

# Defining the Model and Initializing Model Parameters
# 'keras' is the high-level API for TensorFlow.
# The model is a single Dense layer with one output unit, i.e. a linear map
# from the input features to a scalar prediction. Its kernel (weight matrix)
# is initialized from a normal distribution with standard deviation 0.01.
initializer = tf.initializers.RandomNormal(stddev=0.01)
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))


"""
# Initializing Model Parameters
# !!!!! Note 'tf.GradientTape()' watches 'trainable=True' variables by default !!!!!
# !!!!! Weights랑 bias(es)는 무조건 'trainable'한 variable로 설정하는 것이 맞겠다 !!!!!
w = tf.Variable(tf.random.normal(shape=(2,1), mean=0, stddev=0.01), trainable=True) # 값 변경이 가능한 '변수'로 이해할 수 있다.
b = tf.Variable(tf.zeros(1), trainable = True)

# Defining the Model
def linreg(X, w, b): #@save
    # The linear regression model.
    return tf.matmul(X, w) + b
"""    

# Defining the Loss Function
# Keras mean-squared-error loss object, used by the training loop below.
loss = tf.keras.losses.MeanSquaredError()

"""
# Defining the Loss Function
def squared_loss(y_hat, y): #@save
    # Squared loss.
    return (y_hat - tf.reshape(y, y_hat.shape)) ** 2 / 2 # sum이 아닌 vector 형식으로 주어집니다.
"""

# Defining the Optimization Algorithm
# Keras stochastic-gradient-descent optimizer with a learning rate of 0.03.
trainer = tf.keras.optimizers.SGD(learning_rate=0.03)

"""
# Defining the Optimization Algorithm
# params: tuple of weight and bias 
# grads: tuple of gradients w.r.t weight and bias, respectively 
# lr: learning rate 
# batch_size: size of the batch
def sgd(params, grads, lr, batch_size): #@save
    # Minibatch stochastic gradient descent.
    for param, grad in zip(params, grads):
        param.assign_sub(lr*grad/batch_size)
"""

# Training
# Each epoch makes one pass over the minibatches in data_iter and then reports
# the loss on the full training set.
num_epochs = 3
for epoch in range(num_epochs):
    for X, y in data_iter:
        with tf.GradientTape() as tape:
            # Keras losses are called as loss(y_true, y_pred). The result is
            # the same for mean squared error, but the conventional order
            # keeps this loop correct if an asymmetric loss is substituted.
            l = loss(y, net(X, training=True))
        grads = tape.gradient(l, net.trainable_variables)
        trainer.apply_gradients(zip(grads, net.trainable_variables))
    # Loss evaluated over the whole training set (appears to be the sample
    # mean of the training error).
    l = loss(labels, net(features))
    print(f'epoch {epoch + 1}, loss {l:f}')

# Discrepancy between Real Parameter Values
# Fetch the learned parameters once: index 0 is read as the weights and
# index 1 as the bias, then each is compared with its ground-truth value.
learned_params = net.get_weights()
w = learned_params[0]
b = learned_params[1]
print('error in estimating w', true_w - tf.reshape(w, true_w.shape))
print('error in estimating b', true_b - b)

"""
# Training
lr = 0.03 # learning rate
num_epochs = 3 # number of epochs
net = linreg # Assumption on regression ftn (modeling)
loss = squared_loss # Loss function to optimize w.r.t. parameters

for epoch in range(num_epochs):
    for X, y in data_iter(batch_size, features, labels):
        with tf.GradientTape() as g:
            l = loss(net(X, w, b), y) # Minibatch loss in 'X' and 'y'
        # Compute gradient on l w.r.t. ['w', 'b']
        dw, db = g.gradient(l, [w, b])
        # Update parameters using their gradient
        sgd([w, b], [dw, db], lr, batch_size)
    train_l = loss(net(features, w, b), labels) # Recall that loss is given in a vector
    print(f'epoch {epoch + 1}, loss {float(tf.reduce_mean(train_l)):f}')

# Discrepancy between Real Parameter Values
print(f'error in estimating w: {true_w - tf.reshape(w, true_w.shape)}')
print(f'error in estimating b: {true_b - b}')
""";

epoch 1, loss 0.000241
epoch 2, loss 0.000101
epoch 3, loss 0.000101
error in estimating w tf.Tensor([-0.00021219 -0.0007658 ], shape=(2,), dtype=float32)
error in estimating b [-0.0006361]
