In [1]:
# Notebook을 실행한 브라우저에서 바로 그림을 볼수 있도록
%matplotlib inline
#import random # used for 1) generation of synthetic data or 2) initializations of model parameters
import time # d2l에 들어있다.
import numpy as np
import tensorflow as tf
from IPython import display # d2l에 들어있다.
from d2l import tensorflow as d2l

In [2]:
# Timer() ftn for calculating the time spent for a given operation
class Timer(): #@save
    """Record multiple running times.

    The timer starts on construction. Each call to `stop()` appends the time
    elapsed since the most recent `start()` to `self.times`.

    All aggregate methods return plain Python values (floats / list of
    floats), so they can be formatted and compared without TensorFlow.
    """
    def __init__(self):
        # Durations (in seconds) recorded by successive `stop()` calls.
        self.times = []
        self.start()

    def start(self):
        """Start (or restart) the timer."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer, record the elapsed time, and return it.

        Returns:
            The number of seconds since the last `start()` call.
        """
        self.times.append(time.time() - self.tik)
        return self.times[-1]

    def avg(self):
        """Return the average recorded time (0.0 if nothing was recorded)."""
        if not self.times:
            # Avoid dividing by zero when `stop()` has never been called.
            return 0.0
        return self.sum() / len(self.times)

    def sum(self):
        """Return the sum of the recorded times as a Python float."""
        # Inside a method body, `sum` resolves to the builtin, not this method.
        return float(sum(self.times))

    def cumsum(self):
        """Return the accumulated times as a list of floats."""
        # A plain list has no cumulative-sum operation, hence the round trip
        # through a NumPy array.
        return np.array(self.times).cumsum().tolist()

In [3]:
# User-defined function for plotting
def use_svg_display(): #@save
    """Ask IPython to render matplotlib figures in the SVG format."""
    # NOTE(review): newer IPython releases reportedly deprecate this helper in
    # favour of `matplotlib_inline.backend_inline.set_matplotlib_formats` --
    # confirm against the installed IPython version.
    figure_format = 'svg'
    display.set_matplotlib_formats(figure_format)

def set_figsize(figsize=(3.5, 2.5)): #@save
    """Switch to SVG output and set matplotlib's default figure size.

    Args:
        figsize: value stored under the 'figure.figsize' rcParams key.
    """
    use_svg_display()
    rc_params = d2l.plt.rcParams
    rc_params['figure.figsize'] = figsize

#@save
def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure labels, scales, limits, legend and grid of a matplotlib axes.

    Args:
        axes: the axes object to configure.
        xlabel, ylabel: axis labels.
        xlim, ylim: axis limits.
        xscale, yscale: axis scales.
        legend: legend entries; the legend is only drawn when this is truthy.
    """
    # Applied strictly in this order: labels, then scales, then limits.
    settings = (
        (axes.set_xlabel, xlabel),
        (axes.set_ylabel, ylabel),
        (axes.set_xscale, xscale),
        (axes.set_yscale, yscale),
        (axes.set_xlim, xlim),
        (axes.set_ylim, ylim),
    )
    for apply_setting, value in settings:
        apply_setting(value)
    if legend:
        axes.legend(legend)
    # Turn on the grid lines.
    axes.grid()

#@save
def plot(X, Y=None, xlabel=None, ylabel=None, legend=None, xlim=None,
         ylim=None, xscale='linear', yscale='linear',
         fmts=('-', 'm--', 'g-.', 'r:'), figsize=(3.5, 2.5), axes=None):
    """Plot data points.

    Args:
        X: x-values. Either a single 1-D sequence or a list of sequences.
            When `Y` is None, `X` is treated as the y-values and plotted
            against the default index.
        Y: y-values, a single 1-D sequence or a list of sequences.
        xlabel, ylabel, legend, xlim, ylim, xscale, yscale: forwarded to
            `set_axes`.
        fmts: matplotlib format strings, one per curve. Curves beyond
            `len(fmts)` are not drawn because the curves are zipped with it.
        figsize: figure size forwarded to `set_figsize`.
        axes: axes to draw on; the current axes are used when None.
    """
    if legend is None:
        legend = []

    set_figsize(figsize)
    # Fall back to the current axes object when the caller supplied none.
    axes = axes if axes is not None else d2l.plt.gca()

    def has_one_axis(data):
        """Return True if `data` (array-like or list) has a single axis."""
        if hasattr(data, "ndim") and data.ndim == 1:
            return True
        # An empty list is treated as one (empty) axis instead of raising
        # IndexError on data[0].
        return isinstance(data, list) and (
            len(data) == 0 or not hasattr(data[0], "__len__"))

    if has_one_axis(X):
        X = [X]  # Wrap so that len(X) == 1.
    if Y is None:
        # Only one data set was given: plot it against the default index.
        X, Y = [[]] * len(X), X
    elif has_one_axis(Y):
        Y = [Y]
    if len(X) != len(Y):
        # Repeat the x-values so that every curve in Y has a matching entry.
        X = X * len(Y)
    axes.cla()  # Clear the current axes before drawing.
    for x, y, fmt in zip(X, Y, fmts):
        if len(x):
            axes.plot(x, y, fmt)
        else:
            # No x-values for this curve: let matplotlib use the index.
            axes.plot(y, fmt)
    set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend)

#######################################################################
#######################################################################
#######################################################################

# User-defined ftn transferring numeric labels into text labels
# labels: an array of numerical labels
# !!!!! It is used to make title arguments in 'titles' option in 'show_images()' ftn !!!!!
def get_fashion_mnist_labels(labels): #@save
    """Map numeric Fashion-MNIST labels to their text names.

    Args:
        labels: iterable of numeric class indices; each is converted with
            `int()` before the lookup.

    Returns:
        A list with one text label per input label.
    """
    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot'
    ]
    names = []
    for label in labels:
        class_index = int(label)
        names.append(text_labels[class_index])
    return names

# User-defined ftn to visualize the examples.
def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5): #@save
    """Plot a list of images on a `num_rows` x `num_cols` grid.

    Args:
        imgs: iterable of images; objects exposing `.numpy()` (e.g. tensors)
            are converted with it, anything else is passed to `imshow` as is.
        num_rows: number of subplot rows.
        num_cols: number of subplot columns.
        titles: optional sequence of titles, indexed in the same order as
            `imgs`.
        scale: size of each subplot cell.

    Returns:
        The flattened array of axes.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False makes subplots always return a 2-D array of axes, so the
    # flatten() below also works for a 1x1 grid.
    _, axes = d2l.plt.subplots(
        num_rows, num_cols, figsize=figsize, squeeze=False)
    # Flatten the grid to a 1-D array to simplify the iteration below.
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        # imshow needs array data, so tensors are converted via .numpy().
        ax.imshow(img.numpy() if hasattr(img, 'numpy') else img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])  # The index from enumerate() picks the title.
    # Return only after every image has been drawn.
    return axes

#######################################################################
#######################################################################
#######################################################################

# Defining a utility class that plot data in animation.
class Animator: #@save
    """For plotting data in animation.

    Curves are built incrementally: every call to `add` appends one point per
    curve, redraws the first axes and refreshes the notebook output.
    """
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        """Create the figure and remember how its axes must be configured.

        Args:
            xlabel, ylabel, legend, xlim, ylim, xscale, yscale: forwarded to
                `d2l.set_axes` on every redraw.
            fmts: matplotlib format strings, one per curve.
            nrows, ncols: subplot grid shape.
            figsize: figure size passed to `subplots`.
        """
        # Incrementally plot multiple lines
        if legend is None:
            legend = []
        d2l.use_svg_display()
        self.fig, self.axes = d2l.plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # A single subplot is returned bare; wrap it so axes[0] works.
            self.axes = [self.axes, ]
        # Use a lambda to capture the arguments: the axes are cleared on every
        # `add`, so the configuration has to be re-applied each time.
        self.config_axes = lambda: d2l.set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Add one data point per curve and redraw the figure.

        Args:
            x: a scalar (shared by all curves) or a sequence of x-values.
            y: a scalar or a sequence with one y-value per curve.
        """
        # Add multiple data points into the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        # Lazily create one list per curve on the first call.
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            # Skip curves that have no value for this step.
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        display.clear_output(wait=True)

In [4]:
# Generating the Dataset
# w : tensor of weight
# b : bias (length 1 - since linear regression setting)
# num_examples : number of (training) samples to generate
def synthetic_data(w, b, num_examples): #@save
    """Generate y = Xw + b + noise.

    Args:
        w: weight tensor; `w.shape[0]` gives the number of features.
        b: bias added to every example.
        num_examples: number of examples to generate.

    Returns:
        A tuple `(X, y)` where `y` is reshaped to a single column.
    """
    num_features = w.shape[0]
    features = tf.zeros((num_examples, num_features))
    features = features + tf.random.normal(shape=features.shape)
    # Reshape w into a column so that the matrix product is well defined.
    weight_column = tf.reshape(w, (-1, 1))
    labels = tf.matmul(features, weight_column) + b
    # Additive noise with standard deviation 0.01.
    labels = labels + tf.random.normal(shape=labels.shape, stddev=0.01)
    return features, tf.reshape(labels, (-1, 1))

In [5]:
# Partitioning the Dataset into Minibatches
# Training set이 아닌 경우에는 shuffling이 불필요하다고 생각하는 것 같습니다.
# data_arrays : tuple of (features, labels) 
# batch_size : size of the batch
# is_train : if True, then shuffle.
def load_array(data_arrays, batch_size, is_train=True): #@save
    """Construct a TensorFlow data iterator.

    Args:
        data_arrays: passed to `tf.data.Dataset.from_tensor_slices`
            (per the surrounding notes, a tuple of features and labels).
        batch_size: number of examples per minibatch.
        is_train: when True, the dataset is shuffled (buffer size 1000)
            before batching.

    Returns:
        The batched dataset; iterate over it (e.g. `next(iter(...))`) to
        obtain minibatches.
    """
    pipeline = tf.data.Dataset.from_tensor_slices(data_arrays)
    if not is_train:
        return pipeline.batch(batch_size=batch_size)
    shuffled = pipeline.shuffle(buffer_size=1000)
    return shuffled.batch(batch_size=batch_size)

In [6]:
# !!!!! FOR CLASSIFICATION !!!!!
# Loading and Partitioning the Training and Test Datasets into Minibatches
# Returns the iterator for memory-efficiency
def load_data_fashion_mnist(batch_size, resize=None): #@save
    """Download the Fashion-MNIST dataset and then load it into memory.

    Args:
        batch_size: number of examples per minibatch.
        resize: optional target height/width; when truthy every batch of
            images is passed through `tf.image.resize_with_pad`.

    Returns:
        A tuple `(train_dataset, test_dataset)` of batched datasets. The
        training dataset is shuffled after batching.
    """
    train_arrays, test_arrays = tf.keras.datasets.fashion_mnist.load_data()

    def scale_and_cast(images, labels):
        # Divide by 255 so that pixel values lie between 0 and 1, append a
        # trailing axis (presumably the channel axis), and cast the labels
        # to int32.
        return (tf.expand_dims(images, axis=3) / 255,
                tf.cast(labels, dtype='int32'))

    def maybe_resize(images, labels):
        # Resize only when a target size was requested.
        resized = (tf.image.resize_with_pad(images, resize, resize)
                   if resize else images)
        return resized, labels

    train_dataset = tf.data.Dataset.from_tensor_slices(
        scale_and_cast(*train_arrays))
    train_dataset = train_dataset.batch(batch_size)
    train_dataset = train_dataset.shuffle(len(train_arrays[0]))
    train_dataset = train_dataset.map(maybe_resize)

    test_dataset = tf.data.Dataset.from_tensor_slices(
        scale_and_cast(*test_arrays))
    test_dataset = test_dataset.batch(batch_size)
    test_dataset = test_dataset.map(maybe_resize)

    return train_dataset, test_dataset

In [7]:
# Defining the Model ('net')
# There exist two versions : 
# 1) Manually made type
# 2) tf.keras.Sequential()에 layer을 addition함으로써 : linear model도 accommodate 가능함을 기억할 필요가 있습니다.
# 이 code chunk에는 1)에 대응되는 user-defined ftns만을 정리하여 제공할 계획입니다.
# 이에 따라, 2)의 경우에는 원형이 되는 prototype만을 아래와 같이 주석으로 제공하도록 하겠습니다.
# 추가적으로, 2)의 경우에는 initialization of model parameters for training도 함께 가능하도록 작성할 수 있습니다.
"""
initializer = tf.initializers.RandomNormal(stddev=0.01)
net = tf.keras.Sequential()
net.add(tf.keras.layers.Dense(1, kernel_initializer=initializer))
"""

def linreg(X, w, b): #@save
    """The linear regression model: returns `tf.matmul(X, w) + b`."""
    weighted_inputs = tf.matmul(X, w)
    return weighted_inputs + b

########################################################################
########################################################################
########################################################################

"""
net = tf.keras.Sequential()
net.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
weight_initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=0.01)
net.add(tf.keras.layers.Dense(10, kernel_initializer=weight_initializer))
"""

# Defining the 'softmax()' operation.
def softmax(X):
    """Apply the softmax operation along axis 1.

    Args:
        X: tensor whose axis 1 holds the scores to normalize.

    Returns:
        A tensor of the same shape as `X` whose entries along axis 1 are
        non-negative and sum to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps tf.exp from overflowing on large scores.
    X_exp = tf.exp(X - tf.reduce_max(X, axis=1, keepdims=True))
    # The normalizer must sum the exponentials, not the raw scores.
    partition = tf.reduce_sum(X_exp, axis=1, keepdims=True)
    # keepdims=True keeps axis 1, so the division broadcasts row by row.
    return X_exp / partition

# Defining the Model
def net(X):
    """Softmax regression model.

    Reads the parameters `W` and `b` from the enclosing scope; they are not
    defined in this part of the notebook. Note that `net` is defined again
    further down in this cell, and the later definition takes over.
    """
    # Flatten every example to a row of length W.shape[0].
    flat_inputs = tf.reshape(X, (-1, W.shape[0]))
    scores = tf.matmul(flat_inputs, W) + b
    return softmax(scores)

########################################################################
########################################################################
########################################################################

"""
net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(10)
])
"""

# Defining the 'relu()' activation function.
def relu(X):
    """ReLU activation: element-wise maximum of `X` and 0."""
    threshold = 0
    return tf.math.maximum(X, threshold)

# MLP w/ single hidden layer
def net(X):
    """MLP with a single hidden layer.

    Reads `num_inputs`, `W1`, `b1`, `W2` and `b2` from the enclosing scope;
    they are not defined in this part of the notebook.
    """
    flat_inputs = tf.reshape(X, (-1, num_inputs))
    hidden = relu(tf.matmul(flat_inputs, W1) + b1)
    outputs = tf.matmul(hidden, W2) + b2
    return outputs

# MLP w/ two hidden layers
def net(X):
    """MLP with two hidden layers.

    Reads `num_inputs`, `W1`, `b1`, `W2`, `b2`, `W3` and `b3` from the
    enclosing scope; they are not defined in this part of the notebook.
    """
    flat_inputs = tf.reshape(X, (-1, num_inputs))
    first_hidden = relu(tf.matmul(flat_inputs, W1) + b1)
    second_hidden = relu(tf.matmul(first_hidden, W2) + b2)
    outputs = tf.matmul(second_hidden, W3) + b3
    return outputs

In [8]:
# Defining the Loss Function ('loss')
# There exist two versions :
# 1) Manually made type
# 2) tf.keras.losses에 속한 loss function들 중 하나를 이용하는 방법이 있습니다.
"""
loss = tf.keras.losses.MeanSquaredError()
"""

def squared_loss(y_hat, y): #@save
    """Squared loss, computed per element.

    Args:
        y_hat: predictions.
        y: targets; reshaped to the shape of `y_hat` before subtracting.

    Returns:
        A tensor shaped like `y_hat` holding `(y_hat - y) ** 2 / 2`; the
        values are not summed.
    """
    # tf.reshape needs the target shape, i.e. y_hat.shape.
    return (y_hat - tf.reshape(y, y_hat.shape)) ** 2 / 2

########################################################################
########################################################################
########################################################################

"""
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
"""

def cross_entropy(y_hat, y):
    """Cross-entropy loss from predicted values and integer labels.

    Builds a one-hot mask from `y` (depth = size of the last axis of
    `y_hat`), uses it to select entries of `y_hat`, and returns the negative
    log of the selected entries.
    """
    num_classes = y_hat.shape[-1]
    label_mask = tf.one_hot(y, depth=num_classes)
    selected = tf.boolean_mask(y_hat, label_mask)
    return -tf.math.log(selected)

########################################################################
########################################################################
########################################################################

"""
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
"""

# Loss function
def loss(y_hat, y):
    """Sparse categorical cross-entropy computed from logits.

    Takes `(predictions, labels)` and forwards them to TensorFlow in the
    `(labels, predictions)` order that it expects.
    """
    per_example_loss = tf.losses.sparse_categorical_crossentropy(
        y, y_hat, from_logits=True)
    return per_example_loss

In [10]:
# Defining the Optimization Algorithm
# There exist two versions :
# 1) Manually made type
# 2) tf.keras.optimizers.OPTIMIZER(learning_rate=0.03)
"""
trainer = tf.keras.optimizers.SGD(learning_rate=0.03)
"""

# params: tuple of weight and bias
# grads: tuple of gradients w.r.t weight and bias, respectively
# lr: learning rate
# batch_size: size of the batch
def sgd(params, grads, lr, batch_size): #@save
    """Minibatch stochastic gradient descent.

    Each parameter is updated in place through `assign_sub` with
    `lr * grad / batch_size`.

    Args:
        params: iterable of parameters exposing `assign_sub`.
        grads: iterable of gradients, aligned with `params`.
        lr: learning rate.
        batch_size: size of the minibatch.
    """
    for parameter, gradient in zip(params, grads):
        step = lr * gradient / batch_size
        parameter.assign_sub(step)

########################################################################
########################################################################
########################################################################

"""
trainer = tf.keras.optimizers.SGD(learning_rate=.1)
"""

class Updater: #@save
    """For updating parameters using minibatch stochastic gradient descent."""

    def __init__(self, params, lr):
        """Store the parameters to update and the learning rate."""
        self.lr = lr
        self.params = params

    def __call__(self, batch_size, grads):
        """Apply one `d2l.sgd` step to the stored parameters."""
        parameters, learning_rate = self.params, self.lr
        d2l.sgd(parameters, grads, learning_rate, batch_size)

In [11]:
########################################################################
########################################################################
########################################################################

# Classification Accuracy
def accuracy(y_hat, y): #@save
    """Compute the total number of correct predictions.

    Args:
        y_hat: predictions. When it has more than one axis and more than one
            column, the predicted class is taken as the argmax along axis 1.
        y: true labels.

    Returns:
        The number of matching predictions as a Python float.
    """
    has_class_scores = len(y_hat.shape) > 1 and y_hat.shape[1] > 1
    predicted = tf.argmax(y_hat, axis=1) if has_class_scores else y_hat
    # Cast to the label dtype so the element-wise comparison is valid.
    matches = tf.cast(predicted, dtype=y.dtype) == y
    num_correct = tf.reduce_sum(tf.cast(matches, dtype=y.dtype))
    return float(num_correct)

# 'Accumulator()' is a utility class to accumulate sums over multiple variables.
class Accumulator: #@save
    """For accumulating sums over 'n' variables."""

    def __init__(self, n):
        """Start with `n` running totals, all 0.0."""
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each argument (converted to float) to the matching total."""
        updated = []
        for running_total, increment in zip(self.data, args):
            updated.append(running_total + float(increment))
        self.data = updated

    def reset(self):
        """Set every running total back to 0.0."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Return the running total stored at position `idx`."""
        return self.data[idx]

# Evaluate the accuracy for any model 'net' on a dataset
# that is accessed via the data iterator 'data_iter'.
def evaluate_accuracy(net, data_iter): #@save
    """Compute the accuracy for a model on a dataset.

    Args:
        net: callable model; `net(X)` yields predictions for a batch.
        data_iter: iterable yielding `(X, y)` batches.

    Returns:
        Number of correct predictions divided by the number of predictions.
    """
    # Slot 0: no. of correct predictions, slot 1: no. of predictions.
    totals = Accumulator(2)
    for X, y in data_iter:
        num_correct = accuracy(net(X), y)
        num_seen = d2l.size(y)
        totals.add(num_correct, num_seen)
    correct, seen = totals[0], totals[1]
    return correct / seen

In [12]:
# Training in classification case can be done using this single module.
# 1. data set into minibatches (iterator) / 2. parameter initialization / 
# 3. net / 4. loss / 5. updater / 6. learning rate / 7. number of epochs
def train_epoch_ch3(net, train_iter, loss, updater): #@save
    """The training loop defined in Chapter 3 (one pass over `train_iter`).

    Args:
        net: the model; called as `net(X)`.
        train_iter: iterable yielding `(X, y)` minibatches.
        loss: either a `tf.keras.losses.Loss` instance or a function taking
            `(predictions, labels)`.
        updater: either a `tf.keras.optimizers.Optimizer` or a callable taking
            `(batch_size, grads)` and exposing a `params` attribute.

    Returns:
        A tuple `(training loss, training accuracy)`, each divided by the
        number of examples seen.
    """
    # Sum of training loss, sum of training accuracy, no. of examples
    metric = Accumulator(3)
    uses_keras_loss = isinstance(loss, tf.keras.losses.Loss)
    uses_keras_optimizer = isinstance(updater, tf.keras.optimizers.Optimizer)
    for X, y in train_iter:
        # Compute gradients and update parameters
        with tf.GradientTape() as tape:
            y_hat = net(X)
            # A Keras loss takes (labels, predictions), whereas the
            # hand-written losses in this notebook take (predictions, labels).
            l = loss(y, y_hat) if uses_keras_loss else loss(y_hat, y)
        if uses_keras_optimizer:
            # A Keras model tracks its own variables, so no separate
            # parameter object is needed here.
            params = net.trainable_variables
            grads = tape.gradient(l, params)
            updater.apply_gradients(zip(grads, params))
        else:
            updater(X.shape[0], tape.gradient(l, updater.params))
        # Keras loss by default returns the average loss in a batch
        if uses_keras_loss:
            l_sum = l * float(tf.size(y))
        else:
            l_sum = tf.reduce_sum(l)
        metric.add(l_sum, accuracy(y_hat, y), tf.size(y))
    # Return training loss and training accuracy (sample mean version)
    num_examples = metric[2]
    return metric[0] / num_examples, metric[1] / num_examples

# !!!!!!!!!! WHAT WE USE IS THIS ONE !!!!!!!!!!
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater): #@save
    """Train a model (defined in Chapter 3).

    Runs `num_epochs` training epochs, evaluates the test accuracy after each
    one, and plots training loss, training accuracy and test accuracy.

    Raises:
        AssertionError: if the final training loss is not below 0.5 or a
            final accuracy is not in (0.7, 1].
    """
    animator = Animator(
        xlabel='epoch',
        xlim=[1, num_epochs],
        ylim=[0.3, 0.9],
        legend=['train loss', 'train acc', 'test acc'],
    )
    for epoch_number in range(1, num_epochs + 1):
        train_metrics = train_epoch_ch3(net, train_iter, loss, updater)
        test_acc = evaluate_accuracy(net, test_iter)
        animator.add(epoch_number, train_metrics + (test_acc,))
    train_loss, train_acc = train_metrics
    # Sanity checks on the final metrics; a failing check reports the
    # offending value.
    assert train_loss < 0.5, train_loss
    assert 0.7 < train_acc <= 1, train_acc
    assert 0.7 < test_acc <= 1, test_acc

In [13]:
# Prediction
def predict_ch3(net, test_iter, n=6): #@save
    """Predict labels (defined in Chapter 3).

    Takes the first batch from `test_iter` and shows its first `n` images,
    each titled with the true label on the first line and the predicted label
    on the second.
    """
    # Only the first batch is needed.
    for X, y in test_iter:
        break
    true_names = d2l.get_fashion_mnist_labels(y)
    predicted_classes = tf.argmax(net(X), axis=1)
    predicted_names = d2l.get_fashion_mnist_labels(predicted_classes)
    titles = []
    for true_name, predicted_name in zip(true_names, predicted_names):
        titles.append(true_name + '\n' + predicted_name)
    images = tf.reshape(X[0:n], (n, 28, 28))
    d2l.show_images(images, 1, n, titles=titles[0:n])