##### Copyright 2019 The TensorFlow Authors.

In [None]:
#@title Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# TensorFlow 2 quickstart for beginners

<table class="tfo-notebook-buttons" align="left">
  <td>
    <a target="_blank" href="https://www.tensorflow.org/tutorials/quickstart/beginner"><img src="https://www.tensorflow.org/images/tf_logo_32px.png" />View on TensorFlow.org</a>
  </td>
  <td>
    <a target="_blank" href="https://colab.research.google.com/github/tensorflow/docs/blob/master/site/en/tutorials/quickstart/beginner.ipynb"><img src="https://www.tensorflow.org/images/colab_logo_32px.png" />Run in Google Colab</a>
  </td>
  <td>
    <a target="_blank" href="https://github.com/tensorflow/docs/blob/master/site/en/tutorials/quickstart/beginner.ipynb"><img src="https://www.tensorflow.org/images/GitHub-Mark-32px.png" />View source on GitHub</a>
  </td>
  <td>
    <a href="https://storage.googleapis.com/tensorflow_docs/docs/site/en/tutorials/quickstart/beginner.ipynb"><img src="https://www.tensorflow.org/images/download_logo_32px.png" />Download notebook</a>
  </td>
</table>

This short introduction uses [Keras](https://www.tensorflow.org/guide/keras/overview) to:

1. Build a neural network that classifies images.
2. Train this neural network.
3. And, finally, evaluate the accuracy of the model.

This is a [Google Colaboratory](https://colab.research.google.com/notebooks/welcome.ipynb) notebook file. Python programs are run directly in the browser—a great way to learn and use TensorFlow. To follow this tutorial, run the notebook in Google Colab by clicking the button at the top of this page.

1. In Colab, connect to a Python runtime: At the top-right of the menu bar, select *CONNECT*.
2. Run all the notebook code cells: Select *Runtime* > *Run all*.

Download and install TensorFlow 2. Import TensorFlow into your program:

Note: Upgrade `pip` to install the TensorFlow 2 package. See the [install guide](https://www.tensorflow.org/install) for details.

In [None]:
!pip install IPython



In [None]:
import tensorflow as tf

Load and prepare the [MNIST dataset](http://yann.lecun.com/exdb/mnist/). Convert the samples from integers to floating-point numbers:

In [None]:
# Preview of sample MNIST digits (image linked from Wikimedia Commons).
from IPython.display import Image

mnist_examples_url = "https://upload.wikimedia.org/wikipedia/commons/2/27/MnistExamples.png"
Image(url=mnist_examples_url)

In [None]:
# Handle to the MNIST loader that ships with Keras.
mnist = tf.keras.datasets.mnist

# load_data() is unpacked as a (train, test) pair of (images, labels) tuples.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Peek at row 5 of the first training image. Each image is a 28 x 28 matrix of
# values in 0-255, so the full image set is a 3-D array.
x_train[0, 5]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,
        18,  18,  18, 126, 136, 175,  26, 166, 255, 247, 127,   0,   0,
         0,   0], dtype=uint8)

In [None]:
# Normalization (one kind of pre-processing): divide by 255 so the pixel values
# fall in the 0-1 range instead of 0-255.
# NOTE: this rebinds x_train / x_test from themselves, so re-running the cell
# would divide the already-scaled data by 255 again.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Same row as inspected before scaling, now as floats.
x_train[0, 5]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.01176471, 0.07058824, 0.07058824,
       0.07058824, 0.49411765, 0.53333333, 0.68627451, 0.10196078,
       0.65098039, 1.        , 0.96862745, 0.49803922, 0.        ,
       0.        , 0.        , 0.        ])

In [None]:
# Report shape and container type of the images/labels for the train split,
# then the test split (one line per split).
print("\n".join(
    f"{images.shape} {labels.shape} {type(images)} {type(labels)}"
    for images, labels in ((x_train, y_train), (x_test, y_test))
))

(60000, 28, 28) (60000,) <class 'numpy.ndarray'> <class 'numpy.ndarray'>
(10000, 28, 28) (10000,) <class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [None]:
# Looking at the result: row 5 of the first training image after the
# division by 255.
x_train[0, 5]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.01176471, 0.07058824, 0.07058824,
       0.07058824, 0.49411765, 0.53333333, 0.68627451, 0.10196078,
       0.65098039, 1.        , 0.96862745, 0.49803922, 0.        ,
       0.        , 0.        , 0.        ])

In [None]:
# Illustration for the train / validation / test data topic.
from IPython.display import Image

data_split_image_url = "https://wikidocs.net/images/page/31947/%EB%8D%B0%EC%9D%B4%ED%84%B0.PNG"
Image(url=data_split_image_url)

Build the `tf.keras.Sequential` model by stacking layers. Choose an optimizer and loss function for training:

In [None]:
# Assemble the classifier by appending layers one at a time.
model = tf.keras.Sequential()

# Input layer: Flatten unrolls each 28 x 28 matrix into a 1-D vector of 784 values.
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

# Hidden layer: takes the 784 inputs and produces 128 outputs, with ReLU as the
# activation function (ReLU is the usual choice for hidden layers).
model.add(tf.keras.layers.Dense(128, activation=tf.nn.relu))

# Dropout: deactivates nodes at a rate of 0.2 to guard against overfitting.
model.add(tf.keras.layers.Dropout(0.2))

# Output layer: 10 outputs, because the task is telling apart the 10 digits 0-9.
# This example applies softmax separately from the model; building it in with
# tf.keras.layers.Dense(10, activation='softmax') is also possible.
# When only the highest-scoring class is needed at test time, softmax can be
# skipped, which avoids the cost of computing exponentials.
model.add(tf.keras.layers.Dense(10))

In [None]:
# Illustration for the Flatten layer.
from IPython.display import Image

flatten_image_url = "https://data-flair.training/blogs/wp-content/uploads/sites/2/2020/07/Flatten-Layer-in-Keras-df.jpg"
Image(url=flatten_image_url)

In [None]:
# Illustration for activation functions.
from IPython.display import Image

activation_image_url = "https://qph.fs.quoracdn.net/main-qimg-65a7c3bf75549bad04875d0e789bb5bf"
Image(url=activation_image_url)

In [None]:
# Illustration for overfitting.
from IPython.display import Image

overfitting_image_url = "https://alphadevelopment.com/wp-content/uploads/2019/03/overfitting-300x201.jpg"
Image(url=overfitting_image_url)

For each example the model returns a vector of "[logits](https://developers.google.com/machine-learning/glossary#logits)" or "[log-odds](https://developers.google.com/machine-learning/glossary#log-odds)" scores, one for each class.

In [None]:
# x_train[:1] selects only the first training image while keeping the batch axis.
first_image_batch = x_train[:1]

# Run the model on that one-image batch and convert the result to a NumPy array.
predictions = model(first_image_batch).numpy()

# Ten values come back because the model's last layer was built with 10 outputs.
predictions

array([[-0.18013436,  0.58413255, -0.16365948, -0.16508624,  0.2176819 ,
         0.13548128,  0.25180748, -0.35308874, -0.83827215,  0.16484135]],
      dtype=float32)

The `tf.nn.softmax` function converts these logits to "probabilities" for each class: 

In [None]:
# softmax rescales the scores into shares of the whole, so that the outputs
# along the class axis (the last one) sum to 1.
tf.nn.softmax(predictions, axis=-1).numpy()

array([[0.08097127, 0.1738793 , 0.08231632, 0.08219896, 0.12053145,
        0.11101998, 0.12471566, 0.06811109, 0.04192812, 0.11432784]],
      dtype=float32)

Note: It is possible to bake this `tf.nn.softmax` in as the activation function for the last layer of the network. While this can make the model output more directly interpretable, this approach is discouraged as it's impossible to
provide an exact and numerically stable loss calculation for all models when using a softmax output. 

The `losses.SparseCategoricalCrossentropy` loss takes a vector of logits and a `True` index and returns a scalar loss for each example.

In [None]:
# Sparse categorical cross-entropy over integer class labels.
# from_logits=True because the model's final Dense(10) layer has no softmax,
# so the loss is handed raw logits rather than probabilities.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

This loss is equal to the negative log probability of the true class: it is zero if the model is sure of the correct class.

This untrained model gives probabilities close to random (1/10 for each class), so the initial loss should be close to `-tf.math.log(1/10) ~= 2.3`.

In [None]:
# Loss of the first training label against the logits held in `predictions`.
loss_fn(y_true=y_train[:1], y_pred=predictions).numpy()

2.1980453

In [None]:
# optimizer: the method used to reduce the loss. Options include SGD
#   (stochastic gradient descent), RMSprop, and Adam (which can be thought of
#   as a combination of SGD and RMSprop).
# loss: how the loss is computed, e.g. binary_crossentropy,
#   categorical_crossentropy, sparse_categorical_crossentropy, and so on.
model.compile(
    loss=loss_fn,
    optimizer="adam",
    metrics=["accuracy"],
)

The `Model.fit` method adjusts the model parameters to minimize the loss: 

In [None]:
# Confirm the shape and container type of the training inputs before fitting.
print(f"{x_train.shape} {y_train.shape} {type(x_train)} {type(y_train)}")

(60000, 28, 28) (60000,) <class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [None]:
# Train for 5 epochs in mini-batches of 32 samples.
# Bind the History object that fit() returns: this keeps the per-epoch record
# available for later inspection, and stops the object's repr (which contains a
# run-specific memory address) from being emitted as the cell output.
history = model.fit(x_train, y_train, epochs=5, batch_size=32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x7f818475c810>

The `Model.evaluate` method checks the model's performance, usually on a "[Validation-set](https://developers.google.com/machine-learning/glossary#validation-set)" or "[Test-set](https://developers.google.com/machine-learning/glossary#test-set)".

In [None]:
# verbose levels: 0 = print nothing, 1 = progress bar, 2 = only the results
# (loss, accuracy, ...).
# Checkpoint: for now, run the notebook up to this cell.
model.evaluate(x=x_test, y=y_test, verbose=2)

313/313 - 0s - loss: 0.0740 - accuracy: 0.9796


[0.07397687435150146, 0.9796000123023987]

The image classifier is now trained to ~98% accuracy on this dataset. To learn more, read the [TensorFlow tutorials](https://www.tensorflow.org/tutorials/).

If you want your model to return a probability, you can wrap the trained model, and attach the softmax to it:

In [None]:
# Wrap the trained model so that its output passes through a softmax layer.
probability_model = tf.keras.Sequential()
probability_model.add(model)
probability_model.add(tf.keras.layers.Softmax())

In [None]:
# Show probabilities for the first five test images only; running the whole
# test set here would emit a 10000 x 10 tensor as the cell output.
probability_model(x_test[:5])

<tf.Tensor: shape=(10000, 10), dtype=float32, numpy=
array([[2.15051131e-07, 3.64329367e-09, 9.96033918e-07, ...,
        9.99813378e-01, 9.76438770e-08, 1.90462979e-05],
       [1.70235603e-09, 7.17849587e-04, 9.99257386e-01, ...,
        6.61687881e-13, 1.08058339e-06, 5.64914055e-15],
       [2.35109539e-07, 9.99383926e-01, 5.66446834e-05, ...,
        3.08196730e-04, 1.83768701e-04, 5.21253241e-07],
       ...,
       [5.19679514e-13, 1.00555154e-10, 1.69502103e-11, ...,
        7.78424783e-06, 1.20474908e-07, 1.27177109e-05],
       [2.39120563e-08, 8.20373529e-08, 5.01137153e-12, ...,
        1.26106494e-08, 1.57628485e-04, 9.60112212e-10],
       [2.82483188e-06, 1.27020703e-12, 1.82105111e-07, ...,
        2.58033872e-11, 1.46279933e-09, 4.93809420e-12]], dtype=float32)>