In [3]:
import tensorflow as tf
import numpy as np

># TensorBoard 활용하기

- 신경망은 깊어질수록 매개변수도 많아지고, 내부에서 어떤 일들이 일어나는지 파악하기 어려움
- 따라서 학습 중에 상태를 모니터링하고 틀린 부분을 곧바로 찾을 수 있도록, TensorFlow의 계산을 모니터링할 수 있게 해 주는 도구인 TensorBoard를 사용

___
>## 기본 준비와 계산 그래프 출력하기

- 어떤 것을 출력할지, 어떤 시점에서 데이터를 모니터링 하고 싶은지 정도는 사전에 정의 하여야 함
- 앞서 '7. Neural Network'에서 구성한 신경망을 사용하여 TensorBoard 테스트
- 세션 시작 후 다음과 같이 한 줄을 삽입
```python
with tf.Session() as sess:
    writer = tf.summary.FileWriter('gs://<BUCKET>/dnnmodel', sess.graph) # 해당 줄 추가
```

In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the WDBC table from BigQuery; the query orders rows by `index`.
query = 'select * from testdataset.wdbc order by index'
dataset = pd.read_gbq(project_id = 'mlwithgcp', query = query)

# Encode the label column: 'M' -> 0, every other value -> 1.
dataset['diagnostic'] = dataset['diagnostic'].apply(lambda X: 0 if X=='M' else 1)

# `index` was only needed for ordering; drop it so it is not used as a feature.
# Rebinding (instead of inplace=True) keeps the step explicit.
dataset = dataset.drop('index', axis = 1)

# `.values` replaces `.as_matrix()`, which was deprecated in pandas 0.23 and
# removed in pandas 1.0; both return the underlying NumPy array.
X_dataset = dataset.drop('diagnostic', axis = 1).values
y_dataset = dataset['diagnostic'].values

# NOTE(review): the features are passed on unscaled. The recorded training
# logs plateau at a loss of ~0.656, which suggests the network stops learning;
# standardizing the features before training is worth trying -- TODO confirm.

# Hold out 20% of the rows for testing; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(X_dataset, y_dataset,
                                                    test_size = 0.2,
                                                    random_state = 42)

Requesting query... ok.
Job ID: f1f39a0e-90b6-4718-ad16-9b1ec75eaf97
Query running...
Query done.
Processed: 139.5 KB Billed: 10.0 MB
Standard price: $0.00 USD

Retrieving results...
Got 569 rows.

Total time taken 3.86 s.
Finished at 2018-11-12 23:15:52.


In [6]:
NUM_FEATURES = 30
NUM_UNITS_H1 = 4
NUM_UNITS_H2 = 4
NUM_CLASSES = 2

# Build a two-hidden-layer classifier in its own graph, train it, and write the
# graph definition to the TensorBoard log directory.
with tf.Graph().as_default():
  # Input placeholders
  X = tf.placeholder(np.float32, shape = [None, NUM_FEATURES], name = 'X')
  y = tf.placeholder(np.float32, shape = [None], name = 'y')

  # Hidden layers
  hidden1 = tf.layers.dense(inputs = X,
                            units = NUM_UNITS_H1,
                            activation = tf.nn.relu,
                            name = 'hidden1')
  hidden2 = tf.layers.dense(inputs = hidden1,
                            units = NUM_UNITS_H2,
                            activation = tf.nn.relu,
                            name = 'hidden2')

  # Output layer (raw logits; softmax is applied inside the loss op)
  logits = tf.layers.dense(inputs = hidden2,
                           units = NUM_CLASSES,
                           name = 'output')

  # Loss (softmax + cross entropy).
  # Labels are fed as float32, so cast to int32 before one-hot encoding.
  one_hot_labels = tf.one_hot(indices = tf.cast(y, tf.int32),
                              depth = NUM_CLASSES)

  cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels = one_hot_labels,
                                                             logits = logits,
                                                             name = 'xentropy')

  loss = tf.reduce_mean(cross_entropy, name = 'xentropy_mean')

  # Minimize the loss
  train_op = tf.train.AdamOptimizer(0.01).minimize(loss)

  # Accuracy: fraction of rows whose arg-max logit matches the label
  correct_prediction = tf.equal(tf.argmax(logits, 1),
                                tf.argmax(one_hot_labels, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  # Run
  with tf.Session() as sess:
    #######################################################################
    # Line added for TensorBoard: write the session graph to the log dir.
    writer = tf.summary.FileWriter('gs://ml_with_gcp/dnnmodel', sess.graph)
    #######################################################################
    try:
      sess.run(tf.global_variables_initializer())

      # Full-batch training; report the loss every 100 steps.
      for step in range(1000):
        _, loss_value = sess.run([train_op, loss],
                                 feed_dict = {X:X_train, y:y_train})

        if step % 100 ==0:
          print('Step: %d, Loss: %f' % (step, loss_value))

      _a = sess.run(accuracy, feed_dict ={X:X_test, y:y_test})
      print('Accuray: %f' % _a)
    finally:
      # Close the writer so the pending graph event is flushed to the log
      # directory even if training raises.
      writer.close()

Step: 0, Loss: 26.555311
Step: 100, Loss: 0.656507
Step: 200, Loss: 0.656150
Step: 300, Loss: 0.656150
Step: 400, Loss: 0.656150
Step: 500, Loss: 0.656149
Step: 600, Loss: 0.656150
Step: 700, Loss: 0.656150
Step: 800, Loss: 0.656150
Step: 900, Loss: 0.656150
Accuray: 0.596491


- TensorBoard를 열려면 Cloud Shell에서 `tensorboard --logdir=gs://<BUCKET>/dnnmodel --port 8082` 명령을 실행한 뒤 해당 포트(8082)로 접속
![](https://00e9e64bac28a0cb54c605869d8b8c1d4b43d43ea7a5c23e37-apidata.googleusercontent.com/download/storage/v1/b/ml_with_gcp/o/dnnmodel%2FTensorBoard.PNG?qk=AD5uMEvC6XAnaIAR6vw420g7GtrVFbbfJasS74kjTAWKl0thU66um2cx3bWpuUtiHQ9MI3LcYkcYKUEo3vxw-gMjGcT_JBxKSkKifuXVZ8CuDgxkk0yNCzhA0VaNedvUfurbB7-R2NnqvcCTdycEVF7txGzt0tlkwqvFb8NfzD_Bnp96IUr8N5xyTRthlwFiAVLSoBNhYEUCMldlXUq7hbiAX8yKHKvAwkRdB327I5TOyBOE6x-84g5f2ejRd8pZpyvqVZtnfWsdY1SzXOr-MgDx-O1SBvIgbvPfqh_mBJKQlQbmyY5BZHTqS-8yk69JQB5R92z4Ox6n0DMf6aSPQ__aO55vNjW_tH4p5QTRmRAYLMJTr4Ed9a-vQifbjL0YGMjT4GANm5wT1pa9Dq8xfQBpA9HztJzUtpo5LwOMFmnsOph5m4pUl4s6DZXRlAjhFF_bYshiJsIebDyBruwsdC5NLcz8RMPjwmEEe2nSR_KsR0CW1qe7BSS9K79HpZmM6691ztHAW2t5P7k1JNvZglGlY4h8MY1HVtthxOf8GB7HuPop6el1ep2CvJ3vXudwSeQH_N8Aq-DlxZZ2s4C4uYQBwPeUZ1immKB5XIecLMHB4OnOoALMm9pZxG9NWNCdKguf-vm-zxFphURvOMSIb3ZtmIuCHSiqIOk3xJOuS0bRULQwqCQ6YhkYNz74GN8U9KuQrdHgM_gy5nzAB3eXq9_wbDU-Ot9JYuUdiPwR48B3fWI9o2UYBo0WHtBhRVb5DylnEBDA5MqRk8eaixfakEmvixANftZszZHzb2FHYU1HvoR8f3iGEhg)

- 위의 계산 그래프를 보면 복잡하다는 느낌을 받을 수 있음
- 이는 손실과 정답률을 계산하는 저수준의 계산까지 출력하기 때문
- `tf.name_scope`를 사용하면 어느정도 기능적으로 비슷한 부분을 그룹으로 묶어 출력
- '손실'과 '정답률 계산' 영역을 각각 `with` 문으로 묶으면 다음과 같음

In [8]:
NUM_FEATURES = 30
NUM_UNITS_H1 = 4
NUM_UNITS_H2 = 4
NUM_CLASSES = 2

# Same network as before, but the loss and accuracy ops are wrapped in
# tf.name_scope so TensorBoard draws each group as a single collapsible node.
with tf.Graph().as_default():
  # Input placeholders
  X = tf.placeholder(np.float32, shape = [None, NUM_FEATURES], name = 'X')
  y = tf.placeholder(np.float32, shape = [None], name = 'y')

  # Hidden layers
  hidden1 = tf.layers.dense(inputs = X,
                            units = NUM_UNITS_H1,
                            activation = tf.nn.relu,
                            name = 'hidden1')
  hidden2 = tf.layers.dense(inputs = hidden1,
                            units = NUM_UNITS_H2,
                            activation = tf.nn.relu,
                            name = 'hidden2')

  # Output layer (raw logits; softmax is applied inside the loss op)
  logits = tf.layers.dense(inputs = hidden2,
                           units = NUM_CLASSES,
                           name = 'output')

  # Loss (softmax + cross entropy), grouped under 'calc_loss'
  ####################################################################################
  with tf.name_scope('calc_loss'):
    # Labels are fed as float32, so cast to int32 before one-hot encoding.
    one_hot_labels = tf.one_hot(indices = tf.cast(y, tf.int32),
                                depth = NUM_CLASSES)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels = one_hot_labels,
                                                               logits = logits,
                                                               name = 'xentropy')
    loss = tf.reduce_mean(cross_entropy, name = 'xentropy_mean')
  ####################################################################################

  # Minimize the loss
  train_op = tf.train.AdamOptimizer(0.01).minimize(loss)

  # Accuracy, grouped under 'calc_accuracy'
  ####################################################################################
  with tf.name_scope('calc_accuracy'):
    correct_prediction = tf.equal(tf.argmax(logits, 1),
                                  tf.argmax(one_hot_labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  ####################################################################################

  # Run
  with tf.Session() as sess:
    # Write the session graph to the TensorBoard log directory.
    writer = tf.summary.FileWriter('gs://ml_with_gcp/dnnmodel', sess.graph)
    try:
      sess.run(tf.global_variables_initializer())

      # Full-batch training; report the loss every 100 steps.
      for step in range(1000):
        _, loss_value = sess.run([train_op, loss],
                                 feed_dict = {X:X_train, y:y_train})

        if step % 100 ==0:
          print('Step: %d, Loss: %f' % (step, loss_value))

      _a = sess.run(accuracy, feed_dict ={X:X_test, y:y_test})
      print('Accuray: %f' % _a)
    finally:
      # Close the writer so the pending graph event is flushed to the log
      # directory even if training raises.
      writer.close()

Step: 0, Loss: 140.856750
Step: 100, Loss: 0.658099
Step: 200, Loss: 0.656149
Step: 300, Loss: 0.656150
Step: 400, Loss: 0.656149
Step: 500, Loss: 0.656150
Step: 600, Loss: 0.656150
Step: 700, Loss: 0.656150
Step: 800, Loss: 0.656150
Step: 900, Loss: 0.656150
Accuray: 0.596491


- 수정한 TensorBoard는 아래와 같은 모양으로 출력
- 그룹으로 묶은 부분을 클릭하면 내부의 내용을 자세하게 확인 가능
- 코드만 보고 의도한 대로 그래프가 만들어졌는지 확인하기 어려운 경우 TensorBoard를 활용하면 편리

![](https://00e9e64bacfd19d2d87fec70676809a9ed094c0031b1fa1094-apidata.googleusercontent.com/download/storage/v1/b/ml_with_gcp/o/dnnmodel%2FTensorBoard_with_name_scope.PNG?qk=AD5uMEvWk7eZDpyaHX4tsI_pa-gYcfPolUy9reW10y8jdxLri-ipt-pWutrlWoLfezSh_MKGwmEy1Lv1sSdv2C_FoHs0ZEsCbrIasL5ARmrxgZtgCkm3yK5Ym_FyGiV6EFKVpGf78YjbLoh69R1aH-W4kXkWjE4vr7hnWCBrFqzLQMCmK7fYE4olB_EIu4v1V-fAFGiURH2v67qoEucFXD27mrgHeFgzzRAzauEfwEO7xHerHc27Uw5UAh83BHdB0ockMw46OIdF3vz4r0bAh05WOPZWUtpU5354cZrIYfWM-2WWTBKvSzT-cbBW16u96X_JkEXv1WjigamFIU9Hr5LHcW4qh7Pi97MmoWya2WXPCDi6EL4zapEL80v-rHyAlr0Dwmzm1imT0w1MuRpFLOFz1WvL25eW9SqLBFmzI19CG8y4pCbyYsPnaN6qCOD0KsjyqL2EIUvlD2m596IGk1CedvXnSCgpEmirCGz4V7k7_RTEVHwnxt8vabi1bnat-V6ofBHWj8_l6spa-GIrwveitHIUt12fzJcdJ-w6xwzMom41fNrVCyo6KzZPvmBl1ycZcFv2ZKwPY7cIydoyD_VPmVcZs8tbpW-uKoadNgr553NBZMXjyjwHA-mLUp7vI_44z92vYRl28bjJeS3hlYRbGJdhvZNTlyVrBcW2WtibVZBtdeQ6u09aURLu5AOvGU7Ntbk5_MftAiP3-ZBOJ_5mY8Pp-NC0TIZ75bcqXY1r2xCvAOhc5tknzFjmy5OXkVUOBoaNLCG0XBlyAnK78oUUBGJpieISVrvDLTBNbAz1IIXnEG2shxhx-xCe95tJQMIyqL8b8f5v)

___
>## 매개변수 모니터링

- '정답률'과 '손실률' 추이는 학습의 진척 상태를 확인하는데 매우 중요
- 또한 '가중치'와 각 유닛의 '출력값 분포'도 학습이 잘 진행되고 있는지 확인하기 위한 정보 중 하나임  
    ('가중치'가 변화하지 않으면 학습이 진행되고 있지 않음)
- 매개변수 모니터링은 다음 2종의 코드만 추가하면 작동:
    1. 어떤 매개변수를 모니터링할 것인지: `tf.summary.scalar`, `tf.summary.histogram`
    2. 언제 매개변수를 추출할 것인지
- '정답률', '손실', '은닉층의 출력'을 모니터링 한다면 그래프를 그리는 부분 아래에 다음과 같은 코드 추가
```python
tf.summary.scalar('loss', loss)
tf.summary.scalar('accuracy', accuracy)
tf.summary.histogram('hidden1', hidden1)
tf.summary.histogram('hidden2', hidden2)
merged_summary = tf.summary.merge_all()
```

In [12]:
NUM_FEATURES = 30
NUM_UNITS_H1 = 4
NUM_UNITS_H2 = 4
NUM_CLASSES = 2

# Same network again, now also recording scalar (loss, accuracy) and histogram
# (hidden-layer outputs) summaries for TensorBoard during training.
with tf.Graph().as_default():
  # Input placeholders
  X = tf.placeholder(np.float32, shape = [None, NUM_FEATURES], name = 'X')
  y = tf.placeholder(np.float32, shape = [None], name = 'y')

  # Hidden layers
  hidden1 = tf.layers.dense(inputs = X,
                            units = NUM_UNITS_H1,
                            activation = tf.nn.relu,
                            name = 'hidden1')
  hidden2 = tf.layers.dense(inputs = hidden1,
                            units = NUM_UNITS_H2,
                            activation = tf.nn.relu,
                            name = 'hidden2')

  # Output layer (raw logits; softmax is applied inside the loss op)
  logits = tf.layers.dense(inputs = hidden2,
                           units = NUM_CLASSES,
                           name = 'output')

  # Loss (softmax + cross entropy), grouped under 'calc_loss'
  with tf.name_scope('calc_loss'):
    # Labels are fed as float32, so cast to int32 before one-hot encoding.
    one_hot_labels = tf.one_hot(indices = tf.cast(y, tf.int32),
                                depth = NUM_CLASSES)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels = one_hot_labels,
                                                               logits = logits,
                                                               name = 'xentropy')
    loss = tf.reduce_mean(cross_entropy, name = 'xentropy_mean')

  # Minimize the loss
  train_op = tf.train.AdamOptimizer(0.01).minimize(loss)

  # Accuracy, grouped under 'calc_accuracy'
  with tf.name_scope('calc_accuracy'):
    correct_prediction = tf.equal(tf.argmax(logits, 1),
                                  tf.argmax(one_hot_labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

  # Summaries to monitor; merge_all() bundles them into a single op so one
  # sess.run call evaluates all of them.
  tf.summary.scalar('loss', loss)
  tf.summary.scalar('accuracy', accuracy)
  tf.summary.histogram('hidden1', hidden1)
  tf.summary.histogram('hidden2', hidden2)
  merged_summary = tf.summary.merge_all()

  # Run
  with tf.Session() as sess:
    writer = tf.summary.FileWriter('gs://ml_with_gcp/dnnmodel', sess.graph)
    try:
      sess.run(tf.global_variables_initializer())

      for step in range(1000):
        _, loss_value = sess.run([train_op, loss],
                                 feed_dict = {X:X_train, y:y_train})

        if step % 100 ==0:
          #######################################################################
          # Every 100 steps: evaluate the merged summaries on the training
          # data and record them against the current step.
          s = sess.run(merged_summary, feed_dict = {X:X_train, y:y_train})
          writer.add_summary(s, step)
          writer.flush()
          print('Step: %d, Loss: %f' % (step, loss_value))
          #######################################################################

      _a = sess.run(accuracy, feed_dict ={X:X_test, y:y_test})
      print('Accuray: %f' % _a)
    finally:
      # Close the writer so any pending events are flushed to the log
      # directory even if training raises.
      writer.close()

Accuray: 0.596491
