<a href="https://colab.research.google.com/github/minzzii-kim/machine-learing/blob/main/tensorflow_note_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
# 2-1. 이미지 데이터 분류기 (image)
# 2-2. 정형 데이터 분류기 (structured data)

# [iris 꽃 분류]
# tensorflow datasets 활용한 데이터 전처리 기능 구현 preprocess 함수

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds # 새롭게 추가된 부분

from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

train_ds = tfds.load('iris', split='train[:80%]')
valid_ds = tfds.load('iris', split='train[80%:]')

#print(train_ds)

# 시험에서의 전처리 요구조건
# 1. label 값에 대해 원핫인코딩
# 2. features(x), label(y) 로 분할

for data in train_ds.take(5):
  x=data['features']
  y=tf.one_hot(data['label'], 3)
  #print('x: ', x)
  #print('y: ', y)

def preprocess(data):
  x=data['features']
  y=data['label']
  y=tf.one_hot(y, 3)
  return x,y

len(train_ds) #120
len(valid_ds) #30


30

In [11]:
# preprocess 함수를 train_ds 에 적용
# batch_size 적용

# 문제에서 batch_size 주어짐
# 메모리에 120개의 이미지를 한번 올릴수 없기때문에, 1epoch 당 batch 단위로 잘라서 올린다.
# batch 갯수만큼 weight update를 진행하기때문에 성능에 큰 영향을 받는다.

batch_size=10
train_data = train_ds.map(preprocess).batch(batch_size)
valid_data = valid_ds.map(preprocess).batch(batch_size)




In [12]:
# 모델 정의 및 생성
model = Sequential([
  #정형데이터는 이미 1D 이기 때문에 flatten 레이어가 필요없음. 바로 dense 레이어 적용
  Dense(1024, activation='relu'),                    
  Dense(512, activation='relu'),                    
  Dense(256, activation='relu'),                    
  Dense(128, activation='relu'),                    
  Dense(64, activation='relu'),                    
  Dense(32, activation='relu'),                    
  Dense(16, activation='relu'),                    
  Dense(3, activation='softmax'),  # 분류문제의 출력층 => 고정되어있음 => loss 함수 선택                 
])

tmp_path = 'tmp_checkpoint.ckpt'
modelcheckpoint_obj = ModelCheckpoint(
    filepath=tmp_path,
    monitor='val_loss',
    save_weights_only=True,
    save_best_only = True,
    verbose=1
)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])



In [13]:
# 학습시작
model.fit(train_data, validation_data=valid_data, epochs=20, callbacks=[modelcheckpoint_obj] )

model.load_weights(tmp_path)

Epoch 1/20
Epoch 00001: val_loss improved from inf to 0.88706, saving model to tmp_checkpoint.ckpt
Epoch 2/20
Epoch 00002: val_loss improved from 0.88706 to 0.64021, saving model to tmp_checkpoint.ckpt
Epoch 3/20
Epoch 00003: val_loss improved from 0.64021 to 0.34527, saving model to tmp_checkpoint.ckpt
Epoch 4/20
Epoch 00004: val_loss did not improve from 0.34527
Epoch 5/20
Epoch 00005: val_loss did not improve from 0.34527
Epoch 6/20
Epoch 00006: val_loss improved from 0.34527 to 0.30395, saving model to tmp_checkpoint.ckpt
Epoch 7/20
Epoch 00007: val_loss improved from 0.30395 to 0.23896, saving model to tmp_checkpoint.ckpt
Epoch 8/20
Epoch 00008: val_loss improved from 0.23896 to 0.19497, saving model to tmp_checkpoint.ckpt
Epoch 9/20
Epoch 00009: val_loss did not improve from 0.19497
Epoch 10/20
Epoch 00010: val_loss did not improve from 0.19497
Epoch 11/20
Epoch 00011: val_loss did not improve from 0.19497
Epoch 12/20
Epoch 00012: val_loss improved from 0.19497 to 0.16346, saving

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f06c895afd0>

In [15]:
model.evaluate(valid_data)



[0.15446384251117706, 0.9333333373069763]