### Cifar-10 이미지 분류 - v2
- Canadian Institute for Advanced Research, 10 classes
- 데이터 부풀리기 사용

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
# One shared seed for both the TensorFlow and the NumPy global RNGs.
seed = 2023
tf.random.set_seed(seed)
np.random.seed(seed)

- 데이터 전처리

In [2]:
from tensorflow.keras.datasets import cifar10
# Load the CIFAR-10 train and test splits as (images, labels) pairs.
# The recorded output of this cell shows the archive being downloaded.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [3]:
# X data: scale pixel values by 1/255.
# Cast to float32 before dividing: dividing the raw array by a Python float
# would promote it to float64 and double the memory held by the images.
# NOTE: run this cell once per kernel; re-running it divides by 255 again.
X_train = X_train.astype('float32') / 255.
X_test = X_test.astype('float32') / 255.

In [4]:
# y data: one-hot encoding
from tensorflow.keras.utils import to_categorical
# Pin the class count instead of letting it be inferred from the largest label
# present, so both label matrices always have 10 columns, matching the
# 10-unit softmax output layer of the model.
Y_train = to_categorical(y_train, num_classes=10)
Y_test = to_categorical(y_test, num_classes=10)

- 모델 정의/설정/학습/평가

In [5]:
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dropout, Dense
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

In [6]:
# CNN classifier: two convolutional stages (32 filters, then 64), each made of
# a 'same'-padded conv, an unpadded conv, max pooling and dropout, followed by
# a 512-unit dense layer and a 10-way softmax output.
model = Sequential()

# Stage 1: 32 filters. The input shape is taken from the training images.
model.add(Conv2D(filters=32, kernel_size=(3, 3), padding='same',
                 activation='relu', input_shape=X_train.shape[1:]))
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.25))

# Stage 2: 64 filters.
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D())
model.add(Dropout(0.25))

# Classifier head.
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(10, activation='softmax'))

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 32, 32, 32)        896       
                                                                 
 conv2d_1 (Conv2D)           (None, 30, 30, 32)        9248      
                                                                 
 max_pooling2d (MaxPooling2  (None, 15, 15, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 15, 15, 32)        0         
                                                                 
 conv2d_2 (Conv2D)           (None, 15, 15, 64)        18496     
                                                                 
 conv2d_3 (Conv2D)           (None, 13, 13, 64)        36928     
                                                        

In [7]:
# Pass every argument by keyword. The positional slot after `loss` is not
# stable across Keras releases (Keras 3 places `loss_weights` there), so a
# positional ['accuracy'] could be taken as loss weights instead of metrics.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [8]:
# Where the best model is checkpointed during training.
model_path = 'models/best_cifar10-v2.h5'

# mc: save the model only when the monitored validation loss improves.
mc = ModelCheckpoint(
    filepath=model_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)

# es: stop training after 5 epochs without a validation-loss improvement.
es = EarlyStopping(monitor='val_loss', patience=5)

- 데이터 부풀리기

In [9]:
# Carve a validation set out of the training data:
# train(50000) -> train(40000) + valid(10000); test(10000) stays untouched.
# NOTE: this rebinds X_train / Y_train, so re-running the cell splits again.
from sklearn.model_selection import train_test_split
X_train, X_valid, Y_train, Y_valid = train_test_split(
    X_train,
    Y_train,
    test_size=0.2,
    random_state=seed,
    stratify=Y_train,
)

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random augmentations applied to the training images.
# No `rescale` option is given: the pixel data was already scaled earlier.
augmentation_options = dict(
    rotation_range=10,          # random rotation, range of 10 degrees
    width_shift_range=0.1,      # random horizontal shift
    height_shift_range=0.1,     # random vertical shift
    shear_range=0.1,            # random shear (slant) of the image
    zoom_range=[0.8, 1.2],      # random zoom factor between 0.8 and 1.2
    horizontal_flip=True,       # randomly flip images left/right
    vertical_flip=False,        # never flip images upside down
    fill_mode='nearest',        # how points outside the input boundaries are filled
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [11]:
# fit() only computes the dataset statistics used by featurewise_center,
# featurewise_std_normalization and zca_whitening. When none of them is
# enabled the call just copies the whole training array for no effect, so it
# is skipped unless one of those options is actually turned on.
if (
    train_datagen.featurewise_center
    or train_datagen.featurewise_std_normalization
    or train_datagen.zca_whitening
):
    train_datagen.fit(X_train)

- 데이터 부풀리기를 한 결과로 학습

In [12]:
# Train on augmented mini-batches of 32 images. Validation runs on the
# un-augmented hold-out arrays; `mc` and `es` are the callbacks passed in.
train_batches = train_datagen.flow(X_train, Y_train, batch_size=32)
hist = model.fit(
    train_batches,
    epochs=100,
    validation_data=(X_valid, Y_valid),
    callbacks=[mc, es],
)

Epoch 1/100
Epoch 1: val_loss improved from inf to 1.30827, saving model to models/best_cifar10-v2.h5


  saving_api.save_model(


Epoch 2/100
Epoch 2: val_loss improved from 1.30827 to 1.18888, saving model to models/best_cifar10-v2.h5
Epoch 3/100
Epoch 3: val_loss improved from 1.18888 to 1.14314, saving model to models/best_cifar10-v2.h5
Epoch 4/100
Epoch 4: val_loss improved from 1.14314 to 1.02947, saving model to models/best_cifar10-v2.h5
Epoch 5/100
Epoch 5: val_loss improved from 1.02947 to 0.92632, saving model to models/best_cifar10-v2.h5
Epoch 6/100
Epoch 6: val_loss improved from 0.92632 to 0.92419, saving model to models/best_cifar10-v2.h5
Epoch 7/100
Epoch 7: val_loss did not improve from 0.92419
Epoch 8/100
Epoch 8: val_loss improved from 0.92419 to 0.88994, saving model to models/best_cifar10-v2.h5
Epoch 9/100
Epoch 9: val_loss improved from 0.88994 to 0.83370, saving model to models/best_cifar10-v2.h5
Epoch 10/100
Epoch 10: val_loss improved from 0.83370 to 0.82579, saving model to models/best_cifar10-v2.h5
Epoch 11/100
Epoch 11: val_loss improved from 0.82579 to 0.77855, saving model to models/be

In [13]:
# Reload the checkpoint written to `model_path` and score it on the test set.
# The cell displays the value returned by evaluate().
best_model = load_model(model_path)
best_model.evaluate(x=X_test, y=Y_test)



[0.7946377396583557, 0.7175999879837036]