<a href="https://colab.research.google.com/github/leechanhoe/deep-learning-project/blob/master/project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import load_model

import pickle
import numpy as np
import matplotlib.pyplot as plt

In [4]:
def load_data(data, labels):
    # 데이터셋의 이미지 개수
    num_images = data.shape[0]

     # (10000, 3072) 형태인 데이터를 (10000, 3, 32, 32) 형태로 변경
    images = np.reshape(data, (num_images, 32, 32, 3), order='F')

    # (10000, 3, 32, 32) 형태의 이미지 데이터를 (10000, 32, 32, 3)로 변경
    images = np.swapaxes(images, 1, 2)
    
    # 레이블과 함께 튜플로 반환
    return images, labels
  

In [14]:
def pre_process(data): 
    images, lbls = load_data(data[b'data'], data[b'labels'])
    images = images.astype('float32') / 255.

    x_train, x_test, y_train, y_test = train_test_split(images, lbls, test_size=0.2)

    y_train = np.array(y_train)
    y_test = np.array(y_test)
    return x_train, x_test, y_train, y_test

In [None]:
def create_cnn_model():
    model = Sequential()

    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(32, 32, 3)))
    # model.add(Conv2D(32, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    # model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(10, activation='softmax'))
    
    return model

model = create_cnn_model()
model.compile(loss='sparse_categorical_crossentropy', optimizer=Adam(learning_rate=0.0001), metrics=['accuracy'])
keras.utils.plot_model(model, show_shapes=True)

In [9]:
def train(model, filepaths, epochs=30, batch_size=64, callback=[]):
  
    history = []
    for filepath in filepaths:
        # 데이터 로드 및 전처리
        with open(filepath, 'rb') as f:
            data = pickle.load(f, encoding='bytes')

        x_train, x_test, y_train, y_test = pre_process(data)

        # 모델 학습
        history.append(model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(x_test, y_test), callbacks=callback))
    return model, history

In [24]:
file_name = ['/content/drive/MyDrive/Colab Notebooks/data/data_batch_1',
    '/content/drive/MyDrive/Colab Notebooks/data/data_batch_2',
    '/content/drive/MyDrive/Colab Notebooks/data/data_batch_3',
    '/content/drive/MyDrive/Colab Notebooks/data/data_batch_4',
    '/content/drive/MyDrive/Colab Notebooks/data/data_batch_5']

checkpoint = keras.callbacks.ModelCheckpoint('best_cnn_model.h5', save_best_only=True)
# 5번의 연속적인 epoch 동안 개선이 없을 때 학습이 중단되도록 함
early_stopping = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

callback = [checkpoint, early_stopping]

# model, history = train(model, file_name, epochs=30, batch_size=64, callback)

In [15]:
with open('/content/drive/MyDrive/Colab Notebooks/data/test_batch', 'rb') as f:
    data = pickle.load(f, encoding='bytes')

x_train, x_test, y_train, y_test = pre_process(data)

In [26]:
# 모델 평가
scores = model.evaluate(x_test, y_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

Test loss: 0.9245866537094116
Test accuracy: 0.6710000038146973


In [None]:
for i in range(len(history)):
    plt.plot(history[i].history['loss'])
    plt.plot(history[i].history['val_loss'])
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(['train', 'val'])
    plt.show()

In [21]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def train_modifyed_img(model, filepaths, epochs=10, batch_size=64, callback=[]):
    datagen = ImageDataGenerator(rotation_range=15,width_shift_range=0.1, height_shift_range=0.1, horizontal_flip=True)
    
    history = []
    for filepath in filepaths:
        with open(filepath, 'rb') as f:
            data = pickle.load(f, encoding='bytes')
        x_train, x_test, y_train, y_test = pre_process(data)
        datagen.fit(x_train)

        history.append(model.fit(datagen.flow(x_train, y_train, batch_size=batch_size), epochs=epochs, validation_data=(x_test, y_test), callbacks=callback))
    
    return model, history

In [None]:
model, history = train_modifyed_img(model, file_name, epochs=10, batch_size=64, callback=callback)

In [None]:
plt.imshow(x_test[0].reshape(32, 32, 3))
plt.show()