# **웹툰 장르별 썸네일을 ResNet50 모델에 학습**

In [None]:
# Pin TensorFlow to version 2.8 for this notebook.
!pip install tensorflow==2.8

In [None]:
# Install cuDNN 8.1.0.77 (CUDA 11.2 build), allowing held packages to be changed.
# NOTE(review): presumably the cuDNN build matching the pinned TensorFlow — confirm.
!apt install --allow-change-held-packages libcudnn8=8.1.0.77-1+cuda11.2

In [None]:
# Provides the `splitfolders` module imported below.
pip install split-folders

In [None]:
import splitfolders

import os # 리눅스 명령어를 파이선에서 쓰고 싶을 때

import cv2 # 이미지 읽어들일 때
import numpy as np

from matplotlib import pyplot as plt
import matplotlib.cm as cm

import tensorflow as tf

import pickle

from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

In [None]:
from tensorflow.keras.applications.resnet50 import ResNet50

In [None]:
from tensorflow.keras import datasets, layers, models

from tensorflow.keras.layers import Dense, Flatten, MaxPooling2D
from tensorflow.keras import Input
from tensorflow.keras.layers import Dropout, BatchNormalization

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

## **모델 학습 전 사전 설정**

In [None]:
# Download the dataset.
# NOTE: downloading too often triggers an access limit; when that happens the link must be replaced with an updated one.
# !gdown https://drive.google.com/uc?id= #Google Drive link to the image archive

In [None]:
# Extract the image archive quietly (-q) into the current directory.
!unzip -q img.zip

In [None]:
# 경로를 문자열로 놓고 변수화하면 해당 경로의 파일들을 참조하여 읽어들이기 편함
folder_path = '/content/img' 
label_names = os.listdir(folder_path)
label_names

In [None]:
 # 파일에 라벨 달기

dataset = {} # 딕셔너리의 key = 라벨, value = 이미지 경로

# 이미지와 라벨 리스트에 담기
for label in os.listdir(folder_path):
    sub_path = folder_path+'/'+label+'/' # 세부 경로 지정
    dataset[label] = []
    for filename in os.listdir(sub_path):
        dataset[label].append(sub_path+filename)

dataset

In [None]:
# Create the output tree for the resized images: one sub-folder per genre label under resized/.
# NOTE: plain mkdir (no -p) reports an error when a directory already exists, e.g. when this cell is re-run.
!mkdir resized
!mkdir resized/daily
!mkdir resized/comic
!mkdir resized/fantasy
!mkdir resized/action
!mkdir resized/drama
!mkdir resized/pure
!mkdir resized/sensibility
!mkdir resized/thrill
!mkdir resized/historical
!mkdir resized/sports

## **Resize with padding**

### **train data**

In [None]:
# Resize every image so that its longer side is TARGET_SIZE pixels (aspect ratio kept),
# centre it on a black TARGET_SIZE x TARGET_SIZE canvas and save it under
# /content/resized/<label>/<original file name>.
TARGET_SIZE = 112  # side length in pixels of the square output image

for label, filenames in dataset.items():
    for filename in filenames:
        img = cv2.imread(filename)  # cv2.imread(filename = file path)
        if img is None:
            # cv2.imread returns None for files it cannot decode; skip them instead of
            # crashing on img.shape below
            print('skipped unreadable image: {0}'.format(filename))
            continue

        # Scale factor that maps the longer side onto TARGET_SIZE
        percent = TARGET_SIZE / max(img.shape[0], img.shape[1])
        img = cv2.resize(img, dsize=(0, 0), fx=percent, fy=percent, interpolation=cv2.INTER_LINEAR)

        h, w = img.shape[:2]

        # Offsets that centre the picture: [ margin/2 [picture] margin/2 ].
        # Each axis is clamped on its own; the previous if/elif skipped the
        # vertical clamp whenever the horizontal one fired.
        w_x = max((TARGET_SIZE - w) / 2, 0)
        h_y = max((TARGET_SIZE - h) / 2, 0)

        # 2x3 affine matrix for a pure translation; uncovered canvas area stays black
        M = np.float32([[1, 0, w_x], [0, 1, h_y]])
        img_re = cv2.warpAffine(img, M, (TARGET_SIZE, TARGET_SIZE))

        cv2.imwrite('/content/resized/{0}/{1}'.format(label, filename.split("/")[-1]), img_re)

In [None]:
# Check the preprocessing result.
# cv2.imread returns the channels in BGR order while plt.imshow expects RGB,
# so convert before displaying; otherwise red and blue appear swapped.
plt.imshow(cv2.cvtColor(cv2.imread('/content/resized/comic/100_comic_103759.jpg'), cv2.COLOR_BGR2RGB))

In [None]:
import splitfolders

# Split the class sub-folders of 'resized' into the 'dataset' output folder
# with a 0.6 / 0.2 / 0.2 ratio; the fixed seed keeps the split reproducible.
splitfolders.ratio(
    'resized',
    output='dataset',
    seed=77,
    ratio=(0.6, 0.2, 0.2),
)
#splitfolders.ratio('resized', output='dataset', seed=77, ratio=(0.8, 0.2)) ## trainset 0.8, testset 0.2

# 'resized' contains one sub-folder per label; each of them is split automatically

In [None]:
folder_path = '/content/dataset/train'

# Collect the images per label: key = label (sub-folder name), value = list of image paths
dataset = {
    label: [
        folder_path + '/' + label + '/' + filename
        for filename in os.listdir(folder_path + '/' + label + '/')
    ]
    for label in os.listdir(folder_path)
}

dataset

In [None]:
# Integer class index of every genre label: the position in the tuple is the index
label2index = {
    name: index
    for index, name in enumerate((
        'daily', 'comic', 'fantasy', 'action', 'drama',
        'pure', 'sensibility', 'thrill', 'historical', 'sports',
    ))
}


In [None]:
x_train, y_train = [], []

for label, filenames in dataset.items():
    for filename in filenames:
        image = cv2.imread(filename) # img를 array 형태로 변경

        x_train.append(image)
        y_train.append(label2index[label]) # label을 index로 변경

In [None]:
x_train, y_train = np.array(x_train), np.array(y_train) #array로 형변환. 모델이 입력 받을 때 array로만 받음

In [None]:
x_train = x_train.astype('float32')

In [None]:
x_train.shape, y_train.shape

### **Augmentation**

In [None]:
# Random transformations applied by the generator when producing augmented images
augmentation_options = {
    'rotation_range': 40,
    'width_shift_range': 0.2,
    'height_shift_range': 0.2,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
    'vertical_flip': True,
}
datagen = ImageDataGenerator(**augmentation_options)

In [None]:
# folder_path

In [None]:
from tqdm import tqdm

# For every image below folder_path, draw three augmented batches from datagen;
# each drawn batch is written as a jpg into the same label folder, prefixed with the label.
for label in tqdm(os.listdir(folder_path)):
    label_path = folder_path + '/' + label + '/'
    for filename in os.listdir(label_path):
        # Load the image as an array and add a leading batch axis, since flow() works on batches
        x = np.expand_dims(img_to_array(load_img(label_path + filename)), axis=0)

        # flow = the augmentation iterator; it would keep yielding, so only three batches are taken
        augmented = datagen.flow(x, batch_size=1,
                                 save_to_dir=label_path, save_prefix=label, save_format='jpg')
        for _ in range(3):
            next(augmented)

In [None]:
folder_path = '/content/dataset/train'

# Rebuild the label -> image paths mapping from the current contents of the train folder
dataset = {
    label: [
        folder_path + '/' + label + '/' + filename
        for filename in os.listdir(folder_path + '/' + label + '/')
    ]
    for label in os.listdir(folder_path)
}

dataset

In [None]:
# Integer class index of every genre label: the position in the tuple is the index
label2index = {
    name: index
    for index, name in enumerate((
        'daily', 'comic', 'fantasy', 'action', 'drama',
        'pure', 'sensibility', 'thrill', 'historical', 'sports',
    ))
}

In [None]:
x_train, y_train = [], []

for label, filenames in dataset.items():
    for filename in filenames:
        image = cv2.imread(filename) # img를 array 형태로 변경

        x_train.append(image)
        y_train.append(label2index[label]) # label을 index로 변경

In [None]:
x_train, y_train = np.array(x_train), np.array(y_train)

In [None]:
x_train = x_train.astype('float32')

In [None]:
x_train.shape, y_train.shape

### **validation data**

In [None]:
folder_path = '/content/dataset/val'
dataset = {}

# 이미지와 라벨 리스트에 담기
for label in os.listdir(folder_path):
    sub_path = folder_path+'/'+label+'/'
    dataset[label] = []
    for filename in os.listdir(sub_path):
        dataset[label].append(sub_path+filename)

dataset

In [None]:
x_val, y_val = [], []

for label, filenames in dataset.items():
    for filename in filenames:
        image = cv2.imread(filename) # img를 array 형태로 변경

        x_val.append(image)
        y_val.append(label2index[label]) # label을 index로 변경

In [None]:
x_val, y_val= np.array(x_val), np.array(y_val)

In [None]:
x_val = x_val.astype('float32')

In [None]:
x_val.shape, y_val.shape

### **test data**

In [None]:
folder_path = '/content/dataset/test'
dataset = {}

# 이미지와 라벨 리스트에 담기
for label in os.listdir(folder_path):
    sub_path = folder_path+'/'+label+'/'
    dataset[label] = []
    for filename in os.listdir(sub_path):
        dataset[label].append(sub_path+filename)

dataset

In [None]:
x_test, y_test = [], []

for label, filenames in dataset.items():
    for filename in filenames:
        image = cv2.imread(filename) # img를 array 형태로 변경

        x_test.append(image)
        y_test.append(label2index[label]) # label을 index로 변경

In [None]:
x_test, y_test = np.array(x_test), np.array(y_test)

In [None]:
x_test = x_test.astype('float32')

In [None]:
x_test.shape, y_test.shape

### **Zero Centering**

In [None]:
def zero_mean(image):
    """Return the mean of ``image`` taken over its first axis.

    Despite the name, nothing is subtracted here: the function only computes
    the mean that is used for zero-centering.

    Args:
        image: Array-like whose axis 0 is averaged away.

    Returns:
        The mean over axis 0, as returned by ``np.mean``.
    """
    mean_over_first_axis = np.mean(image, axis=0)
    return mean_over_first_axis

In [None]:
zero_mean_img = zero_mean(x_train)

In [None]:
zero_mean_img.shape

In [None]:
x_train -= zero_mean_img

In [None]:
x_val -= zero_mean_img
x_test -= zero_mean_img

# **ResNet50 모델 학습**

전부 재학습 모델

In [None]:
# # Full-retraining model (disabled)
# NOTE(review): after enabling base_model.trainable, the loop below sets trainable = False
# on the last 100 layers, so this variant does not retrain every layer as its title says —
# confirm the intent before re-enabling.
# base_model = ResNet50(include_top=False, input_shape = (112, 112,3), weights = 'imagenet')
# base_model.trainable =True
# for layer in base_model.layers[-100:]:
#    layer.trainable = False	
# inputs = tf.keras.Input(shape=(112, 112,3))

# x = base_model(inputs, training=False)

# x = tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:])(x)
# x = tf.keras.layers.Dense(128, activation='relu')(x)
# x= tf.keras.layers.Dropout(0.5)(x)
# outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

# model = tf.keras.Model(inputs, outputs)   

# model.compile(optimizer = tf.keras.optimizers.Adam( learning_rate= 0.0001),
#                 loss = 'sparse_categorical_crossentropy',
#                 metrics=['accuracy'])

분류기만 재학습 모델

In [None]:
# ## Retrain the classifier head only (disabled)
# NOTE(review): the labels built in this file are integer class indices (label2index) and no
# one-hot encoding is visible, which matches 'sparse_categorical_crossentropy' (used by the
# active models) rather than the 'categorical_crossentropy' below — verify before re-enabling.
# base_model = ResNet50(include_top=False, input_shape = (112, 112,3), weights = 'imagenet')# ImageNet weights, without the top classifier
# #base_model.output_shape
# base_model.trainable = False # freeze // lower layers of the base model
# inputs = tf.keras.Input(shape=(112, 112, 3))
# x = base_model(inputs, training=False) # prevents batchnorm updates
# x = tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:])(x)
# x = tf.keras.layers.Dense(128, activation='relu')(x)
# x= tf.keras.layers.Dropout(0.5)(x)
# # # Study note: dropout first appeared with AlexNet. Overfitting remedies: 1. data handling (via augmentation) / 2. model complexity (previously: penalising weight magnitude - but even with smaller magnitudes there were too many weights to restrain; reducing layers/nodes instead led to underfitting / )
# outputs = tf.keras.layers.Dense(10, activation='softmax')(x)

# model = tf.keras.Model(inputs, outputs)# build the Keras Model from inputs and outputs (pattern worth memorising)
# model.compile(optimizer = tf.keras.optimizers.Adam(learning_rate= 0.001),
#                   loss = 'categorical_crossentropy',
#                   metrics=['accuracy'])


일부 재학습 모델

In [None]:
# Partial-retraining model: ImageNet ResNet50 backbone without its top classifier
base_model = ResNet50(include_top=False, input_shape=(112, 112, 3), weights='imagenet')

# Freeze everything except the last 50 layers (opening the last 10-50 layers is workable)
for layer in base_model.layers[:-50]:
    layer.trainable = False

inputs = tf.keras.Input(shape=(112, 112, 3))

# training=False prevents the batchnorm part from updating
features = base_model(inputs, training=False)

# Classification head: flatten -> Dense(128, relu) -> Dropout(0.5) -> 10-way softmax
flattened = tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:])(features)
hidden = tf.keras.layers.Dense(128, activation='relu')(flattened)
dropped = tf.keras.layers.Dropout(0.5)(hidden)
outputs = tf.keras.layers.Dense(10, activation='softmax')(dropped)

model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
history = model.fit(x_train, y_train, epochs = 8, batch_size= 125, validation_data=(x_val, y_val))

In [None]:
model.evaluate(x_test, y_test)

## **ResNet50 모델 학습결과 시각화**

Model accuracy 그래프

In [None]:
from matplotlib import pyplot as plt

# Training vs. validation accuracy per epoch
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model accuracy')
plt.xlabel('Epoch')
plt.ylabel('accuracy')
# The second curve is val_accuracy, measured on the validation_data passed to fit(),
# not on the test split, so it is labelled 'Validation'
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

Model loss 그래프

In [None]:
from matplotlib import pyplot as plt

# Training vs. validation loss per epoch
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Model loss')
plt.xlabel('Epoch')
plt.ylabel('loss')
# The second curve is val_loss, measured on the validation_data passed to fit(),
# not on the test split, so it is labelled 'Validation'
plt.legend(['Train', 'Validation'], loc='lower right')
plt.show()

# **웹툰 장르별 썸네일 여러가지 모델학습과 텐서보드 적용**

여기서 선택해서 base_model로 선정하면 다른 모델로 바꾸기 가능하다.

In [None]:
# Pick the backbone here: assigning a different one to base_model switches the model.

# from tensorflow.keras.applications.vgg16 import VGG16
# from tensorflow.keras.applications.vgg19 import VGG19
# from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.resnet import ResNet50 # gave the best results
# model_VGG16 = VGG16(include_top=False, input_shape = (112, 112,3), weights='imagenet')
# model_VGG19 = VGG19(include_top=False, input_shape = (112, 112,3), weights='imagenet')
# model_INCEPTIONV3 = InceptionV3(include_top=False, input_shape = (112, 112,3), weights='imagenet')
model_RESNET50 = ResNet50(
    include_top=False,
    input_shape=(112, 112, 3),
    weights='imagenet',
)

In [None]:
# Load the TensorBoard notebook extension so that the %tensorboard magic is available.
%load_ext tensorboard

In [None]:
import tensorflow as tf
import datetime

In [None]:
## Model setup
base_model = model_RESNET50

# Every layer of the backbone is left trainable
for layer in base_model.layers:
    layer.trainable = True

inputs = tf.keras.Input(shape=(112, 112, 3))

# training=False prevents the batchnorm part from updating
features = base_model(inputs, training=False)

# Classification head: flatten -> Dense(256, relu) -> Dropout(0.5) -> 10-way softmax
flattened = tf.keras.layers.Flatten(input_shape=base_model.output_shape[1:])(features)
hidden = tf.keras.layers.Dense(256, activation='relu')(flattened)
dropped = tf.keras.layers.Dropout(0.5)(hidden)
outputs = tf.keras.layers.Dense(10, activation='softmax')(dropped)

model = tf.keras.Model(inputs, outputs)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model_name = 'version.1'
current_time = datetime.datetime.now().strftime("%Y%m%d") # Y : 연도 / m : 월 / d : 일 / H : 시 / M : 분 / S : 초
current_time

In [None]:
checkpoint_path = "mytraining/cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path) # 현재 경로 폴더 반환

checkpoint_dir

In [None]:
log_dir = "logs/fit/" + current_time + model_name

In [None]:
# Training callbacks.
# EarlyStopping and ModelCheckpoint both monitor val_accuracy, so the weights restored at
# the end of training and the "best" checkpoint written to disk are chosen by the same
# criterion (ModelCheckpoint would otherwise fall back to its default monitor, val_loss).
callbacks = [
    # Stop after 4 epochs without val_accuracy improvement and restore the best weights
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=4, restore_best_weights=True),
    # Save weights only, and only when the monitored value improves
    tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path, monitor='val_accuracy',
                                       save_weights_only=True, save_best_only=True, verbose=1),
    # Compute histograms every epoch
    tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1),
]

In [None]:
model.fit(x_train, y_train, epochs = 20, batch_size= 100, validation_data=(x_val, y_val), callbacks=callbacks)

In [None]:
model.evaluate(x_test, y_test, verbose=2)

In [None]:
# Open TensorBoard on logs/fit, the parent folder of the log_dir given to the TensorBoard callback.
%tensorboard --logdir logs/fit