## Train

In [1]:
import numpy as np
import pandas as pd
import keras
import cv2
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [2]:
DATA_DIR = "../data/dog-breed-identification"

# 훈련할 csv 데이터 셋팅
df_train = pd.read_csv(DATA_DIR+"/labels.csv")

# breed 파싱(중첩제거?)
targets_series = pd.Series(df_train['breed'])
# one_hot 변환(0 0 1 0)
one_hot = pd.get_dummies(targets_series, sparse = True)

# 배열로 변환
one_hot_labels = np.asarray(one_hot)

# 이미지 사이즈 정의
im_size = 90

x_train = []
y_train = []

# 훈련할 데이터 셋팅
i = 0
for f, breed in tqdm(df_train.values):
    img = cv2.imread(DATA_DIR+"/train/{}.jpg".format(f))
    label = one_hot_labels[i]
    x_train.append(cv2.resize(img, (im_size, im_size)))
    y_train.append(label)
    i += 1


y_train_raw = np.array(y_train, np.uint8)
x_train_raw = np.array(x_train, np.float32) / 255.

# shape 확인
print(x_train_raw.shape)
print(y_train_raw.shape)

# 분류
num_class = y_train_raw.shape[1]

# 교육할 데이터셋팅 및 성능테스트 데이터 교육데이터에서 30% 할당
X_train, X_valid, Y_train, Y_valid = train_test_split(x_train_raw, y_train_raw, test_size=0.3, random_state=1)

In [5]:
# 사전 교육된 기본 모델 생성
base_model = VGG19(
    weights = 'imagenet',
    # weights = None,
    include_top=False, input_shape=(im_size, im_size, 3))

# model out 정의
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)

# 모델 정의
model = Model(inputs=base_model.input, outputs=predictions)

# 저장된 weights 로드
# model.load_weights('weights.h5')

# 교육할 계층 설정 최상위 계층만 교육 (False 가중치 고정)
for layer in base_model.layers:
    layer.trainable = False

# 훈련할 레이어 확인
for layer in base_model.layers:
    print(layer, layer.trainable)

# 오차역전파 정의
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
callbacks_list = [keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, verbose=1)]
model.summary()

<keras.engine.input_layer.InputLayer object at 0x29d7f1b50> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x112a2ffa0> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x17a050e50> False
<keras.layers.pooling.max_pooling2d.MaxPooling2D object at 0x17a0602b0> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x17a05e730> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x29d7ff5e0> False
<keras.layers.pooling.max_pooling2d.MaxPooling2D object at 0x29d7f1eb0> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x291463b50> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x29d7ff1c0> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x291466a00> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x291468550> False
<keras.layers.pooling.max_pooling2d.MaxPooling2D object at 0x291463220> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x291466790> False
<keras.layers.convolutional.conv2d.Conv2D object at 0x29146e6

In [6]:
from datetime import datetime
from keras.callbacks import EarlyStopping
    
# 학습시작
print("train start  : "+datetime.now())
early_stopping = EarlyStopping(monitor = 'accuracy', patience = 5, mode = 'auto')
model.fit(X_train, Y_train, epochs=100, validation_data=(X_valid, Y_valid), verbose=1, callbacks = [early_stopping])
print("train finish : "+datetime.now())

Epoch 1/100


2022-09-17 22:55:05.322269: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-09-17 22:55:05.563828: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2022-09-17 22:55:20.951105: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100


<keras.callbacks.History at 0x29149fc40>

In [7]:
# 학습된 weights 저장
model.save('VGG19_DOG_weights.h5')

## Predict

In [4]:
# gpu를 끄고 테스트하려 했으나 m1 에서는 안되는 듯 함
# with tf.device('/CPU:0'): 를 통해서 대체
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [None]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

2022-09-18 15:17:18.172997: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-09-18 15:17:18.173063: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 8483204366621149018
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 locality {
   bus_id: 1
 }
 incarnation: 10354847475270525694
 physical_device_desc: "device: 0, name: METAL, pci bus id: <undefined>"
 xla_global_id: -1]

In [1]:
import numpy as np
import pandas as pd
import cv2
from keras.applications.vgg19 import VGG19
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten
from tqdm import tqdm

In [2]:
DATA_DIR = "../data/dog-breed-identification"

# csv 데이터 셋팅
df_train = pd.read_csv(DATA_DIR+"/labels.csv")
df_test = pd.read_csv(DATA_DIR+"/predict.csv")

# breed 파싱(중첩제거?)
targets_series = pd.Series(df_train['breed'])
# one_hot 변환(0 0 1 0)
one_hot = pd.get_dummies(targets_series, sparse = True)

# 배열로 변환
one_hot_labels = np.asarray(one_hot)

# 이미지 사이즈 정의
im_size = 90

y_train = []
x_test = []

# 예측할 분류 셋팅
i = 0
for f, breed in tqdm(df_train.values):
    label = one_hot_labels[i]
    y_train.append(label)
    i += 1

# 예측할 데이터 셋팅
for f in tqdm(df_test['id'].values):
    img = cv2.imread(DATA_DIR+"/test/{}.jpg".format(f))
    x_test.append(cv2.resize(img, (im_size, im_size)))

y_train_raw = np.array(y_train, np.uint8)
x_test = np.array(x_test, np.float32) / 255.


# shape 확인
print(y_train_raw.shape)
print(x_test.shape)

# 분류
num_class = y_train_raw.shape[1]

100%|███████████████████████████████████████████████████████████████████████| 10222/10222 [00:00<00:00, 1364593.89it/s]
100%|███████████████████████████████████████████████████████████████████████████| 10357/10357 [00:17<00:00, 579.25it/s]


(10222, 120)
(10357, 90, 90, 3)


In [3]:
# 사전 교육된 기본 모델 생성
base_model = VGG19(
    weights = 'imagenet',
    # weights = None,
    include_top=False, input_shape=(im_size, im_size, 3))

# model out 정의
x = base_model.output
x = Flatten()(x)
predictions = Dense(num_class, activation='softmax')(x)

# 모델 정의
model = Model(inputs=base_model.input, outputs=predictions)

# 저장된 weights 로드
model.load_weights('VGG19_DOG_weights.h5')
model.summary()

2022-09-18 15:18:04.109860: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-09-18 15:18:04.109962: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Metal device set to: Apple M1 Pro
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 90, 90, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 90, 90, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 90, 90, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 45, 45, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 45, 45, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 45, 45, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)

In [4]:
import datetime

print("gpu predict start  :", datetime.datetime.now())
# 예측할 데이터 결과
preds = model.predict(x_test, verbose=1)

sub = pd.DataFrame(preds)
# 열 이름을 앞서 한번의 키 인코딩으로 생성된 열 이름으로 설정
col_names = one_hot.columns.values
sub.columns = col_names

sub.head(5)
print(sub)
print("gpu predict finish :", datetime.datetime.now())

predict start  : 2022-09-18 15:14:05.231830


2022-09-18 15:14:05.796922: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-09-18 15:14:05.868036: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


       affenpinscher  afghan_hound  african_hunting_dog      airedale  \
0       6.573294e-02  1.690041e-10         1.161020e-10  1.930093e-05   
1       2.735431e-02  3.351460e-04         2.300926e-05  1.901667e-05   
2       2.061415e-03  2.686264e-04         2.070895e-08  1.987240e-06   
3       1.889682e-05  1.033817e-04         3.254675e-11  3.814876e-05   
4       9.142209e-03  9.940370e-05         1.449613e-02  2.344854e-04   
...              ...           ...                  ...           ...   
10352   1.388291e-09  1.583850e-04         5.192119e-11  2.744825e-09   
10353   1.480103e-02  5.595206e-07         3.628109e-09  1.120093e-02   
10354   1.598422e-03  1.548917e-07         3.122071e-07  1.429822e-04   
10355   6.396257e-08  1.044179e-09         1.406445e-04  1.891592e-05   
10356   1.998745e-05  2.398158e-08         2.707106e-05  3.239789e-05   

       american_staffordshire_terrier   appenzeller  australian_terrier  \
0                            0.313173  1.291099e

In [5]:
import datetime
import tensorflow as tf

with tf.device('/CPU:0'):
    print("cpu predict start  :", datetime.datetime.now())
    # 예측할 데이터 결과
    preds = model.predict(x_test, verbose=1)

    sub = pd.DataFrame(preds)
    # 열 이름을 앞서 한번의 키 인코딩으로 생성된 열 이름으로 설정
    col_names = one_hot.columns.values
    sub.columns = col_names

    sub.head(5)
    print(sub)
    print("cpu predict finish :", datetime.datetime.now())

cpu predict start  : 2022-09-18 15:18:10.065525


2022-09-18 15:18:10.088288: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2022-09-18 15:18:10.140916: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




KeyboardInterrupt: 

In [14]:
# 데이터 프레임 시작 부분의 predict에서 열 ID삽입
sub.insert(0, 'id', df_test['id'])

# 테스트 이미지 예측값 저장
sub.to_csv("predict.csv", mode='w')