# My dataset with VGG16


### 데이터준비

In [None]:
# Install the directory-splitting helper. %pip targets the running kernel's environment,
# and the version is pinned so a re-run installs the same release.
%pip install -q split-folders==0.5.1

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


In [None]:
import numpy as np
import splitfolders   # 이미지 데이터 분리시 사용

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping

In [None]:
# Data preparation (disabled: kept only as a record of how the archive was extracted)
# !unzip -qq "/content/animal.zip" -d "/content/animal"

# !unzip -qq "/content/animal.zip"    # animal.zip already contains an animal_data folder, so extract it as-is

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Source directory: it must contain one sub-folder per class.
input_folder = '/content/drive/MyDrive/KDT뱀조/snake'
# Destination directory: the train, val and test folders are created inside it.
output_folder = '/content'

# Ratio-based split with a fixed seed: 70% train, 20% validation, 10% test.
splitfolders.ratio(
    input_folder,
    output=output_folder,
    seed=42,
    ratio=(0.7, 0.2, 0.1),
)

# Alternative kept for reference: split by a fixed number of files instead of a ratio.
# splitfolders.fixed("input_folder", output="output", seed=1337, fixed=(100, 100))

Copying files: 1972 files [01:48, 18.16 files/s]


In [None]:
# One independent generator per split; each one only rescales pixel values by 1/255.
train, val, test = (ImageDataGenerator(rescale=1. / 255) for _ in range(3))

In [None]:
# Build the train / validation / test iterators in that order, all with the same
# image size, batch size and integer ("sparse") labels.
train_dataset, val_dataset, test_dataset = (
    generator.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=32,
        class_mode='sparse',
    )
    for generator, directory in (
        (train, "/content/train"),
        (val, "/content/val"),
        (test, "/content/test"),
    )
)

Found 1348 images belonging to 7 classes.
Found 388 images belonging to 7 classes.
Found 199 images belonging to 7 classes.


In [None]:
# Display the mapping from class folder name to integer label.
train_dataset.class_indices

{'까치살모사': 0,
 '누룩뱀': 1,
 '능구렁이': 2,
 '물뱀': 3,
 '쇠살모사': 4,
 '유혈목이(꽃뱀)': 5,
 '황구렁이': 6}

In [None]:
# Display the array of integer class labels held by the training iterator.
train_dataset.classes

array([0, 0, 0, ..., 6, 6, 6], dtype=int32)

In [None]:
# Sanity check: index 0 selects the first batch, [0]/[1] its images/labels, and the
# final [0] the first element of that batch.
print('img shape:', train_dataset[0][0][0].shape) # first image among the images of the first batch
print('label:', train_dataset[0][1][0]) # first label among the labels of the first batch

img shape: (224, 224, 3)
label: 1.0


In [None]:
# Weight each class inversely to its frequency so under-represented classes contribute
# as much to the loss as common ones: n_samples / (n_classes * class_count).
# The previous all-1.0 dictionary had no effect on training.
class_counts = np.bincount(
    train_dataset.classes,
    minlength=len(train_dataset.class_indices),  # keep an entry for every class folder
)
class_weight = {
    label: float(class_counts.sum() / (len(class_counts) * max(count, 1)))  # max() avoids /0 for empty folders
    for label, count in enumerate(class_counts)
}
# NOTE(review): `model` must already be built and compiled by an earlier cell; on a
# fresh kernel nothing above this cell defines it — confirm the intended cell order.
model.fit(train_dataset, validation_data=val_dataset, epochs=10, class_weight=class_weight)

Epoch 1/10
Epoch 2/10
Epoch 3/10

KeyboardInterrupt: 

## Small CNN

In [None]:
# 우리가 만든 vgg16의 정확도가 나오지 않는 것을 확인하고 작은 모델로 다시 테스트하기 위함

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# The output layer needs one unit per class folder found by the generator. A smaller
# hard-coded width puts some sparse labels outside the softmax range, which breaks
# sparse_categorical_crossentropy.
num_classes = len(train_dataset.class_indices)

# Build a small baseline CNN.
model = Sequential([
    # First convolution block
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D(2, 2),

    # Second convolution block
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    # Flatten, then a fully connected layer
    Flatten(),
    Dense(512, activation='relu'),
    # Dropout to reduce overfitting
    Dropout(0.5),
    # Output layer: one softmax unit per snake class
    Dense(num_classes, activation='softmax')
])

In [None]:
# Path of the checkpoint file written whenever validation loss improves.
modelpath = "./custom_vgg16.hdf5"

# Callbacks: keep only the best checkpoint, and stop once val_loss has not improved
# for 10 consecutive epochs.
checkpointer = ModelCheckpoint(
    filepath=modelpath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10)

# Integer labels -> sparse categorical cross-entropy, optimised with Adam.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for at most 20 epochs, validating after each one.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=20,
    verbose=1,
    callbacks=[early_stopping_callback, checkpointer],
)

Epoch 1/20
Epoch 1: val_loss did not improve from inf
Epoch 2/20
Epoch 2: val_loss did not improve from inf
Epoch 3/20
Epoch 3: val_loss did not improve from inf
Epoch 4/20
Epoch 4: val_loss did not improve from inf
Epoch 5/20
Epoch 5: val_loss did not improve from inf
Epoch 6/20
Epoch 6: val_loss did not improve from inf
Epoch 7/20
Epoch 7: val_loss did not improve from inf
Epoch 8/20
Epoch 8: val_loss did not improve from inf
Epoch 9/20
Epoch 9: val_loss did not improve from inf
Epoch 10/20
Epoch 10: val_loss did not improve from inf


In [None]:
# Evaluate the current model on the test iterator and print accuracy to three decimals.
loss, acc = model.evaluate(test_dataset)
print(f"Test Accuracy: {acc:.3f}")

Test Accuracy: 0.085


In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation settings applied to training images only: rescaling plus random
# rotation, shifts, shear, zoom and horizontal flips.
augmentation_options = dict(
    rescale=1. / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(**augmentation_options)

# Replace the training iterator with the augmented one (same directory, image size,
# batch size and label mode as before).
train_dataset = train_datagen.flow_from_directory(
    "/content/train",
    target_size=(224, 224),
    batch_size=32,
    class_mode='sparse',
)


Found 1348 images belonging to 7 classes.


## Pre-trained VGG16

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, Dense, Flatten, Dropout
from functools import partial

In [None]:
# Load VGG16 with ImageNet weights and without its top classifier, for 224x224 RGB input.
base_model = VGG16(include_top=False,weights='imagenet',input_shape=(224,224,3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the pre-trained layers so their weights are not updated during training.
base_model.trainable = False

In [None]:
# Print the layer-by-layer structure and parameter counts of the VGG16 base.
base_model.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# The classifier head needs one output unit per class folder found by the generator.
# A hard-coded width breaks sparse_categorical_crossentropy whenever the dataset has
# more classes than units.
num_classes = len(train_dataset.class_indices)

# Custom classification head on top of the VGG16 feature extractor.
# NOTE(review): the iterators in scope rescale pixels by 1/255; the ImageNet VGG16
# weights may expect vgg16.preprocess_input instead — confirm which one is intended.
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)  # regularise the dense head
predictions = Dense(num_classes, activation='softmax')(x)  # one unit per snake class

# Assemble the full model from the VGG16 input to the new softmax output.
model = Model(inputs= base_model.input, outputs= predictions)

In [None]:
# Print the structure of the combined model (VGG16 base plus the new head).
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 224, 224, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 224, 224, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 112, 112, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 112, 112, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 112, 112, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 56, 56, 128)       0     

In [None]:
# Path of the checkpoint file written whenever validation loss improves.
modelpath = "./pretrained_vgg16.hdf5"

# Callbacks: keep only the best checkpoint, and stop once val_loss has not improved
# for 10 consecutive epochs.
checkpointer = ModelCheckpoint(
    filepath=modelpath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10)

# Integer labels -> sparse categorical cross-entropy, optimised with Adam.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for at most 50 epochs, validating after each one.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    verbose=1,
    callbacks=[early_stopping_callback, checkpointer],
)

Epoch 1/50
Epoch 1: val_loss improved from inf to 5.24062, saving model to ./pretrained_vgg16.hdf5


  saving_api.save_model(


Epoch 2/50
Epoch 2: val_loss improved from 5.24062 to 1.71299, saving model to ./pretrained_vgg16.hdf5
Epoch 3/50
Epoch 3: val_loss did not improve from 1.71299
Epoch 4/50
Epoch 4: val_loss improved from 1.71299 to 1.20826, saving model to ./pretrained_vgg16.hdf5
Epoch 5/50
Epoch 5: val_loss did not improve from 1.20826
Epoch 6/50
Epoch 6: val_loss improved from 1.20826 to 0.97780, saving model to ./pretrained_vgg16.hdf5
Epoch 7/50
Epoch 7: val_loss did not improve from 0.97780
Epoch 8/50
Epoch 8: val_loss did not improve from 0.97780
Epoch 9/50
Epoch 9: val_loss did not improve from 0.97780
Epoch 10/50
Epoch 10: val_loss did not improve from 0.97780
Epoch 11/50
Epoch 11: val_loss did not improve from 0.97780
Epoch 12/50
Epoch 12: val_loss did not improve from 0.97780
Epoch 13/50
Epoch 13: val_loss did not improve from 0.97780
Epoch 14/50
Epoch 14: val_loss did not improve from 0.97780
Epoch 15/50
Epoch 15: val_loss did not improve from 0.97780
Epoch 16/50
Epoch 16: val_loss did not im

In [None]:
# Evaluate the current model on the test iterator and print accuracy to three decimals.
loss, acc = model.evaluate(test_dataset)
print(f"Test Accuracy: {acc:.3f}")

Test Accuracy: 0.730


## Pretrained ResNet50

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input

from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout

In [None]:
# Use ResNet50's own preprocess_input for every split. Without model-specific
# preprocessing the accuracy was observed to come out wrong.
train, val, test = (
    ImageDataGenerator(preprocessing_function=preprocess_input) for _ in range(3)
)

In [None]:
# Rebuild the train / validation / test iterators, in that order, on top of the
# ResNet50-preprocessing generators; image size, batch size and label mode are shared.
train_dataset, val_dataset, test_dataset = (
    generator.flow_from_directory(
        directory,
        target_size=(224, 224),
        batch_size=32,
        class_mode='sparse',
    )
    for generator, directory in (
        (train, "/content/train"),
        (val, "/content/val"),
        (test, "/content/test"),
    )
)

Found 241 images belonging to 3 classes.
Found 70 images belonging to 3 classes.
Found 37 images belonging to 3 classes.


In [None]:
# Load ResNet50 with ImageNet weights and without its top classifier, for 224x224 RGB input.
base_model = ResNet50(include_top=False,weights='imagenet',input_shape=(224,224,3))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Freeze the pre-trained layers so their weights are not updated during training.
base_model.trainable = False

In [None]:
# Print the layer-by-layer structure and parameter counts of the ResNet50 base.
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                       

In [None]:
# The classifier head needs one output unit per class folder found by the generator.
# A hard-coded width either wastes units or, with more classes than units, breaks
# sparse_categorical_crossentropy.
num_classes = len(train_dataset.class_indices)

# Custom classification head on top of the ResNet50 feature extractor.
x = base_model.output
x = Flatten()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.5)(x)  # regularise the dense head
predictions = Dense(num_classes, activation='softmax')(x)  # one unit per snake class

# Assemble the full model from the ResNet50 input to the new softmax output.
model = Model(inputs= base_model.input, outputs= predictions)

In [None]:
# Print the structure of the combined model (ResNet50 base plus the new head).
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 conv1_pad (ZeroPadding2D)   (None, 230, 230, 3)          0         ['input_2[0][0]']             
                                                                                                  
 conv1_conv (Conv2D)         (None, 112, 112, 64)         9472      ['conv1_pad[0][0]']           
                                                                                                  
 conv1_bn (BatchNormalizati  (None, 112, 112, 64)         256       ['conv1_conv[0][0]']          
 on)                                                                                        

In [None]:
# Path of the checkpoint file written whenever validation loss improves.
modelpath = "./pretrained_resnet50.hdf5"

# Callbacks: keep only the best checkpoint, and stop once val_loss has not improved
# for 10 consecutive epochs.
checkpointer = ModelCheckpoint(
    filepath=modelpath,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10)

# Integer labels -> sparse categorical cross-entropy, optimised with Adam.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

# Train for at most 50 epochs, validating after each one.
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=50,
    verbose=1,
    callbacks=[early_stopping_callback, checkpointer],
)

Epoch 1/50
Epoch 1: val_loss improved from inf to 7.82847, saving model to ./pretrained_resnet50.hdf5
Epoch 2/50
Epoch 2: val_loss did not improve from 7.82847
Epoch 3/50
Epoch 3: val_loss did not improve from 7.82847
Epoch 4/50
Epoch 4: val_loss improved from 7.82847 to 6.83932, saving model to ./pretrained_resnet50.hdf5
Epoch 5/50
Epoch 5: val_loss improved from 6.83932 to 5.96530, saving model to ./pretrained_resnet50.hdf5
Epoch 6/50
Epoch 6: val_loss improved from 5.96530 to 5.58863, saving model to ./pretrained_resnet50.hdf5
Epoch 7/50
Epoch 7: val_loss did not improve from 5.58863
Epoch 8/50
Epoch 8: val_loss did not improve from 5.58863
Epoch 9/50
Epoch 9: val_loss did not improve from 5.58863
Epoch 10/50
Epoch 10: val_loss did not improve from 5.58863
Epoch 11/50
Epoch 11: val_loss did not improve from 5.58863
Epoch 12/50
Epoch 12: val_loss did not improve from 5.58863
Epoch 13/50
Epoch 13: val_loss did not improve from 5.58863
Epoch 14/50
Epoch 14: val_loss did not improve fro

In [None]:
# Evaluate the current model on the test iterator and print accuracy to three decimals.
loss, acc = model.evaluate(test_dataset)
print(f"Test Accuracy: {acc:.3f}")

Test Accuracy: 0.784
