In [1]:
# 101 개의 categories
# 이미지 크기가 모두 달라서, resize 해줘야함.
# 이미지 불러올 때 쓰는 library
from PIL import Image
import os,glob
import numpy as np
import torch.nn as nn

## build data

In [2]:
caltech_dir = "101_ObjectCategories"
# 101 개는 너무 많으니까 5개 예측하는 것 먼저.
# okapi - 말
categories = ["brain", "camera", "crayfish", "helicopter", "okapi"]

In [3]:
n_class=len(categories)

In [4]:
# space of data
X=[]
y=[]


In [5]:
image_width=224; image_height=224

In [6]:

# image load
for i,f in enumerate(categories):
    #label one hot encoding
    label=[0 for j in range(n_class)]
    label[i]=1
    images_list=caltech_dir+"/"+f
    # 폴더 내 파일들을 가져옴
    files=glob.glob(images_list+"/*.jpg")
    for i, fname in enumerate(files):
        img=Image.open(fname)
        # 색상이 있음으로 RGB로 변환
        img=img.convert("RGB")
        img=img.resize((image_width,image_height),3)
        data=np.asarray(img)
        X.append(data)
        y.append(label)
    
    
    

In [8]:
# numpy 로 바꿔줘야함.
X=np.array(X); y=np.array(y)



### train, test set 분할


In [9]:

from sklearn.model_selection import train_test_split

In [10]:
X_train, X_test, Y_train, Y_test = train_test_split(X, y,test_size=0.33)

# 정규화를 시켜줘야한다! /225로 나누기 
X_train=X_train.astype("float")/225
X_test=X_test.astype("float")/225

## parameter setting and training model with keras


In [12]:
X_train.shape[1:]

(224, 224, 3)

In [13]:
nfilter=bsize=32; opt=['adam','rmsprop']

In [14]:
from keras.models import Sequential
from keras.layers.convolutional import Conv2D,ZeroPadding2D,MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


### - BASIC MODEL (Basic CNN)

![title](basic_model.jpg)

In [24]:
def basic_model():
    model = Sequential()

    model.add(Conv2D(nfilter, (3, 3), padding="same", input_shape = X_train.shape[1:]))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))


    model.add(Conv2D(2*nfilter, (3, 3), padding="same"))
    model.add(Activation('relu'))
    model.add(Conv2D(2*nfilter, (3, 3)))
    model.add(MaxPooling2D(pool_size=(2,2)))


    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_class))
    model.add(Activation('softmax'))
    
    
    model.compile(loss='binary_crossentropy', optimizer=opt[1], metrics=['accuracy'])

    return model


    

In [25]:
basic_model=basic_model()


score = basic_model.evaluate(X_test, Y_test, batch_size=bsize)
print("\n\n\n\nloss =", score[0], ", accuracy =", score[1],", baseline error: %.2f%%" % (100-score[1]*100))





loss = 0.5080604699620029 , accuracy = 0.8000000621143141 , baseline error: 20.00%


## - Alexnet

In [24]:
X_a=[]
Y_a=[]

In [25]:

# image load
for i,f in enumerate(categories):
    #label one hot encoding
    label=[0 for j in range(n_class)]
    label[i]=1
    images_list=caltech_dir+"/"+f
    # 폴더 내 파일들을 가져옴
    files=glob.glob(images_list+"/*.jpg")
    for i, fname in enumerate(files):
        img=Image.open(fname)
        # 색상이 있음으로 RGB로 변환
        img=img.convert("RGB")
        img=img.resize((227,227),3)
        data=np.asarray(img)
        X_a.append(data)
        Y_a.append(label)
    
    
    

In [26]:
# numpy 로 바꿔줘야함.
X_a=np.array(X_a); Y_a=np.array(Y_a)

In [29]:
X_train_a, X_test_a, Y_train_a, Y_test_a = train_test_split(X_a, Y_a,test_size=0.33)

# 정규화를 시켜줘야한다! /227로 나누기 
X_train_a=X_train_a.astype("float")/227
X_test_a=X_test_a.astype("float")/227

In [30]:
def Alexnet():
    model = Sequential()
    model.add(Conv2D(96,(11,11),activation='relu',strides=(4,4),input_shape=(227, 227, 3)))
    model.add(MaxPooling2D((3, 3),strides=(2,2)))
    
    model.add(Conv2D(256,(5,5),activation='relu',strides=(1,1)))
    model.add(MaxPooling2D((3, 3),strides=(2,2)))
    model.add(Conv2D(384,(3,3),activation='relu',strides=(1,1)))
    model.add(Conv2D(384,(3,3),activation='relu',strides=(1,1)))
    model.add(Conv2D(256,(3,3),activation='relu',strides=(1,1)))
    model.add(MaxPooling2D((3, 3),strides=(2,2),padding='valid'))
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(n_class, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer=opt[1], metrics=['accuracy'])
    return model

In [31]:
alex=Alexnet()


score = alex.evaluate(X_test_a, Y_test_a, batch_size=bsize)
print("\n\n\n\nloss =", score[0], ", accuracy =", score[1],", baseline error: %.2f%%" % (100-score[1]*100))





loss = 0.501481176468364 , accuracy = 0.800000011920929 , baseline error: 20.00%


## - VGG16(layer)

망의 깊이가 어떤 영향을 주는지 연구를 하기 위해서 설계된 Network
convolution 3x3 로만 정하고, 깊이를 늘림.
하지만 어마어마한 parameter수, (fully connected layer가 3개고 pooling을 거친 뒤에는 feature map 개수가 2배로 커짐)

![title](vgg16.png)

https://www.pyimagesearch.com/2017/03/20/imagenet-vggnet-resnet-inception-xception-keras/

In [26]:
def VGG_16():
    model = Sequential()
    # 226 x 226 x 3
    model.add(ZeroPadding2D((1,1),input_shape=(224,224,3)))
    # 224 x 224 x 64
    model.add(Conv2D(64, (3, 3), activation='relu'))
    # 226 x 226 x 64
    model.add(ZeroPadding2D((1,1)))
    # 224 x 224 x 64
    model.add(Conv2D(64, (3, 3), activation='relu'))
    # 사이즈가 /2로 줄어듬.
    # (224-2)/2+1 = 122
    # 112 x 122 x 64
    model.add(MaxPooling2D((2,2), strides=(2,2)))
    
    # 114 x 114 x 64
    model.add(ZeroPadding2D((1,1)))
    # 112 x 112 x 128
    model.add(Conv2D(128, (3, 3), activation='relu'))
    # 114 x 114 x 128
    model.add(ZeroPadding2D((1,1)))
    # 112 x 112 x 128
    model.add(Conv2D(128, (3, 3), activation='relu'))
    # (112-2)/2+1= 56 
    # 56 x 56 x 128
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    # 58 x 58 x 128
    model.add(ZeroPadding2D((1,1)))
    # 56 x 56 x 256
    model.add(Conv2D(256, (3, 3), activation='relu'))
    # 58 x 58 x 256
    model.add(ZeroPadding2D((1,1)))
    # 56 x 56 x 256 (계속 해서 이미지 크기가 보존 되고 있음)
    model.add(Conv2D(256, (3, 3), activation='relu'))
    # 56 x 56 x 256
    model.add(ZeroPadding2D((1,1)))
    # 56 x 56 x 256
    model.add(Conv2D(256, (3, 3), activation='relu'))
    # (56-2)/2+1= 28 x 28x 256
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    # 30 x 30 x 256
    model.add(ZeroPadding2D((1,1)))
    # 28 x 28 x 512
    model.add(Conv2D(512, (3, 3), activation='relu'))
    # 30 x 30 x 512
    model.add(ZeroPadding2D((1,1)))
    # 28 x 28 x 512
    model.add(Conv2D(512, (3, 3), activation='relu'))
    # 30 x 30 x 512
    model.add(ZeroPadding2D((1,1)))
    # 28 x 28 x 512
    model.add(Conv2D(512, (3, 3), activation='relu'))
    # (28-2)/2+1 =14 x 14 x 512
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    # 16 x 16 x 512
    model.add(ZeroPadding2D((1,1)))
    # 14 x 14 x 512
    model.add(Conv2D(512, (3, 3), activation='relu'))
    # 16 x 16 x 512
    model.add(ZeroPadding2D((1,1)))
    # 14 x 14 x 512
    model.add(Conv2D(512, (3, 3), activation='relu'))
    # 16 x 16 x 512
    model.add(ZeroPadding2D((1,1)))
    # 14 x 14 x 512
    model.add(Conv2D(512, (3, 3), activation='relu'))
    # (14-2)/2+1 = 7 x 7 x 512
    model.add(MaxPooling2D((2,2), strides=(2,2)))

    
    model.add(Flatten())
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4096, activation='relu'))
    model.add(Dropout(0.5))
    
    # 4096- 5
    model.add(Dense(n_class, activation='softmax'))
    model.compile(loss='binary_crossentropy', optimizer=opt[1], metrics=['accuracy'])
    
    return model

In [27]:
vgg=VGG_16()


score = vgg.evaluate(X_test, Y_test, batch_size=bsize)
print("\n\n\n\nloss =", score[0], ", accuracy =", score[1],", baseline error: %.2f%%" % (100-score[1]*100))





loss = 0.5004072712178815 , accuracy = 0.8000000621143141 , baseline error: 20.00%


In [None]:
## basic -> alexnet -> vgg 로 갈 수록 loss가 준 것을 확인할 수 있다. 