In [1]:
%matplotlib inline
%env KERAS_BACKEND=tensorflow
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, Activation, MaxPooling2D, Dense
from keras.datasets import cifar100
from keras.utils import np_utils
from keras.preprocessing import image
from spp.SpatialPyramidPooling import SpatialPyramidPooling    # 需引入相關檔案 https://github.com/yhenon/keras-spp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import os 
from PIL import Image
from io import BytesIO
from IPython.display import clear_output


def clear():
    os.system( 'cls' )

env: KERAS_BACKEND=tensorflow


Using TensorFlow backend.


### 引入CIFAR約10000張圖片作為非meme的圖片

In [2]:
(CIFAR_train, dum1), (dum2, dum3) = cifar100.load_data(label_mode='fine')    # 引入CIFAR，實際上只需要圖片的部分

In [4]:
# 需要將圖檔放大為128*128以利訓練
imgs = []
sele_img = CIFAR_train[:8000]    # 只需要八千筆就好
for k in range(8000):
    pre_img = sele_img[k]/255
    span_img = np.zeros((128,128, 3))
    for u in range(32):
        for v in range(32):
            for i in range(4):
                for j in range(4):
                    span_img[4*u+i][4*v+j] = pre_img[u, v, :3]    # 將圖片放大成128*128
    span_img = np.expand_dims(span_img, axis=0)    # 增加第一個batch維度
    imgs.append(span_img)    # 把圖片數組加到一個列表裡面   

### 引入meme的database

In [10]:
fr = open("memeurls.csv", 'r')
for l in fr:
    break
l = l.split(',')

In [12]:
count = 0
for url in l:
    res = requests.get(url)
    img0 = np.array(Image.open(BytesIO(res.content)))    # 將圖片轉為數組
    if len(img0.shape) == 2:    # 2維代表是GIF，需要排除
        continue
    img = img0/255 
    mini = min(img.shape[0], img.shape[1])
    new_size = mini - mini%128
    mult = new_size//128
    pre_img = img[0:new_size, 0:new_size]    # 將圖片長寬裁為128的倍數
    comp_img = np.zeros((128,128, 3))
    for i in range(128):
        for j in range(128):
            comp_img[i][j] = np.mean(pre_img[i*mult:(i+1)*mult, j*mult:(j+1)*mult, :3], axis=(0,1))    # 將圖片壓縮成128*128
    comp_img = np.expand_dims(comp_img, axis=0)    # 增加第一個batch維度
    imgs.append(comp_img)    # 把圖片數組加到一個列表裡面
    if count%10 == 0:
        clear_output(wait=True)
    count = count + 1
    print("no.%s image loaded."%count)
    
total = count    # 追蹤有多少有效資料(GIF數量約一成)
print("A total of %s image loaded."%total)

no.131 image loaded.
no.132 image loaded.
no.133 image loaded.
no.134 image loaded.
no.135 image loaded.
no.136 image loaded.
no.137 image loaded.
no.138 image loaded.
no.139 image loaded.
A total of 139 image loaded.


### 將蒐集完的圖片concatenate在一起並製作標籤

In [41]:
x2_train = np.concatenate([x for x in imgs])    # 把所有圖片數組concatenate在一起
y2_train = np.zeros((8000+total,2))
for i in range(8000):
    y2_train[i] = [1, 0]    #前八千筆的label是[1,0]，代表不是meme
for i in range(8000,8000+total):
    y2_train[i] = [0, 1]    #八千筆後的label是[0,1]，代表是meme
x1_train = x2_train
y1_train = y2_train    # 初始化x1_train跟y1_train    
count = 0
indices = range(8000+total)    #8000+total為總資料數量
indices = np.array(indices)
np.random.shuffle(indices)    #將index打亂
for i in indices:
    x1_train[count] = x2_train[i]
    y1_train[count] = y2_train[i]    #把打亂後的index依序填入新的陣列
    count = count + 1
x0_train = x1_train[:7000+total]
y0_train = y1_train[:7000+total]
x0_test = x1_train[7000+total:]
y0_test = y1_train[7000+total:]    #切出後面1000筆作為test set

In [None]:
#x0_train.shape
#y0_train.shape
#x0_test.shape
#y0_test.shape

### 資料處理完畢，開始架設神經網路

In [23]:

num_channels = 3
num_classes = 2

model = Sequential()
model.add(Conv2D(2, (9,9), padding='same', input_shape=(None, None, 3)))   #圖片大小不固定所以用NONE
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))   #第一層
model.add(Conv2D(4, (8,8), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))   #第二層
model.add(Conv2D(8, (7,7), padding='same'))
model.add(Activation('relu'))
#model.add(MaxPooling2D(pool_size=(2, 2)))   #第三層(怕最後矩陣變太小這裡先不做pooling)
model.add(Conv2D(16, (6,6), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))   #第四層
model.add(Conv2D(32, (5,5), padding='same'))
model.add(Activation('relu'))
#這裡運用SPP讓結果的大小是固定的
model.add(SpatialPyramidPooling([1, 2, 4])) #會輸出(1+4+16)=21的大小
model.add(Dense(num_classes))   #搭配上行是一個21*2的FC layer
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])    #learning rate不敢亂動QQ


Instructions for updating:
Colocations handled automatically by placer.


In [25]:
x_train = x0_train
y_train = y0_train
model.fit(x_train, y_train, batch_size=500, epochs=15)    #batch_size跟epochs可以再調整

Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Deprecated in favor of operator or tf.math.divide.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0xa08a57f588>

In [26]:
#把得來不易的model存下來
model.save('memerecog_model.h5')

In [43]:
x_test = x0_test
y_test = y0_test
score = model.evaluate(x_test, y_test)
print(f'測試資料的 loss: {score[0]:.5f}')
print(f'測試資料的正確率: {score[1]}')

測試資料的 loss: 0.36642
測試資料的正確率: 0.8205128205128205
