In [0]:
import os
import re
import datetime
import numpy as np
from PIL import Image
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
def prepae_data():
    x, t = [], []

    categorized_dir_paths = glob('/content/drive/My Drive/kikagaku/novelapp/data/images/categorized-2/*')

    for dir_path in categorized_dir_paths:
        category_id = os.path.basename(dir_path)
        image_paths = glob(os.path.join(dir_path, '*'))
        print(datetime.datetime.now().isoformat(), 'Category', category_id, ':', len(image_paths))

        for i, p in enumerate(image_paths):
            book_id = re.sub(r'(_.*$)', '', os.path.basename(p))

            if i % 30 == 0:
                print(datetime.datetime.now().isoformat(), 'Image:', i, book_id, category_id)

            try:
                img = Image.open(p)
                img_resize = img.resize((229, 229))
                img_np = np.array(img_resize) / 255.0

                x.append(img_np)
                t.append(category_id)

            except Exception as e:
                print(datetime.datetime.now().isoformat(), 'Error:', e)

    return x, t

In [3]:
x, t = prepae_data()

2020-06-06T07:28:14.621430 Category 0 : 117
2020-06-06T07:28:14.621856 Image: 0 1015 0
2020-06-06T07:28:33.033133 Image: 30 1228 0
2020-06-06T07:28:50.822824 Image: 60 1106 0
2020-06-06T07:29:08.619442 Image: 90 1129 0
2020-06-06T07:29:24.652269 Category 1 : 119
2020-06-06T07:29:24.652778 Image: 0 3636 1
2020-06-06T07:29:42.202886 Image: 30 3722 1
2020-06-06T07:30:00.291965 Image: 60 3832 1
2020-06-06T07:30:19.083362 Image: 90 3777 1
2020-06-06T07:30:36.181659 Category 2 : 129
2020-06-06T07:30:36.182456 Image: 0 3601 2
2020-06-06T07:30:54.795571 Image: 30 3884 2
2020-06-06T07:31:12.400914 Image: 60 3778 2
2020-06-06T07:31:30.607472 Image: 90 3853 2
2020-06-06T07:31:47.639739 Image: 120 1502 2
2020-06-06T07:31:53.017563 Category 3 : 111
2020-06-06T07:31:53.018254 Image: 0 3312 3
2020-06-06T07:32:10.376655 Image: 30 2350 3
2020-06-06T07:32:27.240215 Image: 60 3010 3
2020-06-06T07:32:44.781525 Image: 90 3105 3


In [4]:
x = np.array(x).astype('f')
t = np.array(t).astype('f')
print(x.shape, t.shape)

(476, 229, 229, 3) (476,)


In [0]:
import tensorflow as tf
from tensorflow import keras

In [0]:
from sklearn.model_selection import train_test_split
x_train, x_val, t_train, t_val = train_test_split(x, t, test_size=0.3, random_state=0)

In [7]:
print(x_train.shape, x_val.shape)

(333, 229, 229, 3) (143, 229, 229, 3)


In [0]:
import os
import random

def reset_seed(seed=0):

    os.environ['PYTHONHASHSEED'] = '0'
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

In [0]:
reset_seed(0)

In [10]:
category_count = len(np.unique(t))
print(np.unique(t))
print(category_count)

[0. 1. 2. 3.]
4


In [0]:
from tensorflow.keras import models, layers
from tensorflow.keras.applications import Xception
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

In [0]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    #rotation_range=180,
    zoom_range=0.1,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    #vertical_flip=True
)

datagen.fit(x_train)

In [0]:
def build_model(units, dropout, lr):
  model_fine = Xception(include_top=False, weights='imagenet', input_shape=x_train.shape[1:])

  for layer in model_fine.layers[:100]:
      layer.trainable = False

  model = models.Sequential()
  model.add(model_fine)

  model.add(layers.Flatten())

  model.add(layers.BatchNormalization())
  model.add(layers.Dense(units, activation='relu'))
  model.add(layers.Dropout(dropout))
  model.add(layers.Dense(category_count, activation='softmax'))

  #optimizer = keras.optimizers.Adagrad(lr=lr)
  #optimizer = keras.optimizers.Adam(lr=lr)
  optimizer = keras.optimizers.SGD(lr=lr)

  model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

  return model

In [15]:
print(datetime.datetime.now().isoformat(), 'Start')

units = [10] #, 20, 40, 60, 120]
dropout = [0.1, 0.2] #, 0.3, 0.4, 0.5]
lr = [0.01, 0.005] #, 0.002, 0.001, 0.0008]

param_dict = dict(units=units, 
                  dropout=dropout,
                  lr=lr)

model = KerasClassifier(build_fn = build_model, verbose=0)

n_iter = 4
batch_size = n_iter * 32

rand = RandomizedSearchCV(estimator=model, param_distributions=param_dict, n_iter=n_iter)

epochs = 5

for e in range(epochs):
  print(datetime.datetime.now().isoformat(), 'Epoch', e)
  batches = 0

  for x_batch, t_batch in datagen.flow(x_train, t_train, batch_size=batch_size):
    print(x_batch.shape, t_batch.shape)
    rand.fit(x_batch, t_batch)
    batches += 1
    if batches >= len(x_train) / batch_size:
      break

print(datetime.datetime.now().isoformat(), 'End')

2020-06-06T07:58:01.040457 Start
2020-06-06T07:58:01.042624 Epoch 0
(128, 229, 229, 3) (128,)


tensorflow.python.framework.errors_impl.ResourceExhaustedError: OOM when allocating tensor with shape[3,3,32,64] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:RandomUniform]



ResourceExhaustedError: ignored