In [0]:
import os
import re
import datetime
import numpy as np
from PIL import Image
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
def prepae_data():
    x, t = [], []

    categorized_dir_paths = glob('/content/drive/My Drive/kikagaku/novelapp/data/images/categorized-2/*')

    for dir_path in categorized_dir_paths:
        category_id = os.path.basename(dir_path)
        image_paths = glob(os.path.join(dir_path, '*'))
        print(datetime.datetime.now().isoformat(), 'Category', category_id, ':', len(image_paths))

        for i, p in enumerate(image_paths):
            book_id = re.sub(r'(_.*$)', '', os.path.basename(p))

            if i % 30 == 0:
                print(datetime.datetime.now().isoformat(), 'Image:', i, book_id, category_id)

            try:
                img = Image.open(p)
                img_resize = img.resize((229, 229))
                img_np = np.array(img_resize) / 255.0
                img_reshape = img_np.reshape(1, 229, 229, 3)
                #print(datetime.datetime.now().isoformat(), img_np.shape, img_reshape.shape)

                x.append(img_np)
                t.append(category_id)

            except Exception as e:
                print(datetime.datetime.now().isoformat(), 'Error:', e)

    return x, t

In [5]:
x, t = prepae_data()

2020-06-05T11:06:22.296946 Category 0 : 117
2020-06-05T11:06:22.297231 Image: 0 1015 0
2020-06-05T11:06:22.570224 Image: 30 1228 0
2020-06-05T11:06:22.797538 Image: 60 1106 0
2020-06-05T11:06:23.027315 Image: 90 1129 0
2020-06-05T11:06:23.238118 Category 1 : 119
2020-06-05T11:06:23.238869 Image: 0 3636 1
2020-06-05T11:06:23.452985 Image: 30 3722 1
2020-06-05T11:06:23.681271 Image: 60 3832 1
2020-06-05T11:06:23.897107 Image: 90 3777 1
2020-06-05T11:06:24.119322 Category 2 : 129
2020-06-05T11:06:24.120024 Image: 0 3601 2
2020-06-05T11:06:24.341580 Image: 30 3884 2
2020-06-05T11:06:24.571910 Image: 60 3778 2
2020-06-05T11:06:24.801901 Image: 90 3853 2
2020-06-05T11:06:25.020530 Image: 120 1502 2
2020-06-05T11:06:25.095162 Category 3 : 111
2020-06-05T11:06:25.095713 Image: 0 3312 3
2020-06-05T11:06:25.320708 Image: 30 2350 3
2020-06-05T11:06:25.537214 Image: 60 3010 3
2020-06-05T11:06:25.767259 Image: 90 3105 3


In [6]:
x = np.array(x).astype('f')
t = np.array(t).astype('f')
print(x.shape, t.shape)

(476, 229, 229, 3) (476,)


In [0]:
import tensorflow as tf
from tensorflow import keras

In [0]:
from sklearn.model_selection import train_test_split
x_train, x_val, t_train, t_val = train_test_split(x, t, test_size=0.3, random_state=0)

In [9]:
print(x_train.shape, x_val.shape)

(333, 229, 229, 3) (143, 229, 229, 3)


In [0]:
import os
import random

def reset_seed(seed=0):

    os.environ['PYTHONHASHSEED'] = '0'
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

In [0]:
reset_seed(0)

In [12]:
category_count = len(np.unique(t))
print(np.unique(t))
print(category_count)

[0. 1. 2. 3.]
4


In [0]:
from tensorflow.keras import models, layers
from tensorflow.keras.applications import Xception
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

In [0]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    rotation_range=180,     # randomly rotate images in the range
    zoom_range=0.1,         # randomly zoom image
    width_shift_range=0.1,  # randomly shift images horizontally
    height_shift_range=0.1, # randomly shift images vertically
    horizontal_flip=True,   # randomly flip images horizontally
    vertical_flip=True      # randomly flip images vertically
)

datagen.fit(x_train)

In [0]:
def build_model(units, dropout, lr):
  model_fine = Xception(include_top=False, weights='imagenet', input_shape=x_train.shape[1:])

  for layer in model_fine.layers[:100]:
      layer.trainable = False

  model = models.Sequential()
  model.add(model_fine)

  model.add(layers.Flatten())

  model.add(layers.BatchNormalization())
  model.add(layers.Dense(units, activation='relu'))
  model.add(layers.Dropout(dropout))
  model.add(layers.Dense(category_count, activation='softmax'))

  #optimizer = keras.optimizers.Adagrad(lr=lr)
  #optimizer = keras.optimizers.Adam(lr=lr)
  optimizer = keras.optimizers.SGD(lr=lr)

  model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

  return model

In [16]:
print(datetime.datetime.now().isoformat(), 'Start')

units = [10, 20] #, 40, 60, 120]
dropout = [0.1, 0.2] #, 0.3, 0.4, 0.5]
lr = [0.01, 0.005] #, 0.002, 0.001, 0.0008]
batch_size = [10, 20] #, 30, 60, 100]

param_dict = dict(units=units, 
                  dropout=dropout,
                  lr=lr, 
                  batch_size=batch_size)

model = KerasClassifier(build_fn = build_model, verbose=2)

rand = RandomizedSearchCV(estimator=model, param_distributions=param_dict, n_iter=5)

epochs = 5

for e in range(epochs):
  print(datetime.datetime.now().isoformat(), 'Epoch', e)
  batches = 0

  for x_batch, t_batch in datagen.flow(x_train, t_train, batch_size=32):
    rand.fit(x_batch, t_batch)
    batches += 1
    if batches >= len(x_train) / 32:
      break

print(datetime.datetime.now().isoformat(), 'End')

2020-06-05T11:06:51.356657 Start
2020-06-05T11:06:51.357115 Epoch 0
2/2 - 0s - loss: 1.8336 - accuracy: 0.2400
1/1 - 0s - loss: 1.1045 - accuracy: 0.4286
2/2 - 0s - loss: 2.9181 - accuracy: 0.1600
1/1 - 0s - loss: 2.4506 - accuracy: 0.2857
2/2 - 0s - loss: 2.4177 - accuracy: 0.3077
1/1 - 0s - loss: 1.6006 - accuracy: 0.3333
2/2 - 0s - loss: 3.1563 - accuracy: 0.2308
1/1 - 0s - loss: 1.6979 - accuracy: 0.5000
2/2 - 0s - loss: 2.4955 - accuracy: 0.1538
1/1 - 0s - loss: 12.0159 - accuracy: 0.1667
3/3 - 0s - loss: 8.4349 - accuracy: 0.2400
1/1 - 0s - loss: 30.3675 - accuracy: 0.1429
3/3 - 0s - loss: 5.1748 - accuracy: 0.4000
1/1 - 0s - loss: 6.7744 - accuracy: 0.2857
3/3 - 0s - loss: 11.9428 - accuracy: 0.1538
1/1 - 0s - loss: 8.3489 - accuracy: 0.3333
3/3 - 0s - loss: 8.3928 - accuracy: 0.2308
1/1 - 0s - loss: 12.7454 - accuracy: 0.1667
3/3 - 0s - loss: 5.5588 - accuracy: 0.2692
1/1 - 0s - loss: 8.6514 - accuracy: 0.1667
2/2 - 0s - loss: 3.5263 - accuracy: 0.1200
1/1 - 0s - loss: 2.7352 -

tensorflow.python.framework.errors_impl.ResourceExhaustedError:  OOM when allocating tensor with shape[10,128,112,112] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node sequential_147/xception/block2_sepconv1/separable_conv2d (defined at /usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:515) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_2026693]

Function call stack:
train_function


tensorflow.python.framework.errors_impl.ResourceExhaustedError:  OOM when allocating tensor with shape[10,64,112,112] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node sequential_148/xception/block1_conv2/Conv2D (defined at /usr/local/lib/python3.6/dist-packages/sklearn/model_selection/_validation.py:515) ]]
Hint: If you want to see a list of allocate

ResourceExhaustedError: ignored