In [0]:
import os
import re
import datetime
import numpy as np
from PIL import Image
from glob import glob
import pandas as pd
import matplotlib.pyplot as plt

In [0]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def generate_data(x, t):
  datagen = ImageDataGenerator(
    #rotation_range=180,     # randomly rotate images in the range
    zoom_range=0.2,         # randomly zoom image
    width_shift_range=0.2,  # randomly shift images horizontally
    height_shift_range=0.2, # randomly shift images vertically
    horizontal_flip=True,   # randomly flip images horizontally
    #vertical_flip=True      # randomly flip images vertically
  )

  x_list = []
  t_list = []

  x_array = np.expand_dims(x, axis=0)

  datagen.fit(x_array)

  iter = datagen.flow(x_array, batch_size=1)

  for i in range(5):
    ret = iter.next()
    #print(ret.shape, ret.dtype)
    x_list.append(ret[0])
    t_list.append(t)

  return x_list, t_list

In [0]:
def prepae_data():
    x, t = [], []

    categorized_dir_paths = glob('/content/drive/My Drive/kikagaku/novelapp/data/images/categorized-2/*')

    for dir_path in categorized_dir_paths:
        category_id = os.path.basename(dir_path)
        image_paths = glob(os.path.join(dir_path, '*'))
        print(datetime.datetime.now().isoformat(), 'Category', category_id, ':', len(image_paths))

        for i, p in enumerate(image_paths):
            book_id = re.sub(r'(_.*$)', '', os.path.basename(p))

            if i % 30 == 0:
                print(datetime.datetime.now().isoformat(), 'Image:', i, book_id, category_id)

            try:
                img = Image.open(p)
                img_resize = img.resize((229, 229))
                img_np = np.array(img_resize) / 255.0

                x.append(img_np)
                t.append(category_id)

                x_gen, t_gen = generate_data(img_np, category_id)
                x.extend(x_gen)
                t.extend(t_gen)

            except Exception as e:
                print(datetime.datetime.now().isoformat(), 'Error:', e)

    return x, t

In [5]:
x, t = prepae_data()

2020-06-05T12:40:16.253539 Category 0 : 117
2020-06-05T12:40:16.255027 Image: 0 1015 0
2020-06-05T12:40:18.232236 Image: 30 1228 0
2020-06-05T12:40:20.258207 Image: 60 1106 0
2020-06-05T12:40:22.309902 Image: 90 1129 0
2020-06-05T12:40:24.160728 Category 1 : 119
2020-06-05T12:40:24.162511 Image: 0 3636 1
2020-06-05T12:40:26.187736 Image: 30 3722 1
2020-06-05T12:40:28.230822 Image: 60 3832 1
2020-06-05T12:40:30.251362 Image: 90 3777 1
2020-06-05T12:40:32.162836 Category 2 : 129
2020-06-05T12:40:32.163745 Image: 0 3601 2
2020-06-05T12:40:34.124162 Image: 30 3884 2
2020-06-05T12:40:36.130133 Image: 60 3778 2
2020-06-05T12:40:38.152785 Image: 90 3853 2
2020-06-05T12:40:40.096071 Image: 120 1502 2
2020-06-05T12:40:40.707674 Category 3 : 111
2020-06-05T12:40:40.707959 Image: 0 3312 3
2020-06-05T12:40:42.685717 Image: 30 2350 3
2020-06-05T12:40:44.672501 Image: 60 3010 3
2020-06-05T12:40:46.634394 Image: 90 3105 3


In [6]:
print(len(x), len(t))

2856 2856


In [7]:
x = np.array(x).astype('f')
t = np.array(t).astype('f')
print(x.shape, t.shape)

(2856, 229, 229, 3) (2856,)


In [0]:
import tensorflow as tf
from tensorflow import keras

In [0]:
from sklearn.model_selection import train_test_split
x_train, x_val, t_train, t_val = train_test_split(x, t, test_size=0.3, random_state=0)

In [10]:
print(x_train.shape, x_val.shape)

(1999, 229, 229, 3) (857, 229, 229, 3)


In [0]:
import os
import random

def reset_seed(seed=0):
    os.environ['PYTHONHASHSEED'] = '0'
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

In [0]:
reset_seed(0)

In [13]:
category_count = len(np.unique(t))
print(np.unique(t))
print(category_count)

[0. 1. 2. 3.]
4


In [0]:
from tensorflow.keras import models, layers
from tensorflow.keras.applications import Xception
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

In [0]:
def build_model(units, dropout, lr):
  model_fine = Xception(include_top=False, weights='imagenet', input_shape=x_train.shape[1:])

  for layer in model_fine.layers[:100]:
      layer.trainable = False

  model = models.Sequential()
  model.add(model_fine)

  model.add(layers.Flatten())

  model.add(layers.BatchNormalization())
  model.add(layers.Dense(units, activation='relu'))
  model.add(layers.Dropout(dropout))
  model.add(layers.Dense(category_count, activation='softmax'))

  #optimizer = keras.optimizers.Adagrad(lr=lr)
  #optimizer = keras.optimizers.Adam(lr=lr)
  optimizer = keras.optimizers.SGD(lr=lr)

  model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

  return model

In [16]:
print(datetime.datetime.now().isoformat(), 'Start')

units = [10] #, 20, 40, 60, 120]
dropout = [0.1] #, 0.2, 0.3, 0.4, 0.5]
lr = [0.01, 0.005] #, 0.002, 0.001, 0.0008]
batch_size = [10] #, 20, 30, 60, 100]

param_dict = dict(units=units, 
                  dropout=dropout,
                  lr=lr, 
                  batch_size=batch_size)

model = KerasClassifier(build_fn = build_model, verbose=2)

rand = RandomizedSearchCV(estimator=model, param_distributions=param_dict, n_iter=3)

rand_result = rand.fit(x_train, t_train)

#history = model.fit_generator(
#    datagen.flow(x_train, t_train, batch_size=32),
#    epochs=100,
#    verbose=2,
#    validation_data=(x_val, t_val),
#    #steps_per_epoch=x_train.shape[0]
#)

print(datetime.datetime.now().isoformat(), 'End')

2020-06-05T12:42:18.952683 Start




Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
160/160 - 24s - loss: 1.5888 - accuracy: 0.2627
40/40 - 4s - loss: 1.3859 - accuracy: 0.2600
160/160 - 24s - loss: 2.0877 - accuracy: 0.2864
40/40 - 4s - loss: 1.3845 - accuracy: 0.2850
160/160 - 24s - loss: 1.6838 - accuracy: 0.2683
40/40 - 4s - loss: 1.3832 - accuracy: 0.2300
160/160 - 24s - loss: 2.2479 - accuracy: 0.2727
40/40 - 4s - loss: 1.3866 - accuracy: 0.2675
160/160 - 24s - loss: 1.8403 - accuracy: 0.3106
40/40 - 4s - loss: 1.3780 - accuracy: 0.3158
160/160 - 24s - loss: 2.6622 - accuracy: 0.3759
40/40 - 4s - loss: 1.3161 - accuracy: 0.4275
160/160 - 24s - loss: 1.9702 - accuracy: 0.4184
40/40 - 4s - loss: 1.2535 - accuracy: 0.5025
160/160 - 24s - loss: 2.1861 - accuracy: 0.2708
40/40 - 4s - loss: 1.3831 - accuracy: 0.3250
160/160 - 24s - loss: 2.6016 - accuracy: 0.3809
40/40 - 4s - loss: 1.2838 - accuracy: 0.4700
160/160 - 24s - lo

In [17]:
pd.DataFrame(rand.cv_results_).sort_values('rank_test_score').T

Unnamed: 0,1,0
mean_fit_time,33.2252,36.158
std_fit_time,0.452909,6.51426
mean_score_time,5.18198,5.17714
std_score_time,0.0183455,0.0481744
param_units,10,10
param_lr,0.005,0.01
param_dropout,0.1,0.1
param_batch_size,10,10
params,"{'units': 10, 'lr': 0.005, 'dropout': 0.1, 'ba...","{'units': 10, 'lr': 0.01, 'dropout': 0.1, 'bat..."
split0_test_score,0.4275,0.26


In [18]:
rand.best_params_

{'batch_size': 10, 'dropout': 0.1, 'lr': 0.005, 'units': 10}