In [8]:
import os
import shutil
import enjoyml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from skimage.io import imread, imsave
# from sklearn.svm import SVC
# from sklearn.linear_model import LogisticRegression
# from sklearn.decomposition import PCA
# from joblib import dump, load

# from data_engineering import read_data, get_filter_duplicates_query

In [3]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
from keras import backend as K

# Build a TF session whose per_process_gpu_memory_fraction is 0.45, register it
# as the Keras session, then display the GPUs the Keras backend reports.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.45),
)
set_session(tf.Session(config=config))
K.tensorflow_backend._get_available_gpus()

['/job:localhost/replica:0/task:0/device:GPU:0']

In [4]:
from keras import layers, models, optimizers, callbacks, regularizers

In [9]:
# Re-lay out the flat training folder into one sub-directory per class so that
# `flow_from_directory` can infer the labels from the directory names.
DATA_PATH = 'data/train/'
DATAFLOW_PATH = 'data/train_flow/'
CLASS_LABELS = ('cat', 'dog')

enjoyml.path.make_dir_if_not_exist(DATAFLOW_PATH)
for class_label in CLASS_LABELS:
    enjoyml.path.make_dir_if_not_exist(DATAFLOW_PATH + class_label)

for img_name in tqdm(os.listdir(DATA_PATH)):
    # Source files are expected to be named `<label>.<number>.<format>`.
    name_parts = img_name.split('.')
    if len(name_parts) != 3 or name_parts[0] not in CLASS_LABELS:
        # Skip anything else (hidden files, sub-directories, unknown labels)
        # instead of raising half-way through and leaving the dataset partly moved.
        continue
    label, img_number, img_format = name_parts
    new_file_path = os.path.join(DATAFLOW_PATH, label, f'{img_number}.{img_format}')
    shutil.move(os.path.join(DATA_PATH, img_name), new_file_path)

100%|██████████| 25000/25000 [00:00<00:00, 42370.11it/s]


In [None]:
# Stratified hold-out split over row indices: 7% of the rows go to validation.
# NOTE(review): `work_features_matrix` and `work_labels` are not defined in the
# visible cells — confirm they exist before running on a fresh kernel.
train_indixes, val_indexes = train_test_split(
    np.arange(work_features_matrix.shape[0]),
    stratify=work_labels,
    test_size=0.07,
    random_state=42,
)

print(train_indixes.shape, val_indexes.shape)

In [None]:
# Fully-connected model fitted on the whole working set. No validation data is
# passed, so both callbacks monitor the training loss.
# NOTE(review): `get_fc_model`, `work_features_matrix` and `work_labels_encoded`
# are not defined in the visible cells — confirm before running on a fresh kernel.
fc_model_full = get_fc_model(28)

fc_model_full.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

lr_reducer = callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=5e-2,
    patience=3,
    min_delta=0.03,
    min_lr=1e-15,
    verbose=1,
)
stopper = callbacks.EarlyStopping(
    monitor='loss',
    min_delta=0.0001,
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

fc_model_full.fit(
    work_features_matrix/255,
    work_labels_encoded,
    batch_size=256,
    epochs=150,
    callbacks=[lr_reducer, stopper],
)

In [None]:
# Fit `fc_model` on the training split, with the validation split passed as
# `validation_data`; both feature matrices are divided by 255 before use.
# NOTE(review): `fc_model`, `train_features_matrix`, `train_labels_encoded`,
# `val_features_matrix` and `val_labels_encoded` are not defined in the visible
# cells (only `fc_model_full` is) — confirm they exist on a fresh kernel.
# `lr_reducer` and `stopper` are the callbacks created in the `fc_model_full`
# cell; both monitor 'loss', not a validation metric.
fc_model.fit(train_features_matrix/255, train_labels_encoded,
             batch_size=256, epochs=150,
             validation_data=(val_features_matrix/255, val_labels_encoded),
             callbacks=[lr_reducer, stopper])

In [None]:
from keras.models import Model
from keras.layers import Conv2D, MaxPool2D, Input, Dense, Dropout, Flatten, GlobalMaxPool2D
from keras.applications.resnet_v2 import ResNet50V2
from keras.optimizers import Adam
from enjoyml.keras.layers import FixedPooling2D

# Side length (in pixels) of the square RGB images fed to the network.
IMG_SIZE = 250
# When True the convolutional base starts from ImageNet weights; otherwise it is
# randomly initialised.
IMAGE_NET = True

# ResNet50V2 without its classification head, used as the feature extractor.
# The input shape is built from IMG_SIZE (the original referenced an undefined `size`).
resnet_conv_base = ResNet50V2(
    include_top=False,
    weights='imagenet' if IMAGE_NET else None,
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)


def get_resnet_model():
    """Build the cat-vs-dog classifier on top of the shared ResNet50V2 base.

    The network is `resnet_conv_base` followed by a flattened, dropout-regularised
    dense head (2048 -> 512 -> 128) and a single sigmoid unit. A one-unit sigmoid
    output is what `binary_crossentropy` and `class_mode='binary'` labels expect.

    All layers of the convolutional base except its last 37 are frozen. The base
    is a module-level object, so the freeze is applied to `resnet_conv_base` itself.

    Returns:
        An uncompiled Keras `Model` taking (IMG_SIZE, IMG_SIZE, 3) images and
        producing one probability per image.
    """
    input_ = Input((IMG_SIZE, IMG_SIZE, 3))

    x = resnet_conv_base(input_)
    x = Flatten()(x)

    x = Dropout(0.4)(x)
    x = Dense(2048, activation='relu')(x)
    x = Dropout(0.4)(x)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.4)(x)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.3)(x)
    output = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=input_, outputs=output)

    # Fine-tune only the last 37 layers of the convolutional base.
    for layer in resnet_conv_base.layers[:-37]:
        layer.trainable = False

    return model

# Instantiate and compile the classifier. With ImageNet weights an explicit
# Adam(lr=1e-5) is used; otherwise the optimizer is given by the name 'Adam'.
resnet_model = get_resnet_model()
resnet_model.compile(
    Adam(lr=1e-5) if IMAGE_NET else 'Adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the compiled ResNet classifier.
resnet_model.summary()

In [13]:
# Fraction of the images held out for validation (passed to ImageDataGenerator
# as `validation_split`).
VALIDATION_SPLIT = 0.05
# Batch sizes used by the training and validation directory iterators.
TRAIN_BATCH_SIZE = 64
VAL_BATCH_SIZE = 2

# Total number of images, used to derive the per-epoch step counts; this equals
# the 25000 files reported by the progress bar of the directory re-layout cell.
IMAGES_TOTAL_COUNT = 25000

In [12]:
from enjoyml.multiclass import calc_class_weights
from keras.preprocessing.image import ImageDataGenerator


# Preprocessing and augmentation settings for the image generator; the same
# generator also carries the train/validation split fraction.
# NOTE(review): Keras documents `rotation_range` in degrees, so 0.05 is close to
# a no-op — confirm the intended magnitude.
# NOTE(review): `vertical_flip` (not `horizontal_flip`) is enabled — confirm this
# is intended for this dataset.
data_gen_args = {
    'rescale': 1/255.,
    'rotation_range': 0.05,
    'width_shift_range': 0.025,
    'height_shift_range': 0.025,
    'zoom_range': [0.95, 1.05],
    'vertical_flip': True,
    'fill_mode': 'reflect',
    'validation_split': VALIDATION_SPLIT,
}

datagen = ImageDataGenerator(**data_gen_args)

In [None]:

# Training images come from the augmenting generator.
train_flow = datagen.flow_from_directory(
    DATAFLOW_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=TRAIN_BATCH_SIZE,
    class_mode='binary',
    subset='training',
)

# Validation images are only rescaled: taking the validation subset from `datagen`
# would also apply the random shifts, zooms and flips to them. The generator uses
# the same `validation_split` over the same directory so that it selects the files
# held out of the training subset.
val_datagen = ImageDataGenerator(
    rescale=data_gen_args['rescale'],
    validation_split=VALIDATION_SPLIT,
)
val_flow = val_datagen.flow_from_directory(
    DATAFLOW_PATH,
    target_size=(IMG_SIZE, IMG_SIZE),
    batch_size=VAL_BATCH_SIZE,
    class_mode='binary',
    subset='validation',
)

# len() of a directory iterator is its number of batches per pass, which avoids
# both the hard-coded image count and the extra step that `int(...) + 1` adds
# whenever the division is exact.
TRAIN_STEPS_PER_EPOCH = len(train_flow)
VAL_STEPS_PER_EPOCH = len(val_flow)

# NOTE(review): class weights are derived from the class indices of the training
# subset; this assumes `calc_class_weights` accepts an array of integer labels,
# as the original call on a labels array suggests — confirm against enjoyml.
resnet_model.fit_generator(
    train_flow,
    steps_per_epoch=TRAIN_STEPS_PER_EPOCH,
    epochs=50,
    class_weight=calc_class_weights(train_flow.classes),
    validation_data=val_flow,
    validation_steps=VAL_STEPS_PER_EPOCH,
)