In [None]:
import os
import sys
import json

import sys
from processor import preprocess

import akagi
print("akagi version:", akagi.__version__)
from akagi.data_sources import RedshiftDataSource, S3DataSource
from akagi.iterator import FileFormat

from keras.preprocessing.image import ImageDataGenerator

# For restricted user data retrieval (from misc-internal bucket)

In [None]:
# Read the AWS credential pair from a local JSON file so that the keys are
# never written into the notebook itself.
with open('awskeys.txt', 'r') as key_file:
    dic = json.load(key_file)

AWS_KEY_ID = dic["AWS_KEY_ID"]
AWS_SECRET_KEY = dic["AWS_SECRET_KEY"]

In [None]:
# Export the credentials through the process environment.
os.environ.update({
    'AWS_ACCESS_KEY_ID': AWS_KEY_ID,
    'AWS_SECRET_ACCESS_KEY': AWS_SECRET_KEY,
})

In [None]:
# Local directory the data is synced into; the download cell is skipped when
# this directory already exists.
# NOTE(review): the value is an empty string here. os.path.isdir("") is False
# and os.mkdir("") fails, so set a real path before running the next cell.
DATA_DIR = ""

In [None]:
%%time

if not os.path.isdir(DATA_DIR):
    assert (AWS_KEY_ID is not None) and (AWS_SECRET_KEY is not None), "SET your keys."
    os.mkdir(DATA_DIR) 

    REGION_NAME = "ap-northeast-1"
    BUCKET_NAME = ""

    DEFAULT_OUTPUT = "text"
    DIST_DATA_DIR = os.path.normpath(DATA_DIR)
    
    os.environ['AWS_ACCESS_KEY_ID'] = AWS_KEY_ID
    os.environ['AWS_SECRET_ACCESS_KEY'] = AWS_SECRET_KEY
    os.environ['AWS_DEFAULT_REGION'] = REGION_NAME
    os.environ['AWS_DEFAULT_OUTPUT'] = DEFAULT_OUTPUT

    print("sync")
    !aws s3 sync {BUCKET_NAME} {DIST_DATA_DIR}
    print("sync done")

### Make the download path consistent with akagi (we should have done this beforehand, but the code is placed here)

In [None]:
# NOTE(review): the path argument is an empty string as written, and
# os.mkdir('') raises an error (FileNotFoundError on Python 3) - fill in the
# target directory before running.
os.mkdir('') 

In [None]:
!mv 

In [None]:
# NOTE(review): the path argument is an empty string as written, and
# os.makedirs('') raises an error (FileNotFoundError on Python 3) - fill in the
# target directory before running.
os.makedirs('')

In [None]:
!mv 

## Original data retrieval code

In [None]:
%%time

DATA_DIR = ""
 
if not os.path.isdir(DATA_DIR):
    os.mkdir(DATA_DIR) 

    with S3DataSource.for_prefix(
        'research.ap-northeast-1',
        '',
        FileFormat.BINARY
    ) as ds:
        ds.save(DATA_DIR)
    
    with S3DataSource.for_prefix(
        'research.ap-northeast-1',
        '',
        FileFormat.BINARY
    ) as ds:
        ds.save(DATA_DIR)
    
    with S3DataSource.for_prefix(
        'research.ap-northeast-1',
        '',
        FileFormat.BINARY
    ) as ds:
        ds.save(DATA_DIR)

In [None]:
# Edge length (pixels) of the square that images are resized to
SIZE = 224

# Training and validation image directories, both resolved under DATA_DIR
TRAIN_DATA_DIR, VALID_DATA_DIR = (
    os.path.normpath(os.path.join(DATA_DIR, subdir)) for subdir in ("", "")
)

In [None]:
# Generator settings for the training set: `preprocess` plus random augmentation.
TRAIN_DATAGEN = ImageDataGenerator(
    preprocessing_function=preprocess,
    # geometric transforms
    rotation_range=15,
    width_shift_range=0.25,
    height_shift_range=0.25,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
    # colour transform
    channel_shift_range=0.2
)

In [None]:
# Batches of (image, integer label) read from TRAIN_DATA_DIR.
TRAIN_GENERATOR = TRAIN_DATAGEN.flow_from_directory(
    directory=TRAIN_DATA_DIR,
    target_size=(SIZE,) * 2,
    batch_size=16,
    class_mode='sparse',  # integer labels, matching the sparse loss used at compile time
    seed=1729,
    shuffle=True
)

In [None]:
# Validation images only go through `preprocess`; no augmentation options are set.
VALID_DATAGEN = ImageDataGenerator(preprocessing_function=preprocess)

In [None]:
# Batches of (image, integer label) read from VALID_DATA_DIR, using the same
# size, batch size and seed as the training generator.
VALID_GENERATOR = VALID_DATAGEN.flow_from_directory(
    directory=VALID_DATA_DIR,
    target_size=(SIZE,) * 2,
    batch_size=16,
    class_mode='sparse',
    seed=1729,
    shuffle=True
)

In [None]:
# Name (no extension) under which this run's model files are written
TRAINED_MODEL_NAME = ""
# Starting point passed to complile_model: "imagenet" or the path prefix of a saved model
BASE_MODEL_NAME = "imagenet"
# Common path prefix for the architecture JSON, labels JSON, checkpoints and final weights
MODEL_SAVE_PATH = os.path.join("./trained_model/inceptionv3/", TRAINED_MODEL_NAME)

# Alternative configuration: continue from a previously trained model
# BASE_MODEL_DIR = "../../trained_model/inceptionv3/"
# TRAINED_MODEL_NAME = ""
# BASE_MODEL_NAME = os.path.normpath("{}/{}".format(BASE_MODEL_DIR, TRAINED_MODEL_NAME))
# MODEL_SAVE_PATH = os.path.join("./trained_model/inceptionv3/", TRAINED_MODEL_NAME)

In [None]:
from keras.applications.inception_v3 import InceptionV3
from keras.models import Model, model_from_json
from keras.layers import Dense, GlobalAveragePooling2D
from keras import optimizers

basemodel_layer_num = 311 #corresponding to len(base_model.layers)

def complile_model(base_model_name, only_top_layer=False):
    '''
    Build an InceptionV3-based classifier and compile it.

    input : base_model_name - 'imagenet' to start from the ImageNet InceptionV3
                weights with a freshly added classification head, or the path
                prefix of a previously saved model ("<prefix>.json" holds the
                architecture and "<prefix>.h5" the weights).
            only_top_layer - if True, the first `basemodel_layer_num` layers are
                frozen and only the layers after them are trained; if False,
                every layer is trainable.
    output : compiled model (Adam optimizer, sparse categorical cross-entropy
             loss, accuracy metric)
    '''
    if base_model_name == 'imagenet':
        base_model = InceptionV3(weights='imagenet', include_top=False)
        x = base_model.output
        x = GlobalAveragePooling2D()(x)
        x = Dense(1024, activation='relu')(x)
        # Count the classes via class_indices: the iterator's class-count
        # attribute is named differently across Keras releases
        # (num_class vs. num_classes), while class_indices is always present.
        num_classes = len(TRAIN_GENERATOR.class_indices)
        predictions = Dense(num_classes, activation='softmax')(x)

        model = Model(inputs=base_model.input, outputs=predictions)

    else:
        # model_from_json expects the JSON text itself, so pass the file content as is.
        with open("{0}.json".format(base_model_name), 'r') as f:
            model = model_from_json(f.read())

        model.load_weights("{0}.h5".format(base_model_name))
        model = Model(inputs=model.input, outputs=model.output)

    #Set layers be trainable
    for index, layer in enumerate(model.layers):
        # When only the top is trained, everything below basemodel_layer_num stays frozen.
        layer.trainable = (not only_top_layer) or index >= basemodel_layer_num

    #Model compile
    optimizer = optimizers.Adam(lr=0.0001, decay=0.01)
    #optimizer = optimizers.Adagrad(lr=0.0025, epsilon=1e-08, decay=0.01)
    #optimizer = optimizers.SGD(lr=0.001, momentum=0.001, decay=0.001, nesterov=True)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=["accuracy"])

    return model

In [None]:
from keras.callbacks import ModelCheckpoint
from keras.callbacks import EarlyStopping

# Checkpoint file name template; the {epoch} and {val_acc} fields are left
# for the checkpoint callback to fill in.
FILEPATH = MODEL_SAVE_PATH + "-{epoch:02d}-{val_acc:.3f}.h5"

# save_best_only=False: a checkpoint is written regardless of whether val_acc improved.
CHECKPOINT = ModelCheckpoint(
    FILEPATH,
    monitor='val_acc',
    mode='max',
    save_best_only=False,
    verbose=1
)

# Built for convenience but not part of CALLBACKS_LIST below.
EARLYSTOPPING = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=5,
    verbose=1
)

CALLBACKS_LIST = [CHECKPOINT]
#CALLBACKS_LIST = [CHECKPOINT, EARLYSTOPPING]

In [None]:
def train_model(model, steps_per_epoch=100, epochs=5, validation_steps=10):
    '''
    Train `model` on TRAIN_GENERATOR and persist it under MODEL_SAVE_PATH.

    Files written (all sharing the MODEL_SAVE_PATH prefix):
        <prefix>.json        - model architecture
        <prefix>-labels.json - class name -> index mapping of TRAIN_GENERATOR
        <prefix>.h5          - weights after the last epoch
    Whatever CALLBACKS_LIST contains also runs during training.

    input : model - compiled keras model
            steps_per_epoch - number of training batches drawn per epoch
            epochs - number of epochs to train
            validation_steps - number of validation batches evaluated per epoch
    output : the same model object, trained
    '''
    # Every output file lives in MODEL_SAVE_PATH's directory; create it up
    # front so the first write does not fail on a fresh checkout.
    save_dir = os.path.dirname(MODEL_SAVE_PATH)
    if save_dir and not os.path.isdir(save_dir):
        os.makedirs(save_dir)

    with open("{0}.json".format(MODEL_SAVE_PATH), 'w') as f:
        f.write(model.to_json()) # model.to_json() is already a JSON string
    with open("{0}-labels.json".format(MODEL_SAVE_PATH), 'w') as f:
        json.dump(TRAIN_GENERATOR.class_indices, f)

    model.fit_generator(
        generator=TRAIN_GENERATOR,
        steps_per_epoch=steps_per_epoch,
        epochs=epochs,
        verbose=1,
        validation_data=VALID_GENERATOR,
        validation_steps=validation_steps,
        callbacks=CALLBACKS_LIST
    )

    model.save_weights('{0}.h5'.format(MODEL_SAVE_PATH))
    return model

In [None]:
# Build the model from BASE_MODEL_NAME; only_top_layer=True freezes the first
# basemodel_layer_num layers so that only the layers after them are trained.
MODEL = complile_model(BASE_MODEL_NAME, only_top_layer=True)

In [None]:
# Train and save under MODEL_SAVE_PATH; train_model returns the model it was given.
MODEL = train_model(MODEL)