# PetFinder.my Pawpularity Score / Tuner

## Imports

In [2]:
# Imports
import os
import numpy as np
from IPython.display import display

# Import landmark recognition lib
import petfinder_pawpularity_lib as mllib
tf_strategy = mllib.tf_strategy()

'TensorFlow Version: 2.6.0'

'TensorFlow Strategy: _DefaultDistributionStrategy'

In [3]:
# Enable retina display
%config InlineBackend.figure_format = "retina"

# Load Tensorboard
%load_ext tensorboard

## Settings

### Settings definitions

In [4]:
# Settings Map
if "settingsMap" not in globals(): settingsMap = {}

In [5]:
# tuner-local-cut
settingsMap["tuner-local-cut"] = {
    "debug": False,
    "model_load_dir": os.path.join("models"),
    "model_save_dir": os.path.join("models"),
    "tuner_save_dir": os.path.join("tuner"),
    "dataset_dir_src": os.path.join("..", "input", "petfinder-pawpularity-score"),
    "dataset_dir_cut": os.path.join("..", "input", "petfinder-pawpularity-score"),
    "dataset_dir_copy": os.path.join("dataset-copy"),
    "dataset_batch_size": 32,
    "dataset_image_size": (75, 75),
    "dataset_cut_ratio": 0.2,
    "dataset_shrink_ratio": 1.0,
    "dataset_split_ratios": [0.7, 0.2, 0.1],
    "dataset_shuffle": False,
    "dataset_shuffle_seed": np.random.seed(42),
    "dataset_prefetch": mllib.tf.data.AUTOTUNE,
    "train_fine_tuning_flag": False,
    "train_load_model_flag": True,
    "tuner_type": "random",
    "tuner_max_epochs": 3,
    "tuner_max_trials": 2,
    "tuner_executions_per_trial": 1,
    "tuner_seed": np.random.seed(42),
    "cleanup_data_flag": False,
}

In [6]:
# tuner-local-cut
settingsMap["tuner-local-full"] = {
    "debug": False,
    "model_load_dir": os.path.join("models"),
    "model_save_dir": os.path.join("models"),
    "tuner_save_dir": os.path.join("tuner"),
    "dataset_dir_src": os.path.join("..", "input", "petfinder-pawpularity-score"),
    "dataset_dir_cut": os.path.join("..", "input", "petfinder-pawpularity-score"),
    "dataset_dir_copy": os.path.join("dataset-copy"),
    "dataset_batch_size": 64,
    "dataset_image_size": (150, 150),
    "dataset_cut_ratio": 1.0,
    "dataset_shrink_ratio": 1.0,
    "dataset_split_ratios": [0.7, 0.2, 0.1],
    "dataset_shuffle": False,
    "dataset_shuffle_seed": np.random.seed(42),
    "dataset_prefetch": mllib.tf.data.AUTOTUNE,
    "train_fine_tuning_flag": False,
    "train_load_model_flag": True,
    "tuner_type": "random",
    "tuner_max_epochs": 3,
    "tuner_max_trials": 2,
    "tuner_executions_per_trial": 1,
    "tuner_seed": np.random.seed(42),
    "cleanup_data_flag": False,
}

In [7]:
# tuner-remote-cut
settingsMap["tuner-remote-cut"] = {
    "debug": False,
    "model_load_dir": os.path.join("..", "input", "petfinder-pawpularity-train", "models"),
    "model_save_dir": os.path.join("models"),
    "tuner_save_dir": os.path.join("tuner"),
    "dataset_dir_src": os.path.join("..", "input", "petfinder-pawpularity-score"),
    "dataset_dir_cut": os.path.join("dataset", "petfinder-pawpularity-score"),
    "dataset_dir_copy": os.path.join("dataset-copy"),
    "dataset_batch_size": 64,
    "dataset_image_size": (500, 500),
    "dataset_cut_ratio": 0.2,
    "dataset_shrink_ratio": 1.0,
    "dataset_split_ratios": [0.7, 0.2, 0.1],
    "dataset_shuffle": False,
    "dataset_shuffle_seed": np.random.seed(42),
    "dataset_prefetch": mllib.tf.data.AUTOTUNE,
    "train_fine_tuning_flag": False,
    "train_load_model_flag": True,
    "tuner_type": "random",
    "tuner_max_epochs": 10,
    "tuner_max_trials": 50,
    "tuner_executions_per_trial": 2,
    "tuner_seed": np.random.seed(42),
    "cleanup_data_flag": True,
}

In [8]:
# tuner-remote-full
settingsMap["tuner-remote-full"] = {
    "debug": False,
    "model_load_dir": os.path.join("..", "input", "petfinder-pawpularity-train", "models"),
    "model_save_dir": os.path.join("models"),
    "tuner_save_dir": os.path.join("tuner"),
    "dataset_dir_src": os.path.join("..", "input", "petfinder-pawpularity-score"),
    "dataset_dir_cut": os.path.join("dataset", "petfinder-pawpularity-score"),
    "dataset_dir_copy": os.path.join("dataset-copy"),
    "dataset_batch_size": 64,
    "dataset_image_size": (500, 500),
    "dataset_cut_ratio": 1.0,
    "dataset_shrink_ratio": 1.0,
    "dataset_split_ratios": [0.90, 0.05, 0.05],
    "dataset_shuffle": False,
    "dataset_shuffle_seed": np.random.seed(42),
    "dataset_prefetch": mllib.tf.data.AUTOTUNE,
    "train_fine_tuning_flag": False,
    "train_load_model_flag": True,
    "tuner_type": "random",
    "tuner_max_epochs": 10,
    "tuner_max_trials": 50,
    "tuner_executions_per_trial": 2,
    "tuner_seed": np.random.seed(42),
    "cleanup_data_flag": True,
}

### Selected settings

In [9]:
# Mode
mode = "tuner-local-cut"

In [10]:
# Selected settings
settings = settingsMap[mode]
display(settings)

# Debug
debug = settings["debug"]

{'debug': False,
 'model_load_dir': 'models',
 'model_save_dir': 'models',
 'tuner_save_dir': 'tuner',
 'dataset_dir_src': '../input/petfinder-pawpularity-score',
 'dataset_dir_cut': '../input/petfinder-pawpularity-score',
 'dataset_dir_copy': 'dataset-copy',
 'dataset_batch_size': 32,
 'dataset_image_size': (75, 75),
 'dataset_cut_ratio': 0.2,
 'dataset_shrink_ratio': 1.0,
 'dataset_split_ratios': [0.7, 0.2, 0.1],
 'dataset_shuffle': False,
 'dataset_shuffle_seed': None,
 'dataset_prefetch': -1,
 'train_fine_tuning_flag': False,
 'train_load_model_flag': True,
 'tuner_type': 'random',
 'tuner_max_epochs': 3,
 'tuner_max_trials': 2,
 'tuner_executions_per_trial': 1,
 'tuner_seed': None,
 'cleanup_data_flag': False}

## Prepare data

In [11]:
%%time
# Cut training data
dataset_dir = mllib.cut_training_data(
    cut_ratio=settings["dataset_cut_ratio"], 
    dataset_dir_src=settings["dataset_dir_src"], 
    dataset_dir_cut=settings["dataset_dir_cut"]
)
display(dataset_dir)

# Copy train.csv to output (It may be different with submission dataset)
if settings["dataset_dir_copy"] is not None:
    mllib.copy_file(os.path.join(dataset_dir, "train.csv"), os.path.join(settings["dataset_dir_copy"], "train%s.csv" % mllib.cut_suffix(settings["dataset_cut_ratio"])))

# Train data
training_data = mllib.load_training_data(dataset_dir)
if debug: 
    display(training_data)
    training_data.hist(bins=500, figsize=(18,3))

# Make training data
train_dataset, validate_dataset, test_dataset = mllib.make_training_validate_test_data(
    dataset=mllib.load_training_dataset(
        dataset_dir=dataset_dir,
        mapping_data=training_data,
        batch_size=settings["dataset_batch_size"],
        shuffle=settings["dataset_shuffle"],
        seed=settings["dataset_shuffle_seed"],
        image_size=settings["dataset_image_size"],
    ),
    split_ratios=settings["dataset_split_ratios"],
    shrink_ratio=settings["dataset_shrink_ratio"],
    prefetch=settings["dataset_prefetch"],
    map_fn=lambda image, features, score, file_id: (image, score),
)

# Training data infos
print("Train / Validate / Test datasets items: %s / %s / %s" % (
    settings["dataset_batch_size"] * train_dataset().cardinality().numpy(), 
    settings["dataset_batch_size"] * validate_dataset().cardinality().numpy(), 
    settings["dataset_batch_size"] * test_dataset().cardinality().numpy()
))
if debug:
    print("")
    print("Train dataset:")
    mllib.plot_images_scores_from_dataset(train_dataset().take(1))
    print("Validate dataset:")
    mllib.plot_images_scores_from_dataset(validate_dataset().take(1))
    print("Test dataset:")
    mllib.plot_images_scores_from_dataset(test_dataset().take(1))

'../input/petfinder-pawpularity-score-cut-0.200'

'Copy ../input/petfinder-pawpularity-score-cut-0.200/train.csv to dataset-copy/train-cut-0.200.csv'

'Load training data from ../input/petfinder-pawpularity-score-cut-0.200/train.csv'

Cause: could not parse the source code of <function <lambda> at 0x7f9368a71b80>: no matching AST found
Cause: could not parse the source code of <function <lambda> at 0x7f9368a71b80>: no matching AST found
Train / Validate / Test datasets items: 1376 / 384 / 192
CPU times: user 730 ms, sys: 201 ms, total: 931 ms
Wall time: 749 ms


## Tune model

In [12]:
# Prepare model parameters
def get_model_parameters(settings):
    dataset_image_size = settings["dataset_image_size"]
    model_parameters = {
        "model_name": "model_2_xception" + mllib.cut_suffix(settings["dataset_cut_ratio"]),
        "input_shape": [dataset_image_size[0], dataset_image_size[1], 3],
        "output_size": 1,
        "dense_layers": [512, 256, 128],
        "dropout_rate": 0.3,
        "learning_rate": 5e-4,
        "fine_tuning": settings["train_fine_tuning_flag"],
    }
    model_name = model_parameters["model_name"]
    model_file = mllib.model_file_path_load(model_name, settings["model_load_dir"])
    preload_weights = None if settings["train_load_model_flag"] and os.path.exists(model_file) else "imagenet"
    model_parameters["preload_weights"] = preload_weights
    return model_parameters
model_parameters = get_model_parameters(settings)
display(model_parameters)

# Build hypermodel
def build_hypermodel(parameters):
        def _build_model(hp):
            parameters["dropout_rate"] = hp.Choice("dropout_rate", values=[0.0, 0.1, 0.2, 0.3, 0.4, 0.5])
            parameters["learning_rate"] = hp.Choice("learning_rate", values=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2])
            return mllib.setup_model(parameters)
        return _build_model
hypermodel = build_hypermodel(model_parameters)

{'model_name': 'model_2_xception-cut-0.200',
 'input_shape': [75, 75, 3],
 'output_size': 1,
 'dense_layers': [512, 256, 128],
 'dropout_rate': 0.3,
 'learning_rate': 0.0005,
 'fine_tuning': False,
 'preload_weights': 'imagenet'}

In [13]:
# Tune model
tuner = mllib.tune_model(
    hypermodel=hypermodel, 
    project_name=model_parameters["model_name"], 
    settings=settings, 
    train_dataset=train_dataset, 
    validate_dataset=validate_dataset,
)

Trial 2 Complete [00h 01m 15s]
val_rmse: 24.36414337158203

Best val_rmse So Far: 23.91810417175293
Total elapsed time: 00h 02m 31s
INFO:tensorflow:Oracle triggered exit
Results summary
Results in tuner/model_2_xception-cut-0.200
Showing 10 best trials
Objective(name='val_rmse', direction='min')
Trial summary
Hyperparameters:
dropout_rate: 0.1
learning_rate: 0.0005
Score: 23.91810417175293
Trial summary
Hyperparameters:
dropout_rate: 0.2
learning_rate: 0.0001
Score: 24.36414337158203


## Cleanup

In [14]:
%%time
if settings["cleanup_data_flag"]: 
    mllib.delete_training_data(cut_ratio=settings["dataset_cut_ratio"], dataset_dir_cut=settings["dataset_dir_cut"])

CPU times: user 5 µs, sys: 1e+03 ns, total: 6 µs
Wall time: 11 µs
