# PetFinder.my Pawpularity Score / Predict

## Imports

In [1]:
# Imports
import os
import numpy as np
from IPython.display import display

# Import PetFinder Pawpularity lib
import petfinder_pawpularity_lib as mllib
tf_strategy = mllib.tf_strategy()

'TensorFlow Version: 2.6.0'

'TensorFlow Strategy: _DefaultDistributionStrategy'

In [2]:
# Enable retina display
# Sets the inline matplotlib backend's figure format to "retina".
%config InlineBackend.figure_format = "retina"

# Load Tensorboard
# Loads the TensorBoard notebook extension into this kernel.
# NOTE(review): no cell visible in this notebook uses the extension afterwards — confirm it is still needed.
%load_ext tensorboard

## Settings

### Settings definitions

In [3]:
# Settings Map
# Keep a map that already exists in the kernel namespace; otherwise start with an empty one.
settingsMap = globals().get("settingsMap", {})

In [4]:
# predict-local-cut
# Local debug profile: debug output on, cut ratio 0.2, 250x250 images, batch size 16,
# model directory "models", cleanup disabled.
# NOTE(review): "dataset_dir_cut" is the same directory as "dataset_dir_src" although the
# cut ratio is below 1.0 — confirm mllib.cut_training_data copes with identical directories.
settingsMap["predict-local-cut"] = dict(
    debug=True,
    model_load_dir=os.path.join("models"),
    model_save_dir=os.path.join("models"),
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_batch_size=16,
    dataset_image_size=(250, 250),
    dataset_cut_ratio=0.2,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.20, 0.1],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    score_sample_size=10,
    cleanup_data_flag=False,
)

In [5]:
# predict-local-full
# Local full profile: debug output off, cut ratio 1.0, 150x150 images, batch size 64,
# model directory "models", cleanup disabled. Source and cut directories are identical.
settingsMap["predict-local-full"] = dict(
    debug=False,
    model_load_dir=os.path.join("models"),
    model_save_dir=os.path.join("models"),
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_batch_size=64,
    dataset_image_size=(150, 150),
    dataset_cut_ratio=1.0,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.2, 0.1],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    score_sample_size=10,
    cleanup_data_flag=False,
)

In [6]:
# predict-remote-model-cut
# Remote profile reading models from ../input/petfinder-pawpularity-model/models:
# cut ratio 0.2, 500x500 images, batch size 64, no shuffling, separate "dataset" cut
# directory, cleanup enabled.
settingsMap["predict-remote-model-cut"] = dict(
    debug=False,
    model_load_dir=os.path.join("..", "input", "petfinder-pawpularity-model", "models"),
    model_save_dir=os.path.join("models"),
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("dataset", "petfinder-pawpularity-score"),
    dataset_batch_size=64,
    dataset_image_size=(500, 500),
    dataset_cut_ratio=0.2,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.20, 0.1],
    dataset_shuffle=False,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    score_sample_size=10,
    cleanup_data_flag=True,
)

In [7]:
# predict-remote-model-full
# Remote profile reading models from ../input/petfinder-pawpularity-model/models:
# cut ratio 1.0, 750x750 images, batch size 64, shuffling on, 0.90/0.05/0.05 split,
# fixed prefetch of 1, cleanup enabled.
settingsMap["predict-remote-model-full"] = dict(
    debug=False,
    model_load_dir=os.path.join("..", "input", "petfinder-pawpularity-model", "models"),
    model_save_dir=os.path.join("models"),
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("dataset", "petfinder-pawpularity-score"),
    dataset_batch_size=64,
    dataset_image_size=(750, 750),
    dataset_cut_ratio=1.0,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.90, 0.05, 0.05],
    dataset_shuffle=True,
    dataset_shuffle_seed=42,
    dataset_prefetch=1,
    score_sample_size=10,
    cleanup_data_flag=True,
)

In [8]:
# predict-remote-train-cut
# Remote profile reading models from ../input/petfinder-pawpularity-train/models:
# cut ratio 0.2, 500x500 images, batch size 64, shuffling on, separate "dataset" cut
# directory, cleanup enabled.
settingsMap["predict-remote-train-cut"] = dict(
    debug=False,
    model_load_dir=os.path.join("..", "input", "petfinder-pawpularity-train", "models"),
    model_save_dir=os.path.join("models"),
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("dataset", "petfinder-pawpularity-score"),
    dataset_batch_size=64,
    dataset_image_size=(500, 500),
    dataset_cut_ratio=0.2,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.7, 0.20, 0.1],
    dataset_shuffle=True,
    dataset_shuffle_seed=42,
    dataset_prefetch=mllib.tf.data.AUTOTUNE,
    score_sample_size=10,
    cleanup_data_flag=True,
)

In [9]:
# predict-remote-train-full
# Remote profile reading models from ../input/petfinder-pawpularity-train/models:
# cut ratio 1.0, 750x750 images, batch size 64, shuffling on, 0.90/0.05/0.05 split,
# fixed prefetch of 1, cleanup enabled.
settingsMap["predict-remote-train-full"] = dict(
    debug=False,
    model_load_dir=os.path.join("..", "input", "petfinder-pawpularity-train", "models"),
    model_save_dir=os.path.join("models"),
    dataset_dir_src=os.path.join("..", "input", "petfinder-pawpularity-score"),
    dataset_dir_cut=os.path.join("dataset", "petfinder-pawpularity-score"),
    dataset_batch_size=64,
    dataset_image_size=(750, 750),
    dataset_cut_ratio=1.0,
    dataset_shrink_ratio=1.0,
    dataset_split_ratios=[0.90, 0.05, 0.05],
    dataset_shuffle=True,
    dataset_shuffle_seed=42,
    dataset_prefetch=1,
    score_sample_size=10,
    cleanup_data_flag=True,
)

### Selected settings

In [10]:
# Mode
# Name of the settings profile to run. It is used as the lookup key into settingsMap,
# so it must match one of the "predict-*" keys registered in the settings cells.
mode = "predict-local-full"

In [11]:
# Selected settings
# Resolve the profile named by `mode`. A mistyped mode still raises KeyError (as the plain
# lookup did), but the message now lists the profiles that are actually registered.
if mode not in settingsMap:
    raise KeyError(
        "Unknown mode %r; available modes: %s" % (mode, ", ".join(sorted(settingsMap)))
    )
settings = settingsMap[mode]
display(settings)

# Debug
# Shorthand for the profile's debug flag; later cells use it to gate extra output.
debug = settings["debug"]

{'debug': False,
 'model_load_dir': 'models',
 'model_save_dir': 'models',
 'dataset_dir_src': '../input/petfinder-pawpularity-score',
 'dataset_dir_cut': '../input/petfinder-pawpularity-score',
 'dataset_batch_size': 64,
 'dataset_image_size': (150, 150),
 'dataset_cut_ratio': 1.0,
 'dataset_shrink_ratio': 1.0,
 'dataset_split_ratios': [0.7, 0.2, 0.1],
 'dataset_shuffle': False,
 'dataset_shuffle_seed': 42,
 'dataset_prefetch': -1,
 'score_sample_size': 10,
 'cleanup_data_flag': False}

## Prepare data

In [12]:
%%time
# Cut training data
# Passes the selected cut ratio and the source/cut directories to mllib.cut_training_data;
# the returned value is displayed and then used as the directory to load from.
# NOTE(review): presumably a cut ratio of 1.0 leaves the source dataset as-is — confirm in mllib.
dataset_dir = mllib.cut_training_data(
    cut_ratio=settings["dataset_cut_ratio"], 
    dataset_dir_src=settings["dataset_dir_src"], 
    dataset_dir_cut=settings["dataset_dir_cut"]
)
display(dataset_dir)

# Train data
# The loaded object is reused later as `mapping_data` when building the datasets and as
# `training_data` when scoring.
training_data = mllib.load_training_data(dataset_dir)
if debug: 
    # Debug only: display the loaded data and plot its histograms with 500 bins.
    display(training_data)
    training_data.hist(bins=500, figsize=(18,3))

# Make training data
def map_image_score_fn(image, features, score, file_id):
    """Reduce an (image, features, score, file_id) element to an (image, score) pair.

    Used as the mapping function for the debug image/score plots; `features` and
    `file_id` are accepted only so the signature matches the dataset elements.
    """
    return (image, score)
# Load the complete dataset first, using the batch size, shuffle flag, seed and image size
# from the selected settings.
source_dataset = mllib.load_training_dataset(
    dataset_dir=dataset_dir,
    mapping_data=training_data,
    batch_size=settings["dataset_batch_size"],
    shuffle=settings["dataset_shuffle"],
    seed=settings["dataset_shuffle_seed"],
    image_size=settings["dataset_image_size"],
)

# Split it into the train / validate / test parts. Each returned object is called
# (e.g. train_dataset()) wherever an actual dataset is needed.
train_dataset, validate_dataset, test_dataset = mllib.make_training_validate_test_data(
    dataset=source_dataset,
    split_ratios=settings["dataset_split_ratios"],
    shrink_ratio=settings["dataset_shrink_ratio"],
    prefetch=settings["dataset_prefetch"],
)

# Training data infos
# Each count is the dataset cardinality multiplied by the configured batch size.
# NOTE(review): presumably the cardinality counts batches, so the figure overstates the
# item count whenever a split ends with a partial batch — confirm.
dataset_splits = (
    ("Train dataset:", train_dataset),
    ("Validate dataset:", validate_dataset),
    ("Test dataset:", test_dataset),
)
print("Train / Validate / Test datasets items: %s / %s / %s" % tuple(
    settings["dataset_batch_size"] * make_split().cardinality().numpy()
    for _, make_split in dataset_splits
))
if debug:
    # Debug only: plot the images and scores of the first element of every split.
    print("")
    for split_label, make_split in dataset_splits:
        print(split_label)
        mllib.plot_images_scores_from_dataset(make_split().take(1).map(map_image_score_fn))

'../input/petfinder-pawpularity-score'

'Load training data from ../input/petfinder-pawpularity-score/train.csv'

Train / Validate / Test datasets items: 6912 / 1984 / 960
CPU times: user 1.88 s, sys: 907 ms, total: 2.78 s
Wall time: 1.8 s


## Predict

In [13]:
# Prepare model parameters
def get_model_parameters(settings):
    """Assemble the parameter dictionary describing the model for the given settings.

    Reads "dataset_image_size" and "dataset_cut_ratio" from ``settings``; all other
    values are constants fixed here. The model name is derived from the finished
    parameters through ``mllib.get_model_name`` and stored under "model_name".

    Returns the parameter dictionary, including "model_name".
    """
    image_size = settings["dataset_image_size"]
    parameters = dict(
        model_prefix="model" + mllib.cut_suffix(settings["dataset_cut_ratio"]),
        model_base="xception",
        # Height and width come from the settings; the third dimension is fixed to 3.
        input_shape=[image_size[0], image_size[1], 3],
        input_shape_features=len(mllib.feature_fields),
        output_size=1,
        dropout_rate=0.3,
        learning_rate=5e-4,
        dense_layers="100",
        dense_layers_activation="elu",
        preload_weights=None,
    )
    # The name is computed from the parameters before "model_name" itself is added.
    parameters["model_name"] = mllib.get_model_name(parameters)
    return parameters
    
# Build the parameters for the selected settings and show them in the cell output.
model_parameters = get_model_parameters(settings)
display(model_parameters)

{'model_prefix': 'model',
 'model_base': 'xception',
 'input_shape': [150, 150, 3],
 'input_shape_features': 12,
 'output_size': 1,
 'dropout_rate': 0.3,
 'learning_rate': 0.0005,
 'dense_layers': '100',
 'dense_layers_activation': 'elu',
 'preload_weights': None,
 'model_name': 'model-xception-input-150x150x3-dense-100-dropout-0.300'}

In [14]:
%%time
# Load model
# The model is set up and loaded inside the scope of the strategy obtained from
# mllib.tf_strategy() in the imports cell.
with tf_strategy.scope():
    model = mllib.setup_model(model_parameters)
    # load_model receives the model and the configured load directory; its return value is
    # reported below as the loaded weights.
    # NOTE(review): presumably the weights file is located via the model name — confirm in mllib.
    model_file = mllib.load_model(model, settings["model_load_dir"])
    print("Loaded Weights: %s" % model_file)

Model: "model-xception-input-150x150x3-dense-100-dropout-0.300"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 150, 150, 3) 0                                            
__________________________________________________________________________________________________
tf.math.truediv (TFOpLambda)    (None, 150, 150, 3)  0           input[0][0]                      
__________________________________________________________________________________________________
tf.math.subtract (TFOpLambda)   (None, 150, 150, 3)  0           tf.math.truediv[0][0]            
__________________________________________________________________________________________________
xception (Functional)           (None, 5, 5, 2048)   20861480    tf.math.subtract[0][0]           
_____________________________________________

In [15]:
%%time
# Predict on test dataset
# Debug only: take one element from the shuffled test dataset and run a prediction for at
# most the first five images loaded from it.
if debug:
    with tf_strategy.scope():
        sample_dataset = test_dataset().shuffle(100).take(1)
        images, features, scores, file_ids, *_ = mllib.load_images_scores_from_dataset(sample_dataset)
        preview_count = len(images[:5])
        for index in range(preview_count):
            print("*** Test Image #", index + 1)
            mllib.predict(
                model=model,
                image=images[index],
                features=features[index],
                label=file_ids[index],
                true_score=scores[index],
            )
            print("")

CPU times: user 4 µs, sys: 2 µs, total: 6 µs
Wall time: 10 µs


## Score

### Train

In [16]:
%%time
# Score train submission data
# Step 1: run inference on the train split, limited by the configured sample size.
# NOTE(review): the "Samples" figure in the message is the `take` value; whether it counts
# single images or whole batches depends on mllib.infer_submission_data — confirm.
train_submission_data = mllib.infer_submission_data(
    dataset=train_dataset,
    model=model,
    take=settings["score_sample_size"],
)
# Step 2: score the inferred submission against the loaded training data.
train_score, train_scored_data = mllib.score_submission_data(
    submission_data=train_submission_data,
    training_data=training_data,
)
display("Train Submission RMSE Score [Samples=%i]: %f" % (settings["score_sample_size"], train_score))
if debug:
    display(train_scored_data)



'Train Submission RMSE Score [Samples=10]: 19.290502'

CPU times: user 1min 47s, sys: 20.7 s, total: 2min 8s
Wall time: 34.7 s


### Validate

In [17]:
%%time
# Score validate submission data
# Step 1: run inference on the validate split, limited by the configured sample size.
# NOTE(review): the "Samples" figure in the message is the `take` value; whether it counts
# single images or whole batches depends on mllib.infer_submission_data — confirm.
validate_submission_data = mllib.infer_submission_data(
    dataset=validate_dataset,
    model=model,
    take=settings["score_sample_size"],
)
# Step 2: score the inferred submission against the loaded training data.
validate_score, validate_scored_data = mllib.score_submission_data(
    submission_data=validate_submission_data,
    training_data=training_data,
)
display("Validate Submission RMSE Score [Samples=%i]: %f" % (settings["score_sample_size"], validate_score))
if debug:
    display(validate_scored_data)



'Validate Submission RMSE Score [Samples=10]: 19.113076'

CPU times: user 1min 45s, sys: 19.8 s, total: 2min 4s
Wall time: 31 s


### Test

In [18]:
%%time
# Score test submission data
# Step 1: run inference on the test split, limited by the configured sample size.
# NOTE(review): the "Samples" figure in the message is the `take` value; whether it counts
# single images or whole batches depends on mllib.infer_submission_data — confirm.
test_submission_data = mllib.infer_submission_data(
    dataset=test_dataset,
    model=model,
    take=settings["score_sample_size"],
)
# Step 2: score the inferred submission against the loaded training data.
test_score, test_scored_data = mllib.score_submission_data(
    submission_data=test_submission_data,
    training_data=training_data,
)
display("Test Submission RMSE Score [Samples=%i]: %f" % (settings["score_sample_size"], test_score))
if debug:
    display(test_scored_data)



'Test Submission RMSE Score [Samples=10]: 19.616197'

CPU times: user 1min 42s, sys: 18.9 s, total: 2min 1s
Wall time: 32.1 s


## Cleanup

In [19]:
%%time
# Only profiles with "cleanup_data_flag" set call the delete helper.
# NOTE(review): the local profiles use the source dataset directory as "dataset_dir_cut" and
# keep this flag False — before enabling it there, check what mllib.delete_training_data removes.
if settings["cleanup_data_flag"]:
    mllib.delete_training_data(
        cut_ratio=settings["dataset_cut_ratio"],
        dataset_dir_cut=settings["dataset_dir_cut"],
    )

CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 8.11 µs
