# PetFinder.my Pawpularity Score / Predict

## Imports

In [1]:
# Imports
import os
import numpy as np
from IPython.display import display

# Import utility libs
import petfinder_pawpularity_config as config
import petfinder_pawpularity_lib as mllib
# Obtain the distribution strategy object from the project library; later cells
# build and load the model inside `tf_strategy.scope()`.
tf_strategy = mllib.tf_strategy()

'TensorFlow Version: 2.6.0'

'TensorFlow Strategy: _DefaultDistributionStrategy'

In [2]:
# Enable retina display: render inline figures in the high-resolution "retina" format
%config InlineBackend.figure_format = "retina"

# Load Tensorboard: register the TensorBoard notebook extension with IPython
%load_ext tensorboard

## Settings

In [3]:
# Resolve the settings for the "predict" process, passing "full" as the
# fallback mode. `debug` gates the extra displays and plots in later cells.
settings, debug = config.get_settings(process="predict", fallback_mode="full")

# Show the resolved settings so the run configuration is visible in the output
mllib.show_dict(settings)

'Settings: local-predict-cut'

Unnamed: 0,Key,Value
0,debug,True
1,model_load_dir,models
2,model_save_dir,models
3,dataset_dir_src,../input/petfinder-pawpularity-score
4,dataset_dir_cut,../input/petfinder-pawpularity-score
5,dataset_dir_copy,dataset-copy
6,dataset_batch_size,64
7,dataset_image_size,"(150, 150)"
8,dataset_cut_ratio,0.2
9,dataset_shrink_ratio,1.0


## Prepare data

In [4]:
%%time
# Cut training data
dataset_dir = mllib.cut_training_data(
    cut_ratio=settings["dataset_cut_ratio"], 
    dataset_dir_src=settings["dataset_dir_src"], 
    dataset_dir_cut=settings["dataset_dir_cut"]
)
display(dataset_dir)

# Load the training table that belongs to the selected dataset directory
training_data = mllib.load_training_data(dataset_dir)

# In debug mode, show the table itself and per-column histograms
if debug:
    display(training_data)
    training_data.hist(figsize=(18, 3), bins=500)

# Make training data
def map_image_score_fn(image, features, score, file_id):
    """Reduce a 4-field dataset element to its (image, score) pair."""
    return (image, score)


# Load the complete dataset first, using the batching/shuffling/image-size settings ...
source_dataset = mllib.load_training_dataset(
    dataset_dir=dataset_dir,
    mapping_data=training_data,
    batch_size=settings["dataset_batch_size"],
    shuffle=settings["dataset_shuffle"],
    seed=settings["dataset_shuffle_seed"],
    image_size=settings["dataset_image_size"],
)

# ... then split it. Each returned object is a callable that yields the
# corresponding dataset when invoked (see the `train_dataset()` calls below).
train_dataset, validate_dataset, test_dataset = mllib.make_training_validate_test_data(
    dataset=source_dataset,
    split_ratios=settings["dataset_split_ratios"],
    shrink_ratio=settings["dataset_shrink_ratio"],
    prefetch=settings["dataset_prefetch"],
)

# Training data infos
def _dataset_item_count(dataset_fn, batch_size):
    """Return the item count implied by a dataset's number of batches.

    Args:
        dataset_fn: Zero-argument callable returning a dataset that exposes
            `cardinality()` (presumably a batched `tf.data.Dataset` -- confirm
            against the project library).
        batch_size: Number of items per batch.

    Returns:
        `batch_size * number_of_batches`, which is an upper bound when the last
        batch is only partially filled. If the cardinality is a negative
        sentinel (tf.data uses -1 for infinite and -2 for unknown), the text
        "infinite" or "unknown" is returned instead of a meaningless negative
        product.
    """
    batch_count = int(dataset_fn().cardinality().numpy())
    if batch_count == -1:
        return "infinite"
    if batch_count < 0:
        return "unknown"
    return batch_size * batch_count


# Label / dataset-factory pairs, in the order they are reported below
dataset_splits = (
    ("Train", train_dataset),
    ("Validate", validate_dataset),
    ("Test", test_dataset),
)

print("Train / Validate / Test datasets items: %s / %s / %s" % tuple(
    _dataset_item_count(dataset_fn, settings["dataset_batch_size"])
    for _, dataset_fn in dataset_splits
))

if debug:
    print("")
    # Preview the first batch of every split as (image, score) pairs
    for split_name, dataset_fn in dataset_splits:
        print("%s dataset:" % split_name)
        mllib.plot_images_scores_from_dataset(dataset_fn().take(1).map(map_image_score_fn))

'../input/petfinder-pawpularity-score-cut-0.200'

'Load training data from ../input/petfinder-pawpularity-score-cut-0.200/train.csv'

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42
2,0013fd999caf9a3efe1352ca1b0d937e,0,1,1,1,0,0,0,0,1,1,0,0,28
3,0018df346ac9c1d8413cfcc888ca8246,0,1,1,1,0,0,0,0,0,0,0,0,15
4,001dc955e10590d3ca4673f034feeef2,0,0,0,1,0,0,1,0,0,0,0,0,72
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1977,32f4f6afaae9f0633ffd11d3e8589689,0,1,1,1,0,1,0,0,0,0,0,0,45
1978,32f7f313e386e95dad34ffed898fe43c,0,1,1,1,0,0,0,1,1,1,0,0,57
1979,32fa718066e9373a5af30dbb77bf32ee,0,0,0,1,0,0,0,0,0,1,1,0,59
1980,32fb50499ca324b5b49ac5b00a63a923,0,1,1,1,0,1,0,0,0,1,1,0,18


## Predict

In [None]:
# Prepare model parameters
def get_model_parameters(settings):
    """Assemble the parameter dictionary describing the model to load.

    Args:
        settings: Mapping that provides "dataset_image_size" (indexable; its
            first two entries become the leading input-shape dimensions) and
            "dataset_cut_ratio" (used to derive the model prefix).

    Returns:
        dict: The fixed model hyper-parameters plus a "model_name" entry,
        which is computed by `mllib.get_model_name` from all other entries.
    """
    image_size = settings["dataset_image_size"]
    parameters = dict(
        model_id="model_1",
        model_prefix=mllib.cut_suffix(settings["dataset_cut_ratio"]),
        model_base="xception",
        # The two image-size entries followed by 3 as the last dimension
        input_shape=[image_size[0], image_size[1], 3],
        input_shape_features=len(mllib.feature_fields),
        output_size=1,
        dropout_rate=0.3,
        learning_rate=5e-4,
        dense_layers="100",
        dense_layers_activation="elu",
        preload_weights=None,
    )
    # The name is derived last, from the dictionary without a "model_name" key
    parameters["model_name"] = mllib.get_model_name(parameters)
    return parameters
    
# Build the parameters from the current settings and show them for reference
model_parameters = get_model_parameters(settings)
mllib.show_dict(model_parameters)

In [None]:
%%time
# Load model
with tf_strategy.scope():
    model = mllib.setup_model(model_parameters)
    model_file = mllib.load_model(model, settings["model_load_dir"])
    print("Loaded Weights: %s" % model_file)

In [None]:
%%time
# Predict on test dataset
if debug:
    with tf_strategy.scope():
        images, features, scores, file_ids, *_ = mllib.load_images_scores_from_dataset(test_dataset().shuffle(100).take(1))
        for index in range(len(images[:5])):
            print("*** Test Image #",index+1)
            mllib.predict(
                model=model, 
                image=images[index],
                features=features[index],
                label=file_ids[index],
                true_score=scores[index], 
            )
            print("")

## Score

### Train

In [None]:
%%time
# Score train submission data
train_score, train_scored_data = mllib.score_submission_data(
    submission_data=mllib.infer_submission_data(
        dataset=train_dataset,
        model=model, 
        take=settings["score_sample_size"]
    ), 
    training_data=training_data
)
display("Train Submission RMSE Score [Samples=%i]: %f" % (settings["score_sample_size"], train_score))
if debug: display(train_scored_data)

### Validate

In [None]:
%%time
# Score validate submission data
validate_score, validate_scored_data = mllib.score_submission_data(
    submission_data=mllib.infer_submission_data(
        dataset=validate_dataset,
        model=model, 
        take=settings["score_sample_size"]
    ), 
    training_data=training_data
)
display("Validate Submission RMSE Score [Samples=%i]: %f" % (settings["score_sample_size"], validate_score))
if debug: display(validate_scored_data)

### Test

In [None]:
%%time
# Score test submission data
test_score, test_scored_data = mllib.score_submission_data(
    submission_data=mllib.infer_submission_data(
        dataset=test_dataset,
        model=model, 
        take=settings["score_sample_size"]
    ), 
    training_data=training_data
)
display("Test Submission RMSE Score [Samples=%i]: %f" % (settings["score_sample_size"], test_score))
if debug: display(test_scored_data)

## Cleanup

In [None]:
%%time
if settings["cleanup_data_flag"]: 
    mllib.delete_training_data(cut_ratio=settings["dataset_cut_ratio"], dataset_dir_cut=settings["dataset_dir_cut"])