In [1]:
import json
import os
from glob import glob
from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

In [10]:
def _path_to_x(path):
    """Load the JPEG at ``path`` and return it as a resized ``uint8`` array.

    The file is decoded as a 3-channel image and resized with bicubic
    interpolation to the first two entries of ``meta["image_size"]``
    (``meta`` is a notebook-level global).

    NOTE(review): the bicubic resize result is cast to ``uint8`` without
    clipping or rounding — confirm this matches the preprocessing used when
    the models were trained.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    target_size = tuple(meta["image_size"][:2])
    resized = tf.image.resize(
        decoded, target_size, method=tf.image.ResizeMethod.BICUBIC
    )
    return resized.numpy().astype(np.uint8)


def _y_to_labels(y):
    """Turn one row of per-class scores into a space-separated label string.

    Every class whose score is strictly greater than 0.5 is kept, in the
    order of ``meta["classes"]`` (``meta`` is a notebook-level global).
    When no score passes the threshold the result is the single label
    ``"scab"``.
    """
    picked = [meta["classes"][idx] for idx, score in enumerate(y) if score > 0.5]
    # An empty selection falls back to the fixed default label.
    return " ".join(picked or ["scab"])


def _predict(paths, model):
    """Run ``model`` on the images found at ``paths`` and return its output.

    Each path is loaded with ``_path_to_x``; the images are stacked into a
    single array and passed to ``model.predict`` using the batch size stored
    in ``meta["args"]["batch"]``, with progress output disabled.
    """
    images = np.array([_path_to_x(image_path) for image_path in paths])
    return model.predict(
        images,
        batch_size=meta["args"]["batch"],
        verbose=0,
    )

In [4]:
# Test images on Kaggle. For a local run, glob
# "/app/_data/competition_data/test_images/*.jpg" instead.
paths = glob("/kaggle/input/**/test_images/*.jpg", recursive=True)

# Bare file names, in the same order as `paths`.
files = [os.path.basename(image_path) for image_path in paths]

# One checkpoint prefix per fold (fold_1 .. fold_5). For a local run, the
# prefixes live under "/app/_data/work/models/" instead.
MODEL_PATHS = [f"/kaggle/input/pp21models/A_all.fold_{fold}" for fold in range(1, 6)]

In [6]:
# Class names, image size and batch size are read from the first fold's
# metadata file. NOTE(review): assumes every fold shares the same metadata —
# confirm.
meta = json.loads(Path(f"{MODEL_PATHS[0]}.meta.json").read_text())
batch_size = meta["args"]["batch"]

# Running sum of the per-fold predictions, one row per test image and one
# column per class.
Y = np.zeros((len(paths), len(meta["classes"])), dtype=np.float64)

for model_path in MODEL_PATHS:

    print(model_path)
    model = tf.keras.models.load_model(f"{model_path}.h5")

    # Decode and predict one batch of paths at a time so that at most
    # `batch_size` decoded images are held in memory. The model is passed to
    # every `_predict` call, and an empty `paths` list simply skips the loop.
    for start in range(0, len(paths), batch_size):
        stop = start + batch_size
        Y[start:stop] += _predict(paths[start:stop], model)

    # free GPU memory
    del model
    keras.backend.clear_session()

/app/_data/work/models/A_all.fold_1
INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: Tesla V100-DGXS-32GB, compute capability 7.0
/app/_data/work/models/A_all.fold_1
/app/_data/work/models/A_all.fold_1
/app/_data/work/models/A_all.fold_1
/app/_data/work/models/A_all.fold_1


In [None]:
# Divide the accumulated predictions by the number of models, then convert
# each row to its label string.
labels = [_y_to_labels(row) for row in Y / len(MODEL_PATHS)]

In [None]:
 # Remove any previous submission; -f keeps this quiet when the file does not exist yet.
 ! rm -f /kaggle/working/submission.csv

In [7]:
# Two-column submission table: image file name and its predicted labels.
dfs = pd.DataFrame({"image": files, "labels": labels})
dfs.to_csv("/kaggle/working/submission.csv", index=False)

In [8]:
# Preview the written file instead of dumping every row into the notebook output.
! head -n 5 /kaggle/working/submission.csv