# Compute predictions on a TF dataset using a stored .h5 model

In [1]:
import os

%cd -q '/content'
if os.path.exists('fluence_maps'):
  !rm -fr fluence_maps

GIT_USERNAME = "pablojrios"
GIT_TOKEN = "1d88a0b85d2b00a03796e4d8b7e5f7b249b12f9b"
!git clone -s https://{GIT_TOKEN}@github.com/{GIT_USERNAME}/fluence_maps.git

Cloning into 'fluence_maps'...
remote: Enumerating objects: 118, done.[K
remote: Counting objects: 100% (118/118), done.[K
remote: Compressing objects: 100% (104/104), done.[K
remote: Total 118 (delta 62), reused 36 (delta 13), pack-reused 0[K
Receiving objects: 100% (118/118), 273.86 KiB | 1.05 MiB/s, done.
Resolving deltas: 100% (62/62), done.


In [2]:
import numpy as np
import tensorflow as tf
# Switch into the cloned repository before importing from `util`.
# NOTE(review): presumably `util` is a package at the repository root that is
# resolved through the current working directory — confirm.
%cd '/content/fluence_maps'
from util.dataset import _tfrecord_dataset_type_from_folder, _parse_jpeg_image_function
from util.preprocess import rescale_min_1_to_1
import os
import pandas as pd

/content/fluence_maps


In [3]:
# Report the TensorFlow version in use and whether eager execution is enabled.
runtime_report = (
  ('Tensorflow version = {}', tf.__version__),
  ('Executing eagerly = {}', tf.executing_eagerly()),
)
for template, value in runtime_report:
  print(template.format(value))

Tensorflow version = 2.2.0-rc4
Executing eagerly = True


In [4]:
from google.colab import drive

# Mount Google Drive; the dataset and model paths used below live under this
# mount point.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
#============================DEFINE YOUR ARGUMENTS==============================
# Base data directory (located on the mounted Google Drive).
ARG_DATASET_DIR='/content/drive/My Drive/Healthcare/Radioterapia/data/ciolaplata'
# Folder under ARG_DATASET_DIR that holds the TFRecord dataset.
ARG_TFDATASET_FOLDER='tfds.2019.pablo'
# Model identifier: the model is loaded from
# '<ARG_DATASET_DIR>/models/<ARG_MODEL_NAME>.h5' and the identifier is also
# embedded in the name of the predictions CSV.
ARG_MODEL_NAME = '1588803713'
# Dataset partition handed to the TFRecord loader; it is also embedded in the
# name of the predictions CSV.
ARG_PART = 'train'

In [7]:
# Open the ARG_PART partition found in the dataset folder as a TFRecord dataset
# of scalar string records.
# NOTE(review): the variable is named `raw_test` whatever ARG_PART is (it is
# 'train' in the arguments cell), so the name does not imply the test split.
tfdataset_dir = os.path.join(ARG_DATASET_DIR, ARG_TFDATASET_FOLDER)
raw_test = _tfrecord_dataset_type_from_folder(tfdataset_dir, ARG_PART)
print(raw_test)

<TFRecordDatasetV2 shapes: (), types: tf.string>


In [8]:
# Parse every raw record with a parallel map over the dataset. Later cells
# unpack each parsed element as an (image, gamma, filename) tuple.
num_workers = 8  # number of parallel calls used by the map below
IMG_SIZE = 256
normalization_fn = rescale_min_1_to_1


def _parse_record(record):
  """Parse one raw record with the image size and normalization set above."""
  return _parse_jpeg_image_function(record, IMG_SIZE, normalization_fn)


test = raw_test.map(_parse_record, num_parallel_calls=num_workers)
print(test)

<ParallelMapDataset shapes: ((256, 256, 3), (), ()), types: (tf.float32, tf.float32, tf.string)>


In [9]:
# Collect the gamma value of every example into a NumPy array; this iterates
# the whole parsed dataset once.
gamma_dataset = test.map(lambda _image, gamma, _filename: gamma)
gamma_values = np.array([value for value in gamma_dataset.as_numpy_iterator()])

# The MAE may vary depending on the batch size.
BATCH_SIZE = 32
test_batches = test.batch(BATCH_SIZE)
print(test_batches)

<BatchDataset shapes: ((None, 256, 256, 3), (None,), (None,)), types: (tf.float32, tf.float32, tf.string)>


In [11]:
# Load the trained Keras model stored at
# '<ARG_DATASET_DIR>/models/<ARG_MODEL_NAME>.h5'.
# The folder is kept in `models_dir` rather than `dir`, so the builtin dir()
# is not shadowed for the rest of the session.
models_dir = os.path.join(ARG_DATASET_DIR, "models")
# Despite its name, `saved_model_dir` is the path of the .h5 file itself.
saved_model_dir = os.path.join(models_dir, '{}.h5'.format(ARG_MODEL_NAME))
print(f'Loading model {saved_model_dir}...')
loaded_model = tf.keras.models.load_model(saved_model_dir)

Loading model /content/drive/My Drive/Healthcare/Radioterapia/data/ciolaplata/models/1588803713.h5...


In [13]:
# Evaluate the dataset with the loaded model to calculate the loss (MAE,
# according to the original note), because the metric value could differ from
# the one reported during training.
# Only (image, gamma) pairs are passed to the model; the filename is dropped.
tmp_test_batches = test_batches.map(lambda image, gamma, filename: (image, gamma))
print(tmp_test_batches)
# `workers` is intentionally not passed: Keras uses it only for generator or
# keras.utils.Sequence inputs, so it has no effect on a tf.data.Dataset.
# The two-value unpacking assumes the model reports the loss plus one metric.
loss, mse = loaded_model.evaluate(tmp_test_batches, verbose=0)
print('\n\nLoaded model, test loss: {:5.4f}'.format(loss))
print('Loaded model, test mse: {:5.4f}'.format(mse))

<MapDataset shapes: ((None, 256, 256, 3), (None,)), types: (tf.float32, tf.float32)>


Loaded model, test loss: 0.0856
Loaded model, test mse: 0.0345


In [0]:
# Run inference over every batch and flatten the model output to one
# dimension, e.g. from shape (1121, 1) to (1121,).
predictions = loaded_model.predict(tmp_test_batches).reshape(-1)

In [18]:
# Pair every filename with its actual gamma value. Dedicated names are used
# here instead of rebinding `tmp_test_batches`, `list` and `dir`:
#  - `tmp_test_batches` must stay the batched (image, gamma) dataset so the
#    evaluation / prediction cells can be re-run after this one;
#  - shadowing the builtins list() and dir() breaks any cell that calls them
#    afterwards (e.g. the cell that builds `gamma_values`).
filename_gamma_pairs = test.map(lambda image, gamma, filename: (filename, gamma))
actual_rows = [
  (filename.numpy().decode('utf-8'), gamma.numpy())
  for filename, gamma in filename_gamma_pairs
]

# zip() would silently drop rows on a length mismatch, so fail loudly instead.
if len(actual_rows) != len(predictions):
  raise ValueError(
    'Dataset has {} examples but {} predictions were computed.'.format(
      len(actual_rows), len(predictions)))

# The pairing relies on the dataset yielding its examples in the same order in
# this pass as in the prediction pass.
prediction_rows = [
  (filename, actual_gamma, predicted_gamma)
  for (filename, actual_gamma), predicted_gamma in zip(actual_rows, predictions)
]

# Build a pandas DataFrame with the complete evaluated set.
df = pd.DataFrame(prediction_rows, columns=['filename', 'actual gamma', 'predicted gamma'])
predictions_dir = os.path.join(ARG_DATASET_DIR, "predictions")
# Create the output folder on first use so to_csv does not fail.
os.makedirs(predictions_dir, exist_ok=True)
predictions_file_path = os.path.join(
  predictions_dir, 'predicted_gamma_{}.{}.csv'.format(ARG_MODEL_NAME, ARG_PART))
df.to_csv(predictions_file_path, index=False)
print(f'Predictions saved to {predictions_file_path}.')

Predictions saved to /content/drive/My Drive/Healthcare/Radioterapia/data/ciolaplata/predictions/predicted_gamma_1588803713.train.csv.
