In [1]:
import tensorflow as tf
from functools import partial
import matplotlib.pyplot as plt


In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE
GCS_PATH = "/data/datasets/saket/SeeingThroughFogData/train_clear_day/*.swedentfrecord"
BATCH_SIZE = 64
#IMAGE_SIZE = [1024, 1024]

In [3]:
FILENAMES = tf.io.gfile.glob(GCS_PATH)
split_ind = int(0.9 * len(FILENAMES))
TRAINING_FILENAMES, VALID_FILENAMES = FILENAMES[:split_ind], FILENAMES[split_ind:]
print("Train TFRecord Files:", len(TRAINING_FILENAMES))
print("Validation TFRecord Files:", len(VALID_FILENAMES))

Train TFRecord Files: 491
Validation TFRecord Files: 55


In [4]:
def decode_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.cast(image, tf.float32)
    #image = tf.reshape(image, [*IMAGE_SIZE, 3])
    return image


In [5]:
def read_tfrecord(example, labeled):
    tfrecord_format = (
        {'image/cam_stereo_left_lut': tf.io.FixedLenFeature([],tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string, default_value='png'),
        'image/shape/cam_stereo_left_lut': tf.FixedLenFeature([3], tf.int64),
        'image/object/class/text': tf.io.VarLenFeature(dtype=tf.string),
        'image/object/bbox/xmin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/xmax': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymin': tf.io.VarLenFeature(dtype=tf.float32),
        'image/object/bbox/ymax': tf.io.VarLenFeature(dtype=tf.float32),
        }
    )
    example = tf.io.parse_single_example(example, tfrecord_format)
    image = decode_image(example["image/cam_stereo_left_lut"])
    if labeled:
        label = tf.cast(example["image/object/class/text"], tf.string)
        shape = tf.cast(example["image/shape/cam_stereo_left_lut"], tf.int64)
        #colors = np.array([[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]])
        bbox_y_min = tf.cast("image/object/bbox/y_min", tf.float32)
        bbox_x_min = tf.cast("image/object/bbox/x_min", tf.float32)  
        bbox_y_max = tf.cast("image/object/bbox/y_max", tf.float32)
        bbox_x_max = tf.cast("image/object/bbox/x_max", tf.float32)
        #bbox = [bbox_y_min,bbox_x_min,bbox_y_max, bbox_x_max]
        #bbox = example[bbox]
        #bbox = tf.cast(example[("image/object/bbox/y_min","image/object/bbox/x_min",
        #                       "image/object/bbox/y_max","image/object/bbox/x_max")],
        #               tf.int32)

        return image, label
    return image


In [6]:
def load_dataset(filenames, labeled=True):
    ignore_order = tf.data.Options()
    ignore_order.experimental_deterministic = False  # disable order, increase speed
    dataset = tf.data.TFRecordDataset(
        filenames
    )  # automatically interleaves reads from multiple files
    dataset = dataset.with_options(
        ignore_order
    )  # uses data as soon as it streams in, rather than in its original order
    dataset = dataset.map(
        partial(read_tfrecord, labeled=labeled), num_parallel_calls=AUTOTUNE
    )
    # returns a dataset of (image, label) pairs if labeled=True or just images if labeled=False
    return dataset


In [7]:
@tf.function
def get_dataset(filenames, labeled=True):
    dataset = load_dataset(filenames, labeled=labeled)
    dataset = dataset.shuffle(2048)
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset


In [8]:
train_dataset = get_dataset(TRAINING_FILENAMES)
valid_dataset = get_dataset(VALID_FILENAMES)
#test_dataset = get_dataset(TEST_FILENAMES, labeled=False)

#image_batch, label_batch = next(iter(train_dataset))

train_dataset




<_VariantDataset shapes: ((?, ?, ?, 3), (?, ?)), types: (tf.float32, tf.string)>

In [9]:
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate, decay_steps=20, decay_rate=0.96, staircase=True
)

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    "melanoma_model.h5", save_best_only=True
)

early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10, restore_best_weights=True
)

In [18]:
def make_model():
    #base_model = tf.keras.applications.Xception(include_top=False, weights="imagenet"
    #)

    #base_model.trainable = False

    inputs = tf.keras.layers.Input(3,)
    #x = tf.keras.applications.xception.preprocess_input(inputs)
    #x = base_model(x)
    #x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(8, activation="relu")(inputs)
    x = tf.keras.layers.Dropout(0.4)(x)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="binary_crossentropy",
    )

    return model

In [11]:
import numpy as np
import tensorflow as tf
strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()

INFO:tensorflow:Single-worker CollectiveAllReduceStrategy with local_devices = ('/device:CPU:0',), communication = CollectiveCommunication.AUTO


In [19]:
with strategy.scope():
    model = make_model()

history = model.fit(
    valid_dataset,
    epochs=2,
    validation_data=valid_dataset
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Train on None steps
Epoch 1/2


InvalidArgumentError: In[0] is not a matrix. Instead it has shape [64,1024,1920,3]
	 [[{{node dense_2/Relu}}]]