## Labelling Images

We will annotate some images for a new label using the "LabelImg" tool.

https://github.com/tzutalin/labelImg#usage

Since Google Colab does not support GUI applications, we have to run this tool in our local environment.

> pip install labelimg

> labelimg

Let's save the annotation files in XML format.

Then convert the XML files into CSV, and the CSV into TFRecord files.



## Transfer Learning

Example: https://keras.io/examples/keras_recipes/tfrecord/

Import necessary modules

In [None]:
import matplotlib.pylab as plt
import tensorflow as tf
!pip install -q -U tf-hub-nightly
import tensorflow_hub as hub
from tensorflow.keras import layers
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from functools import partial
import os
import zipfile
 
# Explicitly request eager execution (already the default mode in TensorFlow 2.x).
tf.compat.v1.enable_eager_execution()

[?25l[K     |███                             | 10kB 17.6MB/s eta 0:00:01[K     |██████                          | 20kB 24.7MB/s eta 0:00:01[K     |█████████                       | 30kB 18.1MB/s eta 0:00:01[K     |████████████                    | 40kB 14.9MB/s eta 0:00:01[K     |███████████████                 | 51kB 16.6MB/s eta 0:00:01[K     |██████████████████              | 61kB 16.8MB/s eta 0:00:01[K     |█████████████████████           | 71kB 14.4MB/s eta 0:00:01[K     |████████████████████████        | 81kB 13.2MB/s eta 0:00:01[K     |███████████████████████████     | 92kB 12.6MB/s eta 0:00:01[K     |██████████████████████████████  | 102kB 12.0MB/s eta 0:00:01[K     |████████████████████████████████| 112kB 12.0MB/s 
[?25h

In [None]:
# Display the installed TensorFlow version.
tf.__version__

'2.4.1'

Load the TFRecord files for the new label from my Google Drive.

In [None]:
#train_id = "https://drive.google.com/file/d/1BcgncfyipV8V_eTG4UmPSAosNfpKEndf/view?usp=sharing"
#valid_id = "https://drive.google.com/file/d/1kseKkBh4Blw6uLqqpSFkuIlQ06FoVt6V/view?usp=sharing"
#test_id = "https://drive.google.com/file/d/1s-41CVh9U80I7EYqRoMABTykX2ZcHcHw/view?usp=sharing"
 
# Create the local folders that receive the downloaded TFRecord files.
# exist_ok=True keeps this cell re-runnable: without it, a second run raises
# FileExistsError because the folders are already there.
os.makedirs('/tfrecord', exist_ok=True)
os.makedirs('/tfrecord/train', exist_ok=True)
os.makedirs('/tfrecord/valid', exist_ok=True)
os.makedirs('/tfrecord/test', exist_ok=True)
 
# Authenticate the Colab user, then hand the application-default credentials
# to PyDrive so it can act on the user's behalf.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
# Drive client used to fetch the TFRecord files.
drive = GoogleDrive(gauth)
 
def _download_from_drive(file_id, destination):
    """Fetch the Drive file `file_id` into `destination` and return its PyDrive handle."""
    handle = drive.CreateFile({'id': file_id})
    handle.GetContentFile(destination)
    return handle


# One TFRecord file per split, each stored in its own local folder.
tfr_train = _download_from_drive(
    "1BcgncfyipV8V_eTG4UmPSAosNfpKEndf", '/tfrecord/train/train.tfrecord'
)
tfr_valid = _download_from_drive(
    "1kseKkBh4Blw6uLqqpSFkuIlQ06FoVt6V", '/tfrecord/valid/valid.tfrecord'
)
tfr_test = _download_from_drive(
    "1s-41CVh9U80I7EYqRoMABTykX2ZcHcHw", '/tfrecord/test/test.tfrecord'
)

In [None]:
# Collect the TFRecord files of each split from its own folder.
TRAINING_FILENAMES = tf.io.gfile.glob("/tfrecord/train/*.tfrecord")
# The validation split lives in /tfrecord/valid; globbing /tfrecord/test here
# would make the validation metrics silently run on the test data.
VALID_FILENAMES = tf.io.gfile.glob("/tfrecord/valid/*.tfrecord")
 
TEST_FILENAMES = tf.io.gfile.glob("/tfrecord/test/*.tfrecord")
print("Train TFRecord Files:", len(TRAINING_FILENAMES))
print("Validation TFRecord Files:", len(VALID_FILENAMES))
print("Test TFRecord Files:", len(TEST_FILENAMES))

Train TFRecord Files: 1
Validation TFRecord Files: 1
Test TFRecord Files: 1


In [None]:
# Let tf.data pick parallelism and buffer sizes at runtime.
AUTOTUNE = tf.data.experimental.AUTOTUNE
# Number of examples per batch produced by get_dataset.
BATCH_SIZE = 64
# [height, width] every decoded image is resized to; also the model's input size.
IMAGE_SIZE = [300, 300]

Decode image data to tensor format

In [None]:
 
def decode_image(image):
    """Turn JPEG-encoded bytes into a float32 RGB tensor of shape [*IMAGE_SIZE, 3].

    The pixel values are cast to float32 without rescaling and the image is
    resized to IMAGE_SIZE with bilinear interpolation.
    """
    pixels = tf.cast(tf.image.decode_jpeg(image, channels=3), tf.float32)
    resized = tf.image.resize(
        pixels, IMAGE_SIZE, method=tf.image.ResizeMethod.BILINEAR
    )
    # Pin the static shape so downstream layers see fully defined dimensions.
    return tf.reshape(resized, [*IMAGE_SIZE, 3])

Read TFRecord file

In [None]:
def read_tfrecord(example, labeled):
    """Parse one serialized tf.train.Example into an image (and optionally a label).

    Args:
        example: A serialized Example taken from a TFRecord file.
        labeled: When true, also read the "source_id" feature and return it as
            the label.

    Returns:
        `(image, label)` if `labeled` is true, otherwise just `image`. The image
        is the output of `decode_image`; the label is a string tensor.
    """
    # NOTE(review): the original inline comments mention "image/encoded" and
    # "image/source_id"; confirm which key names the TFRecord files really use.
    feature_spec = {"encoded": tf.io.FixedLenFeature([], tf.string)}
    if labeled:
        feature_spec["source_id"] = tf.io.FixedLenFeature([], tf.string)

    parsed = tf.io.parse_single_example(example, feature_spec)
    image = decode_image(parsed["encoded"])
    if not labeled:
        return image

    # NOTE(review): the label stays a string here; verify this is what the
    # training loss expects.
    return image, tf.cast(parsed["source_id"], tf.string)

In [None]:
 
def load_dataset(filenames, labeled=True):
    """Create a tf.data pipeline that parses the given TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: Forwarded to `read_tfrecord`; selects whether labels are parsed.

    Returns:
        A dataset of `(image, label)` pairs when `labeled` is true, otherwise a
        dataset of images only.
    """
    # Give up deterministic element order in exchange for throughput.
    options = tf.data.Options()
    options.experimental_deterministic = False

    parse_example = partial(read_tfrecord, labeled=labeled)
    return (
        tf.data.TFRecordDataset(filenames)
        .with_options(options)
        .map(parse_example, num_parallel_calls=AUTOTUNE)
    )

In [None]:
def get_dataset(filenames, labeled=True):
    """Return a shuffled, batched and prefetched dataset for the given TFRecord files.

    Args:
        filenames: TFRecord file paths to read.
        labeled: Forwarded to `load_dataset`; selects whether labels are included.

    Returns:
        A dataset yielding batches of up to BATCH_SIZE elements.
    """
    dataset = load_dataset(filenames, labeled=labeled)
    dataset = dataset.shuffle(2048)
    dataset = dataset.batch(BATCH_SIZE)
    # Prefetch as the final step so that whole batches are prepared in the
    # background while the consumer works on the previous one.
    dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    return dataset

In [None]:
# Build the three input pipelines; the test split is loaded without labels.
train_dataset = get_dataset(TRAINING_FILENAMES)
valid_dataset = get_dataset(VALID_FILENAMES)
test_dataset = get_dataset(TEST_FILENAMES, labeled=False)
 
# Pull a single batch from the training pipeline for visual inspection.
image_batch, label_batch = next(iter(train_dataset))
 
 
def show_batch(image_batch, label_batch):
    """Plot the first five images of a batch, titled according to their label.

    An image whose label is truthy is titled "spongebob", any other "???".
    Pixel values are divided by 255 before being displayed.
    """
    plt.figure(figsize=(10, 10))
    for index in range(5):
        plt.subplot(5, 5, index + 1)
        plt.imshow(image_batch[index] / 255.0)
        caption = "spongebob" if label_batch[index] else "???"
        plt.title(caption)
        plt.axis("off")
 
 
# Convert the batch tensors to NumPy arrays before plotting them.
show_batch(image_batch.numpy(), label_batch.numpy())

InvalidArgumentError: ignored

In [None]:
# Learning rate starts at 0.01 and is multiplied by 0.96 every 20 steps
# (staircase=True makes the decay happen in discrete jumps).
initial_learning_rate = 0.01
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=20,
    decay_rate=0.96,
    staircase=True,
)

# Keep only the best model seen so far on disk.
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath="melanoma_model.h5",
    save_best_only=True,
)

# Stop after 10 epochs without improvement and roll back to the best weights.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True,
)

In [None]:

def make_model():
    """Build and compile a binary classifier on top of a frozen Xception backbone.

    The backbone is loaded with ImageNet weights and without its classification
    head. On top of it sit global average pooling, an 8-unit ReLU layer,
    dropout (rate 0.7) and a single sigmoid output unit.

    Returns:
        The compiled `tf.keras.Model` (Adam with `lr_schedule`, binary
        cross-entropy loss, AUC metric).
    """
    backbone = tf.keras.applications.Xception(
        input_shape=(*IMAGE_SIZE, 3), include_top=False, weights="imagenet"
    )
    # Freeze the pre-trained weights; only the new head is trained.
    backbone.trainable = False

    inputs = tf.keras.layers.Input(shape=[*IMAGE_SIZE, 3])
    preprocessed = tf.keras.applications.xception.preprocess_input(inputs)
    features = backbone(preprocessed)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
    hidden = tf.keras.layers.Dense(8, activation="relu")(pooled)
    dropped = tf.keras.layers.Dropout(0.7)(hidden)
    outputs = tf.keras.layers.Dense(1, activation="sigmoid")(dropped)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="binary_crossentropy",
        metrics=tf.keras.metrics.AUC(name="auc"),
    )
    return model

In [None]:
# `strategy` was never defined in this notebook, so `strategy.scope()` raised a
# NameError. Use the current distribution strategy, which is TensorFlow's
# default (no-op) strategy when no other strategy scope is active.
strategy = tf.distribute.get_strategy()

# Build the model inside the strategy scope so its variables are created there.
with strategy.scope():
    model = make_model()

# Train with checkpointing and early stopping, validating after every epoch.
history = model.fit(
    train_dataset,
    epochs=2,
    validation_data=valid_dataset,
    callbacks=[checkpoint_cb, early_stopping_cb],
)

In [None]:

def show_batch_predictions(image_batch):
    """Plot up to 25 images of a batch in a 5x5 grid, titled with the model output.

    Args:
        image_batch: A batch of images indexable along its first dimension.
            Pixel values are divided by 255 for display only; the model
            receives the unscaled image.
    """
    plt.figure(figsize=(10, 10))
    # A batch may hold fewer than 25 images (e.g. a small test set), so cap the
    # loop instead of indexing past the end of the batch.
    count = min(25, len(image_batch))
    for n in range(count):
        plt.subplot(5, 5, n + 1)
        plt.imshow(image_batch[n] / 255.0)
        # The model expects a leading batch dimension.
        img_array = tf.expand_dims(image_batch[n], axis=0)
        plt.title(model.predict(img_array)[0])
        plt.axis("off")


# Take one batch of unlabeled test images and plot the model's predictions for it.
image_batch = next(iter(test_dataset))

show_batch_predictions(image_batch)



Import a pre-trained object detection model (SSD MobileNet V2 trained on Open Images V4)

In [None]:
# TF Hub handle of the pre-trained model.
classifier_url ="https://tfhub.dev/google/openimages_v4/ssd/mobilenet_v2/1"

In [None]:
wget classifier_url

In [None]:
# Spatial size (without the channel axis) declared for the hub layer's input.
IMAGE_SHAPE = (224, 224)

# Wrap the TF Hub module as the only layer of a Keras model.
# NOTE(review): the handle points to an object-detection module; confirm it can
# be used as a plain Keras layer with a fixed 224x224x3 input.
classifier = tf.keras.Sequential()
classifier.add(hub.KerasLayer(classifier_url, input_shape=IMAGE_SHAPE + (3,)))

Configure the model's pipeline `.config` file

https://github.com/tensorflow/models/blob/master/research/object_detection/samples/configs/ssd_mobilenet_v2_oid_v4.config