In [7]:
import numpy as np
import random

import tensorflow as tf
import tensorflow.keras.layers as KL
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

from pathlib import Path
import matplotlib.pyplot as plt

# Check GPUs:",
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            # Prevent TensorFlow from allocating all memory of all GPUs:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [2]:
TFRECORDS_PATH = 'data/siim-isic-melanoma-classification/tfrecords'

BATCH_SIZE = 4

In [3]:
def get_tfrecords(dir_path, tag):
    p = Path(dir_path)
    l =  list(p.glob(str(tag + '*.tfrec')))
    l = [str(p) for p in l]
    return l

In [16]:
tfrecords = get_tfrecords(TFRECORDS_PATH, 'train')
len(tfrecords)
random.shuffle(tfrecords)
tfrecords_train = tfrecords[:12]
tfrecords_val = tfrecords[12:]
tfrecords

['data\\siim-isic-melanoma-classification\\tfrecords\\train12-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train04-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train15-2061.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train01-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train05-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train07-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train11-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train03-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train13-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train00-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train06-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train09-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train02-2071.tfrec',
 'data\\siim-isic-melanom

In [17]:
tfrecords_train

['data\\siim-isic-melanoma-classification\\tfrecords\\train12-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train04-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train15-2061.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train01-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train05-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train07-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train11-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train03-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train13-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train00-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train06-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train09-2071.tfrec']

In [18]:
tfrecords_val

['data\\siim-isic-melanoma-classification\\tfrecords\\train02-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train10-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train08-2071.tfrec',
 'data\\siim-isic-melanoma-classification\\tfrecords\\train14-2071.tfrec']

In [19]:
raw_dataset = tf.data.TFRecordDataset(tfrecords_train)
raw_dataset

<TFRecordDatasetV2 shapes: (), types: tf.string>

In [20]:
raw_example = next(iter(raw_dataset))

In [21]:
def tf_parse(eg, resize = None):
    example = tf.io.parse_example(
        eg[tf.newaxis], {
            'image': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
            'image_name': tf.io.FixedLenFeature(shape=(), dtype=tf.string),
            'target': tf.io.FixedLenFeature(shape=(), dtype=tf.int64)
        })
    img = tf.image.decode_jpeg(example['image'][0])
    
    if resize != None:
        img = tf.image.resize(img, resize)
        
    img = img/255
    label = example['target'][0]
    return img, label

In [22]:
img, label = tf_parse(raw_example, resize = (224,224))
print(label)
print(img)

tf.Tensor(0, shape=(), dtype=int64)
tf.Tensor(
[[[0.8593037  0.6468788  0.60114044]
  [0.86584634 0.6717287  0.6459584 ]
  [0.8513205  0.6630252  0.6142857 ]
  ...
  [0.8501404  0.6552423  0.60184133]
  [0.85830307 0.6630655  0.62725097]
  [0.8538815  0.66340584 0.6412767 ]]

 [[0.8434574  0.6577231  0.58993596]
  [0.8748499  0.6918768  0.65742296]
  [0.8526611  0.66610646 0.61390555]
  ...
  [0.84959966 0.6673465  0.620748  ]
  [0.8541816  0.658944   0.6225292 ]
  [0.86444587 0.68265396 0.6498404 ]]

 [[0.85882354 0.67282915 0.6030813 ]
  [0.8629252  0.6806723  0.6211884 ]
  [0.8666667  0.6784314  0.63813525]
  ...
  [0.8515206  0.6518211  0.5792123 ]
  [0.861645   0.6855345  0.661825  ]
  [0.8630048  0.67096806 0.60480154]]

 ...

 [[0.8067225  0.6577029  0.5770306 ]
  [0.804202   0.65154094 0.54958016]
  [0.8199078  0.6591235  0.5880151 ]
  ...
  [0.8434376  0.6787317  0.6233092 ]
  [0.83713454 0.6839136  0.61080426]
  [0.8431971  0.6725495  0.6215088 ]]

 [[0.81064445 0.64589804 0.

In [23]:
decoded = raw_dataset.map(tf_parse)
decoded

<MapDataset shapes: ((None, None, None), ()), types: (tf.float32, tf.int64)>

In [24]:
decoded

<MapDataset shapes: ((None, None, None), ()), types: (tf.float32, tf.int64)>

In [26]:
def get_dataset(files_list, batch_size):
    """Read TFRecords files and turn them into a TFRecordDataset."""
    shards = tf.data.Dataset.from_tensor_slices(files_list)
    shards = shards.shuffle(tf.cast(tf.shape(files_list)[0], tf.int64))
    shards = shards.repeat()
    dataset = shards.interleave(tf.data.TFRecordDataset, cycle_length=4)
    dataset = dataset.shuffle(buffer_size=8192)
    #parser = parse_fn_train if subset == 'train' else parse_fn_valid
    parser = tf_parse
    dataset = dataset.apply(
      tf.data.experimental.map_and_batch(
          map_func=parser,
          batch_size=batch_size,
          num_parallel_calls=4))
    dataset = dataset.prefetch(batch_size)
    return dataset
train = get_dataset(tfrecords_train, BATCH_SIZE)
val = get_dataset(tfrecords_val, BATCH_SIZE)

train

Instructions for updating:
Use `tf.data.Dataset.map(map_func, num_parallel_calls)` followed by `tf.data.Dataset.batch(batch_size, drop_remainder)`. Static tf.data optimizations will take care of using the fused implementation.


<PrefetchDataset shapes: ((None, None, None, None), (None,)), types: (tf.float32, tf.int64)>

In [27]:
val

<PrefetchDataset shapes: ((None, None, None, None), (None,)), types: (tf.float32, tf.int64)>

In [28]:
base_model = tf.keras.applications.ResNet50(include_top=True, weights='imagenet', 
                                            input_tensor=None, input_shape=None,
                                            pooling=None)
base_model.summary()

Model: "resnet50"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
___________________________________________________________________________________________

In [29]:
new_output = KL.Dense(1, activation = 'sigmoid')(base_model.layers[-2].output)
#base_model.layers[-1].output_shape = (None, 1)
model = Model(base_model.input, new_output)
model.compile(loss='binary_crossentropy',
              optimizer=Adam(),
              metrics=['accuracy'])

In [30]:
model.summary()


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
______________________________________________________________________________________________

In [None]:
history = model.fit(train, 
                    epochs = 10, 
                    validation_data = val)

Epoch 1/10
  81679/Unknown - 37916s 464ms/step - loss: 0.0897 - accuracy: 0.9824

In [None]:
                    batch_size=batch_size,
                    epochs=epochs,
                    validation_data=(x_test, y_test),
                    shuffle=True,
                    callbacks=callbacks