In [1]:
import numpy as np
import random
import pandas as pd
import os
import matplotlib.pyplot as plt
import pathlib
import shutil
import datetime
import geopandas
import pickle
from collections import Counter

import tensorflow as tf
import tensorflow.keras.layers as KL
from tensorflow.keras import Model
from tensorflow.keras.mixed_precision import experimental as mixed_precision

# Shorthand passed as `num_parallel_calls` / prefetch `buffer_size` by the tf.data pipelines below.
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Check GPUs: list the physical GPUs TensorFlow can see and switch each one to
# on-demand memory growth, then report physical vs. logical device counts.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            # Prevent TensorFlow from allocating all memory of all GPUs:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # NOTE(review): presumably raised when memory growth is changed after
        # the GPUs have been initialized — confirm against the TF docs.
        print(e)
        
#policy = mixed_precision.Policy('mixed_float16')
#mixed_precision.set_policy(policy)
#print('Compute dtype: %s' % policy.compute_dtype)
#print('Variable dtype: %s' % policy.variable_dtype)

ImportError: Traceback (most recent call last):
  File "C:\Users\ono008\.conda\envs\oystein_tf2_gpu\lib\site-packages\tensorflow\python\pywrap_tensorflow.py", line 64, in <module>
    from tensorflow.python._pywrap_tensorflow_internal import *
ImportError: DLL load failed: The specified module could not be found.


Failed to load the native TensorFlow runtime.

See https://www.tensorflow.org/install/errors

for some common reasons and solutions.  Include the entire stack trace
above this error message when asking for help.

In [None]:
# Dataset and output locations, relative to the notebook's working directory.
JPEG_TRAIN = 'data/siim-isic-melanoma-classification/jpeg/train'
JPEG_VAL = 'data/siim-isic-melanoma-classification/jpeg/val'
JPEG_TEST = 'data/siim-isic-melanoma-classification/jpeg/test'
CSV_TRAIN = 'data/siim-isic-melanoma-classification/train.csv'
CSV_TEST = 'data/siim-isic-melanoma-classification/test.csv'
SUBMITS_DIR = 'submits/siim-isic-melanoma-classification'

# Image size every picture is resized to before it reaches the network.
# Exactly one assignment is kept so the effective value is unambiguous;
# other sizes that were tried: 224, 448, 896, 1120.
WIDTH, HEIGHT = 768, 768

# Number of images per batch for training, validation and prediction.
BATCH_SIZE = 4

# Metrics handed to `model.compile`. The `name` given to each metric is the key
# it is reported under; the checkpoint callback later monitors 'val_auc',
# i.e. the 'auc' entry of this list computed on the validation data.
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc')
]

In [None]:
# Load the training metadata CSV and show it as the cell output.
meta = pd.read_csv(CSV_TRAIN)
meta

In [None]:
def split_train_val(meta_df, train_path, val_path, val_percent = 0.25, seed = 18):
    """Label every metadata row as 'train' or 'val' and move the validation images.

    A ``train_val`` column is written into ``meta_df`` (in place). When the
    number of ``.jpg`` files found recursively under ``train_path`` equals the
    number of rows, every image whose row is labelled 'val' is moved into
    ``val_path``. Otherwise the split is assumed to have been done already and
    no file is touched.

    Args:
        meta_df: DataFrame with an ``image_name`` column holding file stems.
        train_path: Directory that currently contains all images.
        val_path: Directory receiving the validation images; created if missing.
        val_percent: Fraction of rows assigned to validation (rounded down).
        seed: Seed of the shuffle, so repeated calls yield the same split.

    Returns:
        ``meta_df`` itself, with the ``train_val`` column set.

    Raises:
        IndexError: if an image under ``train_path`` has no row in ``meta_df``.
            The check runs before any file is moved.
    """
    n = len(meta_df)
    n_val = int(n * val_percent)
    n_train = n - n_val
    train_val = ['train'] * n_train + ['val'] * n_val
    # A private generator produces the same permutation as
    # `random.seed(seed); random.shuffle(...)` without reseeding the
    # process-wide RNG as a side effect.
    random.Random(seed).shuffle(train_val)
    meta_df['train_val'] = train_val

    images = list(pathlib.Path(train_path).glob('**/*.jpg'))
    if len(images) != n:
        print('Is train/val split already done?')
        return meta_df

    # Build the name -> split lookup once instead of scanning the whole column
    # for every image. `setdefault` keeps the first row of a duplicated name.
    split_by_name = {}
    for name, split in zip(meta_df['image_name'], meta_df['train_val']):
        split_by_name.setdefault(name, split)

    # Validate up front so a missing row cannot leave a half-moved dataset.
    unknown = [image.name for image in images if image.stem not in split_by_name]
    if unknown:
        raise IndexError('No metadata row for image(s): ' + ', '.join(unknown[:5]))

    # The destination may not exist yet on a fresh checkout.
    val_dir = pathlib.Path(val_path)
    val_dir.mkdir(parents=True, exist_ok=True)

    moved = 0
    for source in images:
        if split_by_name[source.stem] == 'val':
            shutil.move(str(source), str(val_dir / source.name))
            moved += 1

    if moved == n_val:
        print(n_val, 'validation images moved to validation directory')
    else:
        print('There is a discrepancy in number of validation images moved')
        print('Images supposed to be moved:', n_val)
        print('Images moved:', moved)

    return meta_df

In [None]:
def move_to_target_subfolders(meta_df, jpeg_path, target_list):
    """Sort the images under ``jpeg_path`` into one subfolder per target value.

    Every ``.jpg`` found recursively under ``jpeg_path`` is moved to
    ``jpeg_path/<target>/<file name>``, where ``<target>`` is the ``target``
    value of the row whose ``image_name`` equals the file stem. Images that
    already sit in the right subfolder are left alone, so the function can be
    re-run safely.

    Args:
        meta_df: DataFrame with ``image_name`` and ``target`` columns.
        jpeg_path: Existing directory holding the images.
        target_list: Target values whose subfolders are created up front.

    Raises:
        IndexError: if an image has no row in ``meta_df``. The check runs
            before any file is moved.
    """
    root = pathlib.Path(jpeg_path)
    for target in target_list:
        root.joinpath(str(target)).mkdir(exist_ok = True)

    # Snapshot the listing before anything is moved.
    images = list(root.glob('**/*.jpg'))

    # Build the name -> target lookup once instead of scanning the whole column
    # for every image. `setdefault` keeps the first row of a duplicated name.
    target_by_name = {}
    for name, target in zip(meta_df['image_name'], meta_df['target']):
        target_by_name.setdefault(name, target)

    unknown = [image.name for image in images if image.stem not in target_by_name]
    if unknown:
        raise IndexError('No metadata row for image(s): ' + ', '.join(unknown[:5]))

    for image in images:
        destination = root.joinpath(str(target_by_name[image.stem]), image.name)
        if destination == image:
            # Already sorted by an earlier run.
            continue
        # Covers labels that were not listed in `target_list`.
        destination.parent.mkdir(exist_ok = True)
        shutil.move(str(image), str(destination))
    print('Images moved to target subfolders')
        
#move_to_target_subfolders(meta, JPEG_TRAIN, [0, 1])
#move_to_target_subfolders(meta, JPEG_VAL, [0, 1])

In [None]:
# Run (or re-check) the train/validation split, then display the table, which
# now carries the 'train_val' column written by split_train_val.
meta = split_train_val(meta, JPEG_TRAIN, JPEG_VAL)
meta

In [None]:
def preprocess_images(img):
    """Convert a decoded image to float32 and resize it to (HEIGHT, WIDTH)."""
    as_float = tf.image.convert_image_dtype(img, tf.float32)
    return tf.image.resize(as_float, [HEIGHT, WIDTH])

def augment(img):
    """Apply random flips and a random number of 90-degree rotations.

    The image is flipped left/right and then up/down (each at random), and
    finally rotated by ``k`` quarter turns, with ``k`` an int32 drawn from
    ``tf.random.uniform`` using ``minval=0`` and ``maxval=4``.
    """
    flipped = tf.image.random_flip_up_down(tf.image.random_flip_left_right(img))
    quarter_turns = tf.random.uniform(shape = (), minval=0, maxval=4, dtype=tf.int32)
    return tf.image.rot90(flipped, quarter_turns)
    
def process_path(jpeg_path):
    """Load one labelled JPEG and return ``(image, label)``.

    The path is expected to end in ``<split>/<label>/<file>.jpg``: the parent
    directory name is parsed as the int32 label and its parent names the
    split. Every image is converted to float32 and resized by
    ``preprocess_images``; random augmentation is applied only to images whose
    label is non-zero and whose split directory is not ``'val'``.

    Args:
        jpeg_path: Scalar string tensor holding the file path.

    Returns:
        Tuple ``(img, label)``.
    """
    # Split once and index from the end. For the JPEG_* paths of this notebook
    # `parts[-3]` is the same component as the former fixed index 3, but it
    # keeps working when the base directory sits at a different depth.
    parts = tf.strings.split(jpeg_path, os.sep)
    label = tf.strings.to_number(parts[-2], tf.int32)
    train_val_test = parts[-3]

    img = tf.io.read_file(jpeg_path)
    # Force three channels so a grayscale JPEG still matches the
    # (HEIGHT, WIDTH, 3) model input.
    img = tf.io.decode_jpeg(img, channels=3)
    img = preprocess_images(img)

    # De Morgan of the original `label == 0 or split == 'val'` skip condition.
    needs_augmentation = tf.logical_and(
        tf.not_equal(label, 0),
        tf.not_equal(train_val_test, tf.constant('val', dtype = tf.string)))
    if needs_augmentation:
        img = augment(img)

    return img, label

# https://www.tensorflow.org/tutorials/load_data/images
def prepare_for_training(ds, batch_size, cache=True, shuffle_buffer_size=100):
    """Turn a dataset of examples into an endlessly repeating training stream.

    Stages are applied in this order: optional cache -> shuffle -> repeat ->
    optional batch -> prefetch.

    Args:
        ds: Dataset to wrap.
        batch_size: Examples per batch; batching is skipped unless it is > 0.
        cache: Falsy disables caching; a non-empty string is passed to
            ``ds.cache`` as its argument (use it for preprocessing work that
            does not fit in memory); any other truthy value calls
            ``ds.cache()`` without arguments.
        shuffle_buffer_size: Buffer size handed to ``ds.shuffle``.

    Returns:
        The prepared dataset.
    """
    if cache:
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

    # Shuffle, then repeat forever.
    ds = ds.shuffle(buffer_size=shuffle_buffer_size).repeat()

    if batch_size > 0:
        ds = ds.batch(batch_size)

    # `prefetch` lets the dataset fetch batches in the background while the
    # model is training.
    return ds.prefetch(buffer_size=AUTOTUNE)

def dataset_from_jpeg(jpeg_path, batch_size, shuffle_buffer_size, prepare = False):
    """Build an endlessly repeating dataset of ``(image, label)`` pairs.

    All ``.jpg`` files found recursively under ``jpeg_path`` are listed,
    shuffled, and decoded with ``process_path``.

    Args:
        jpeg_path: Directory containing the images (directly or in subfolders).
        batch_size: Forwarded to ``prepare_for_training``; unused when
            ``prepare`` is false.
        shuffle_buffer_size: Buffer size for shuffling the decoded examples.
        prepare: When true, the stream is finished by ``prepare_for_training``
            (no cache; shuffle, repeat, batch, prefetch). Otherwise it is only
            shuffled and repeated.

    Returns:
        The resulting dataset.

    Raises:
        FileNotFoundError: if no ``.jpg`` file exists under ``jpeg_path``.
    """
    # The former pattern `jpeg_path + '*.jpg'` had no path separator, so with
    # standard glob rules it matched siblings of the directory rather than the
    # files inside it. The files are now enumerated with pathlib, the same way
    # the rest of this notebook lists images.
    files = sorted(str(f) for f in pathlib.Path(jpeg_path).glob('**/*.jpg'))
    if not files:
        raise FileNotFoundError('No .jpg files found under ' + str(jpeg_path))

    ds = tf.data.Dataset.from_tensor_slices(files)
    # Shuffle the cheap path strings over the whole listing before decoding.
    ds = ds.shuffle(buffer_size=len(files))
    ds = ds.map(process_path, num_parallel_calls=AUTOTUNE)

    if prepare:
        return prepare_for_training(ds, batch_size, cache = False,
                                    shuffle_buffer_size = shuffle_buffer_size)

    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    return ds.repeat()

def dataset_combine(jpeg_path, batch_size, cache = True, shuffle_buffer_size = 1000):
    """Build a training stream that draws from both class folders equally.

    One dataset is created for ``jpeg_path + '/0'`` and one for
    ``jpeg_path + '/1'``; they are mixed with sampling weights 0.5 / 0.5 and
    then finished by ``prepare_for_training``.

    Args:
        jpeg_path: Directory string containing the ``0`` and ``1`` subfolders.
        batch_size: Batch size of the combined stream.
        cache: Accepted but not used; caching is always disabled here.
        shuffle_buffer_size: Shuffle buffer size, used for the per-class
            datasets as well as for the combined one.

    Returns:
        The combined, batched dataset.
    """
    per_class = [
        dataset_from_jpeg(str(jpeg_path + subfolder), False, shuffle_buffer_size, prepare = False)
        for subfolder in ('/0', '/1')
    ]
    balanced = tf.data.experimental.sample_from_datasets(per_class, weights=[0.5, 0.5])
    return prepare_for_training(balanced, batch_size, cache = False,
                                shuffle_buffer_size = shuffle_buffer_size)

# Training stream: mixes the '0' and '1' subfolders of JPEG_TRAIN with equal
# sampling weights (see dataset_combine).
ds_train = dataset_combine(JPEG_TRAIN, BATCH_SIZE, shuffle_buffer_size = 500)
# Validation stream: built from JPEG_VAL and finished by prepare_for_training
# (prepare = True), so it is shuffled, repeated, batched and prefetched.
ds_val = dataset_from_jpeg(JPEG_VAL, BATCH_SIZE, shuffle_buffer_size = 150, prepare = True)

In [None]:
def predict_process_path(jpeg_path):
    """Load one unlabelled JPEG for prediction.

    The file is read, decoded to three channels, converted to float32 and
    resized to (HEIGHT, WIDTH). No augmentation is applied.

    Args:
        jpeg_path: Path of the image file.

    Returns:
        The preprocessed image.
    """
    raw = tf.io.read_file(jpeg_path)
    # Force three channels so a grayscale JPEG still matches the
    # (HEIGHT, WIDTH, 3) model input.
    img = tf.io.decode_jpeg(raw, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    return tf.image.resize(img, [HEIGHT, WIDTH])
    
def predict_dataset(jpeg_path):
    """Build the unshuffled, batched dataset used for prediction.

    All ``.jpg`` files found recursively under ``jpeg_path`` are taken in
    sorted path order, decoded with ``predict_process_path``, batched with
    ``BATCH_SIZE`` and prefetched.

    Args:
        jpeg_path: Directory containing the images.

    Returns:
        Dataset of image batches, in sorted file order.

    Raises:
        FileNotFoundError: if no ``.jpg`` file exists under ``jpeg_path``.
    """
    # The former pattern `jpeg_path + '*.jpg'` had no path separator, so with
    # standard glob rules it matched siblings of the directory rather than the
    # files inside it. Sorting keeps the order deterministic.
    files = sorted(str(f) for f in pathlib.Path(jpeg_path).glob('**/*.jpg'))
    if not files:
        raise FileNotFoundError('No .jpg files found under ' + str(jpeg_path))

    ds = tf.data.Dataset.from_tensor_slices(files)
    ds = ds.map(predict_process_path, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(BATCH_SIZE)
    return ds.prefetch(buffer_size=AUTOTUNE)

# Unshuffled, batched stream of the test images; consumed by predict() further down.
ds_test = predict_dataset(JPEG_TEST)

In [None]:
def show(image, label):
    """Plot every image of a batch in its own figure, titled '<index> - <label>'.

    Args:
        image: Batch of images, indexed as ``image[i, :, :, :]``.
        label: Batch of labels exposing ``.numpy()``.
    """
    # Convert once, and iterate over the labels actually present rather than
    # the global BATCH_SIZE, so a partial batch does not raise IndexError.
    labels = label.numpy()
    for i in range(len(labels)):
        plt.figure()
        plt.imshow(image[i,:,:,:])
        plt.title(str(i) + ' - ' + str(labels[i]))
        plt.axis('off')

In [None]:
def build_resnet50():
    """Build, compile and summarize a ResNet50 with a single sigmoid output.

    The network is created with ``weights=None`` (no pretrained weights are
    loaded) for inputs of shape (HEIGHT, WIDTH, 3). The output of its
    second-to-last layer feeds a new one-unit sigmoid ``Dense`` layer that is
    kept in float32. The model is compiled with binary cross-entropy, Adam
    (learning rate 0.001) and the shared ``METRICS`` list.

    Returns:
        The compiled Keras model.
    """
    backbone = tf.keras.applications.ResNet50(
        include_top=True,
        weights=None,
        input_tensor=None,
        input_shape=(HEIGHT, WIDTH, 3),
        pooling=None,
    )
    # Branch off just before the original final layer and attach the new head.
    penultimate = backbone.layers[-2].output
    probability = KL.Dense(1, activation = 'sigmoid', dtype = 'float32')(penultimate)

    model = Model(backbone.input, probability)
    model.compile(
        loss='binary_crossentropy',
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        metrics=METRICS,
    )
    model.summary()
    return model

In [None]:
def build_efficientnet_b4():
    """Build an EfficientNetB4 for (HEIGHT, WIDTH, 3) inputs and print its summary.

    The network is created with ``weights=None`` and is not compiled.

    NOTE(review): it is configured with ``classes=2`` and a sigmoid classifier,
    whereas build_resnet50 ends in a single sigmoid unit — confirm the intended
    output layout before training this model.

    Returns:
        The (uncompiled) Keras model.
    """
    base_model = tf.keras.applications.EfficientNetB4(
        include_top=True, weights=None, input_tensor=None, input_shape=(HEIGHT, WIDTH, 3),
        pooling=None, classes=2, classifier_activation='sigmoid')
    # Was `bas_model.summary()`, which raised NameError on every call.
    base_model.summary()
    return base_model
    
# Instantiate EfficientNetB4 once to inspect its layer summary.
build_efficientnet_b4()

In [None]:
# Build and compile the ResNet50 classifier used for training and prediction below.
model = build_resnet50()

In [None]:
# TensorBoard logs go to a directory named after the current timestamp, so
# every run gets its own folder.
log_dir = pathlib.Path('logs/siim-isic-melanoma-classification/fit/' + datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# Checkpoint weights only (not the full model), and only when 'val_auc'
# improves (mode='max'); epoch number and val_auc are embedded in the file name.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = 'models/siim-isic-melanoma-classification/resnet50-768-{epoch:02d}-{val_auc:.4f}.h5', 
    monitor = "val_auc",
    mode='max',
    save_best_only = True,
    save_weights_only = True,
    )

In [None]:
# Restore weights from an earlier checkpoint before continuing.
# NOTE(review): the hard-coded file must exist; its name follows the checkpoint
# file-name template (presumably epoch 38, val_auc 0.8852) — confirm.
model.load_weights('models/siim-isic-melanoma-classification/resnet50-768-38-0.8852.h5')

In [None]:
# ds_train and ds_val repeat indefinitely, so the length of an epoch and of
# each validation pass is fixed explicitly through steps_per_epoch and
# validation_steps (counted in batches).
history = model.fit(ds_train, 
                    validation_data = ds_val,
                    epochs = 20, 
                    steps_per_epoch = 400, 
                    validation_steps = 400, 
                    callbacks = [tensorboard_callback, checkpoint_callback])

In [None]:
# Persist the current weights (weights only) under a fixed submission name.
model.save_weights('models/siim-isic-melanoma-classification/submit-04.h5')

In [None]:
# ds_val repeats forever, so `steps` bounds the evaluation to 2000 batches.
model.evaluate(ds_val, steps = 2000)

In [None]:
def predict(model, jpeg_path, dataset, csv_filename, take = False):
    """Predict on a dataset and write an ``image_name,target`` CSV.

    Args:
        model: Object exposing ``predict(dataset)``.
        jpeg_path: Directory the dataset was built from; searched recursively
            for ``.jpg`` files to recover the image names.
        dataset: Batched dataset of images, in sorted file order.
        csv_filename: File name of the CSV written inside ``SUBMITS_DIR``.
        take: Falsy to predict on the whole dataset, otherwise the number of
            batches to predict on (the first ``take * BATCH_SIZE`` images).

    Returns:
        DataFrame with the columns ``image_name`` and ``target``.
    """
    # Image names are paired with predictions purely by position. A bare glob
    # returns files in directory-enumeration order, which is not guaranteed to
    # be sorted, while the unshuffled prediction dataset uses a deterministic
    # order (presumably sorted by path — confirm). Sorting by full path string
    # removes the dependence on the file system.
    image_paths = sorted(str(f) for f in pathlib.Path(jpeg_path).glob('**/*.jpg'))
    image_names = [pathlib.Path(f).stem for f in image_paths]

    if take:
        dataset = dataset.take(take)
        image_names = image_names[:take*BATCH_SIZE]

    preds = np.ravel(model.predict(dataset))
    df = pd.DataFrame({'image_name': image_names,
                       'target': preds})

    # The submissions folder may not exist yet on a fresh checkout.
    out_dir = pathlib.Path(SUBMITS_DIR)
    out_dir.mkdir(parents=True, exist_ok=True)
    df.to_csv(out_dir / csv_filename, index = False)
    return df

# Predict on the full test set (take = False) and write 'submit-05.csv' under SUBMITS_DIR.
predicts = predict(model, JPEG_TEST, ds_test, 'submit-05.csv', take = False)

In [None]:
# Display the prediction table as the cell output.
predicts