In [1]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import os
import matplotlib.pylab as plt
from sklearn.metrics import roc_curve, roc_auc_score, recall_score
from tensorflow.keras.applications import InceptionResNetV2, ResNet50, InceptionV3, DenseNet121, Xception
import tensorflow_hub as hub

In [2]:
#set tensorflow to use CPU
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

In [3]:
img_size = 512
batch_size = 16
num_classes = 10
data_dir = '../data/frames/train'
def get_ds():
    train_ds = tf.keras.preprocessing.image_dataset_from_directory(data_dir, label_mode='categorical', validation_split=0.2, subset='training', shuffle=False, image_size=(img_size, img_size), batch_size=batch_size)
    val_ds = tf.keras.preprocessing.image_dataset_from_directory(data_dir, label_mode='categorical', validation_split=0.2, subset='validation', shuffle=False, image_size=(img_size, img_size), batch_size=batch_size)
    #Apply data augmentation
    preprocessing_model = tf.keras.Sequential()
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomRotation(40))
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomTranslation(0.2, 0.2))
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomZoom(0.2, 0.2))
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomFlip(mode="horizontal"))
    preprocessing_model.add(
        tf.keras.layers.experimental.preprocessing.RandomFlip(mode="vertical"))

    train_ds = train_ds.map(lambda images, labels:
                            (preprocessing_model(images), labels))
    return train_ds, val_ds

In [4]:
n_layers = 2
n_neurons = 512
n_dropout = 0.2
def get_model():
    base_model = tf.keras.applications.InceptionResNetV2(include_top=False, weights='imagenet', input_shape=(img_size, img_size, 3))
    preprocess_fx = tf.keras.applications.inception_resnet_v2.preprocess_input
    
    inputs = tf.keras.Input(shape=(img_size, img_size, 3))
    y = preprocess_fx(inputs)
    y = base_model(y, training=True)
    y = tf.keras.layers.GlobalAveragePooling2D()(y)
    y = tf.keras.layers.Dropout(n_dropout)(y)
    y = tf.keras.layers.Dense(n_neurons, activation='relu')(y)
    y = tf.keras.layers.Dropout(n_dropout)(y)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(y)
    model = tf.keras.Model(inputs, outputs)
    
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=3e-4),
                loss=keras.losses.CategoricalCrossentropy(),
                metrics=['categorical_accuracy'])
    return model


In [5]:
early_stopping = keras.callbacks.EarlyStopping(patience=100, min_delta=1e-6, restore_best_weights=True)

In [6]:
# strategy = tf.distribute.MirroredStrategy()
# print('Number of devices: {}'.format(strategy.num_replicas_in_sync))
# with strategy.scope():
model = get_model()
train_ds, val_ds = get_ds()

2023-02-02 18:31:15.330613: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-02-02 18:31:16.961772: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 18288 MB memory:  -> device: 0, name: NVIDIA RTX A4500, pci bus id: 0000:81:00.0, compute capability: 8.6
2023-02-02 18:31:16.962631: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 10528 MB memory:  -> device: 1, name: NVIDIA TITAN V, pci bus id: 0000:02:00.0, compute capability: 7.0


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 2847 files belonging to 10 classes.
Using 2278 files for training.
Found 2847 files belonging to 10 classes.
Using 569 files for validation.


In [7]:
epochs = 500
history = model.fit(train_ds, epochs=epochs, validation_data=val_ds, callbacks=[early_stopping])

Epoch 1/500


2023-02-02 18:31:50.657794: I tensorflow/stream_executor/cuda/cuda_dnn.cc:366] Loaded cuDNN version 8700

You may not need to update to CUDA 11.1; cherry-picking the ptxas binary is often sufficient.
2023-02-02 18:31:51.533030: W tensorflow/stream_executor/gpu/asm_compiler.cc:230] Falling back to the CUDA driver for PTX compilation; ptxas does not support CC 8.6
2023-02-02 18:31:51.533051: W tensorflow/stream_executor/gpu/asm_compiler.cc:233] Used ptxas at ptxas
2023-02-02 18:31:51.533136: W tensorflow/stream_executor/gpu/redzone_allocator.cc:314] UNIMPLEMENTED: ptxas ptxas too old. Falling back to the driver to compile.
Relying on driver to perform ptx compilation. 
Modify $PATH to customize ptxas location.
This message will be only logged once.
2023-02-02 18:31:52.738637: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500

In [None]:
import pandas as pd
from sklearn.metrics import roc_auc_score, accuracy_score
y_preds = {}
y_trues = {}
for i in range(num_classes):
    y_preds[i] = np.array([])
    y_trues[i] = np.array([])
for images, labels in val_ds:
    pred = model.predict(images)
    for i in range(8):
        y_preds[i] = np.concatenate((y_preds[i], pred[:, i]))
        y_trues[i] = np.concatenate((y_trues[i], labels[:, i]))

2023-01-18 16:30:26.870768: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:766] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_108575"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\022FlatMapDataset:449"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. 

Instructions for updating:
use `experimental_local_results` instead.


Instructions for updating:
use `experimental_local_results` instead.
2023-01-18 16:30:30.989917: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:766] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_109554"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\022FlatMapDataset:475"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-shardi

In [None]:
class_strs = [folder for folder in os.listdir(data_dir) if os.path.isdir]
class_strs.sort()

In [None]:
class_strs

['oob',
 'other',
 'phase_1_exposure',
 'phase_2_hiatal_dissec',
 'phase_3_fundus',
 'phase_4_eso',
 'phase_5_hiatal_repair',
 'phase_6_wrap']

In [None]:
results_df = pd.DataFrame(columns=['class', 'auc', 'accuracy'])
for i in range(8):
    auc = roc_auc_score(y_trues[i], y_preds[i])
    acc = accuracy_score(y_trues[i], y_preds[i] > 0.5)
    results_df.loc[i] = [class_strs[i], auc, acc]
results_df

Unnamed: 0,class,auc,accuracy
0,oob,0.977521,0.973422
1,other,0.971264,0.903654
2,phase_1_exposure,0.999423,0.996678
3,phase_2_hiatal_dissec,0.971388,0.936877
4,phase_3_fundus,0.907915,0.9701
5,phase_4_eso,0.980769,0.956811
6,phase_5_hiatal_repair,0.988927,0.953488
7,phase_6_wrap,0.996226,0.976744


In [None]:
#load a test ds with no labels
test_dir = '../data/test'
test_ds = tf.keras.preprocessing.image_dataset_from_directory(test_dir, image_size=(img_size, img_size), batch_size=1)


Found 1360 files belonging to 1 classes.


In [None]:
file_paths = test_ds.file_paths

In [None]:
#get predictions for each image and save to csv
class_strs.append('image_fname', 'label')
preds_df = pd.DataFrame(columns=class_strs)

In [None]:
for image in test_ds:
    pred = model.predict(image)
    pred = pred[0]
    pred = np.append(pred, os.path.basename(file_paths[0]))
    pred = np.append(pred, os.path.basename(os.path.dirname(file_paths[0])))
    preds_df.loc[len(preds_df)] = pred
    file_paths = file_paths[1:]

2023-01-18 16:56:59.331721: W tensorflow/core/grappler/optimizers/data/auto_shard.cc:766] AUTO sharding policy will apply DATA sharding policy as it failed to apply FILE sharding policy because of the following reason: Did not find a shardable source, walked to a node which is not a dataset: name: "FlatMapDataset/_9"
op: "FlatMapDataset"
input: "PrefetchDataset/_8"
attr {
  key: "Targuments"
  value {
    list {
    }
  }
}
attr {
  key: "_cardinality"
  value {
    i: -2
  }
}
attr {
  key: "f"
  value {
    func {
      name: "__inference_Dataset_flat_map_slice_batch_indices_116997"
    }
  }
}
attr {
  key: "metadata"
  value {
    s: "\n\022FlatMapDataset:960"
  }
}
attr {
  key: "output_shapes"
  value {
    list {
      shape {
        dim {
          size: -1
        }
      }
    }
  }
}
attr {
  key: "output_types"
  value {
    list {
      type: DT_INT64
    }
  }
}
. Consider either turning off auto-sharding or switching the auto_shard_policy to DATA to shard this dataset. 

In [None]:
preds_df.head()

Unnamed: 0,oob,other,phase_1_exposure,phase_2_hiatal_dissec,phase_3_fundus,phase_4_eso,phase_5_hiatal_repair,phase_6_wrap,image_fname
0,0.0145383645,0.88687944,0.018921277,0.023483107,0.011018063,0.020968148,0.010109716,0.014081824,vid_1_3565.jpg
1,1.9744988e-05,0.0033628787,2.005741e-06,3.8803457e-05,6.1614423e-06,6.8650356e-06,0.0034440015,0.9931195,vid_1_4860.jpg
2,0.0025292393,0.724156,0.0009202642,0.004356111,0.0015864857,0.0086976485,0.09305355,0.16470067,vid_1_865.jpg
3,0.055614695,0.7358492,0.07810252,0.053913485,0.011321764,0.05913351,0.0037437472,0.0023211276,vid_1_2300.jpg
4,0.016452251,0.7833865,0.014081486,0.029732594,0.01963642,0.024813775,0.02353263,0.08836426,vid_1_2465.jpg


In [None]:
preds_df.to_csv('../data/test/preds.csv')