In [0]:
from matplotlib import pyplot as plt
import cv2
import numpy as np
import pandas as pd
import os
import sys
from IPython.display import display, clear_output
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical
pd.options.mode.chained_assignment = None
AUTOTUNE = tf.data.experimental.AUTOTUNE

In [0]:
# Print the local compute devices that TensorFlow reports for this runtime.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

In [9]:
# Mount Google Drive under /content/drive; the data paths used below are
# relative paths that start with 'drive/My Drive/...'.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## Define constants

In [0]:
# Human-readable grade names; a name's position in the array is its numeric level.
CLASS_NAMES = np.array(['No DR', 'Mild', 'Moderate', 'Severe', 'Proliferative DR'])
# One integer index per class, used to build one-hot style labels.
CLASS_INDEXES = range(len(CLASS_NAMES))
# Target image size fed to the network (square images).
WIDTH = 512
HEIGHT = 512
# Number of samples per batch in the tf.data pipelines.
BATCH_SIZE = 64
# Directory holding the training JPEGs (relative to the Colab working directory).
BASE_IMAGE_DIR = os.path.join('drive/My Drive/Colab Notebooks', 'kaggle_data', 'train')

## Process the dataframe

In [0]:
def load_df():
  """Read the training labels CSV from Drive, print its first rows and return it.

  Returns:
    pandas.DataFrame with the contents of `trainLabels.csv`.
  """
  labels_path = os.path.join('drive/My Drive/Colab Notebooks', 'kaggle_data', 'trainLabels.csv')
  labels = pd.read_csv(labels_path)
  print(labels.head())
  return labels

def add_features(df):
  """Add the derived columns `image_path` and `level_categorical` to `df`.

  The frame is modified in place and also returned.

  Args:
    df: frame with an `image` column (image name) and a `level` column.

  Returns:
    The same frame, with:
      * `image_path`: BASE_IMAGE_DIR + '/' + image name + '.jpeg'
      * `level_categorical`: boolean one-hot encoding of `level` over all classes.
  """
  n_classes = len(CLASS_NAMES)

  def one_hot(level):
    # Encode a single level as a boolean vector with one slot per class.
    return to_categorical(level, n_classes, dtype='bool')

  image_names = df['image'].astype(str)
  df['image_path'] = BASE_IMAGE_DIR + '/' + image_names + '.jpeg'
  df['level_categorical'] = df['level'].map(one_hot)
  return df

def remove_unexist(df):
  """Return only the rows of `df` whose `image_path` points to an existing file.

  The input frame is left untouched (no temporary `exists` column is written
  to it); a new frame is returned.

  Args:
    df: frame with an `image_path` column holding filesystem paths.

  Returns:
    A copy of the rows of `df` for which `os.path.exists(image_path)` is true.
  """
  # Cast to bool explicitly: on an empty frame `map` yields a non-boolean
  # Series, which would otherwise be interpreted as a list of column labels.
  exists_mask = df['image_path'].map(os.path.exists).astype(bool)
  return df[exists_mask].copy()

def train_val_split(df):
  """Split `df` into training and validation frames by image name (80% / 20%).

  The split is made on the `image` column with a fixed random seed, and the
  resulting sizes are printed.

  Args:
    df: frame with an `image` column.

  Returns:
    (train_df, val_df): rows whose image name fell in the train / validation part.
  """
  image_names = df['image']
  train_names, val_names = train_test_split(image_names,
                                            test_size=0.20,
                                            random_state=2020)
  in_train = image_names.isin(train_names)
  in_val = image_names.isin(val_names)
  train_part = df[in_train]
  val_part = df[in_val]
  print('train', train_part.shape[0], 'val', val_part.shape[0])
  return train_part, val_part

def balancing(df):
  """Oversample minority levels so that every level has as many rows as the largest one.

  For each level with fewer rows than the most frequent level, its rows are
  repeated cyclically (in their original order) until the counts match. The
  copies are appended after the original rows, level by level. Rows whose
  level is NaN are kept but never oversampled.

  Args:
    df: frame with a `level` column.

  Returns:
    A new frame containing the original rows followed by the added copies.
  """
  max_level_count = df['level'].value_counts().max()
  pieces = [df]
  for level in df['level'].unique():
    # NaN never compares equal to anything (not even itself), so it has to be
    # detected with pd.isna; such rows cannot be selected by `==` below.
    if pd.isna(level):
      continue
    level_rows = df[df['level'] == level]
    count_of_level = len(level_rows)
    if count_of_level == max_level_count:
      continue
    to_add_count = max_level_count - count_of_level
    print(f'Need to add {to_add_count} copies of level {level}, where count of level is {count_of_level}')
    # Cycle through the rows of this level: 0, 1, ..., count-1, 0, 1, ...
    positions = np.arange(to_add_count) % count_of_level
    pieces.append(level_rows.iloc[positions])
  # A single concat replaces the per-level DataFrame.append calls
  # (DataFrame.append no longer exists in pandas 2.x) and keeps column dtypes.
  return pd.concat(pieces)

def prepare_data():
  """Run the whole dataframe preparation and return the train / validation frames.

  Steps: load the labels, add derived columns, drop rows whose image file is
  missing, split into train / validation, then balance the training part only.

  Returns:
    (train_df, val_df): balanced training frame and untouched validation frame.
  """
  labelled = remove_unexist(add_features(load_df()))
  train_part, val_part = train_val_split(labelled)
  return balancing(train_part), val_part

In [12]:
# Build the balanced training frame and the validation frame.
train_df, val_df = prepare_data()

# NOTE(review): the block below is disabled. It refers to a `df` variable that
# is not defined at notebook level in the visible cells - confirm before re-enabling.
# # build a lookup table for using in tensorflow
# level_dict = tf.lookup.StaticHashTable(
#     initializer=tf.lookup.KeyValueTensorInitializer(
#         keys=tf.constant(df['image']),
#         values=tf.constant(df['level']),
#     ),
#     default_value=tf.constant(-1),
#     name="class_weight"
# )

      image  level
0   10_left      0
1  10_right      0
2   13_left      0
3  13_right      0
4   15_left      1
train 6725 val 1682
Need to add 4423 copies of level 1, where count of level is 474
Need to add 3856 copies of level 2, where count of level is 1041
Need to add 4772 copies of level 4, where count of level is 125
Need to add 4709 copies of level 3, where count of level is 188


In [0]:
# Plot how many training rows each level has after balancing.
# `df` only exists inside prepare_data(), so the notebook-level `train_df` is
# used here; swap in `val_df` to inspect the (unbalanced) validation split.
train_df['level'].value_counts().plot(kind='bar', figsize=(14,8), title='Level distribution')

## Build input pipeline

In [0]:
def decode_img(img):
  """Decode JPEG bytes into a float32 RGB image resized to HEIGHT x WIDTH.

  Args:
    img: scalar string tensor holding the compressed JPEG file contents.

  Returns:
    float32 tensor of shape (HEIGHT, WIDTH, 3) with values in the [0, 1] range.
  """
  # convert the compressed string to a 3D uint8 tensor
  img = tf.image.decode_jpeg(img, channels=3)
  # Use `convert_image_dtype` to convert to floats in the [0,1] range.
  img = tf.image.convert_image_dtype(img, tf.float32)
  # `tf.image.resize` takes the size as [new_height, new_width]; this order
  # also matches the model's input_shape=(HEIGHT, WIDTH, 3).
  return tf.image.resize(img, [HEIGHT, WIDTH])

def get_label(level):
  """Compare `level` against every entry of CLASS_INDEXES and return the result.

  NOTE(review): when `level` is a tensor this relies on the tensor's `==`
  overloading; the batch labels printed further down suggest the result is a
  boolean vector with one entry per class (True at the position of `level`) -
  confirm if the TensorFlow version changes.
  """
  # An earlier approach derived the image name from the file path instead:
  # parts = tf.strings.split(file_path, os.path.sep)
  # image_name = tf.strings.split(parts[-1], '.')[0]
  return level == CLASS_INDEXES

def process_path(file_path, level):
  """Map one (file path, level) pair to an (image, label) pair.

  Args:
    file_path: path of the JPEG file to load.
    level: class level of that image.

  Returns:
    (img, label): the decoded, resized image and the label built by `get_label`.
  """
  # Read the raw file contents as a string, then decode them into an image.
  raw_bytes = tf.io.read_file(file_path)
  return decode_img(raw_bytes), get_label(level)

def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
  """Turn a dataset of samples into an endlessly repeating, shuffled, batched stream.

  Args:
    ds: dataset of individual samples.
    cache: falsy to skip caching, a string to cache to that file,
      any other truthy value to cache in memory.
    shuffle_buffer_size: buffer size passed to `shuffle`.

  Returns:
    The dataset after (optional) cache -> shuffle -> repeat -> batch(BATCH_SIZE)
    -> prefetch(AUTOTUNE).
  """
  if cache:
    ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()
  return (ds.shuffle(buffer_size=shuffle_buffer_size)
            .repeat()
            .batch(BATCH_SIZE)
            .prefetch(buffer_size=AUTOTUNE))

def prepare_for_validation(ds, cache=True, shuffle_buffer_size=1000):
  """Build the validation stream; applies the same steps as the training pipeline.

  Args:
    ds: dataset of individual samples.
    cache: falsy to skip caching, a string to cache to that file,
      any other truthy value to cache in memory.
    shuffle_buffer_size: buffer size passed to `shuffle`.

  Returns:
    The dataset after (optional) cache -> shuffle -> repeat -> batch(BATCH_SIZE)
    -> prefetch(AUTOTUNE).
  """
  if cache and isinstance(cache, str):
    # A string names the cache file.
    ds = ds.cache(cache)
  elif cache:
    ds = ds.cache()
  shuffled = ds.shuffle(buffer_size=shuffle_buffer_size)
  repeated = shuffled.repeat()
  batched = repeated.batch(BATCH_SIZE)
  return batched.prefetch(buffer_size=AUTOTUNE)

def show_batch(image_batch, label_batch):
    """Draw the first 25 images of a batch in a 5x5 grid, titled with their class name.

    Args:
        image_batch: indexable batch of images (at least 25 entries).
        label_batch: indexable batch of one-hot labels aligned with `image_batch`.
    """
    plt.figure(figsize=(10,10))
    for idx in range(25):
        plt.subplot(5, 5, idx + 1)
        plt.imshow(image_batch[idx])
        # Mask that selects the class name at the active label position.
        active = label_batch[idx] == 1
        plt.title(CLASS_NAMES[active][0].title())
        plt.axis('off')

def dataset_from_tensor_slices(df):
  """Create a dataset of (image_path, level) pairs from the frame's columns.

  Args:
    df: frame with `image_path` and `level` columns.

  Returns:
    tf.data.Dataset yielding one (path, level) pair per row.
  """
  paths = df['image_path'].values
  levels = df['level'].values
  return tf.data.Dataset.from_tensor_slices((paths, levels))

def create_datasets(train_df, val_df):
  """Build the training and validation input pipelines from the two frames.

  Args:
    train_df: training frame with `image_path` and `level` columns.
    val_df: validation frame with the same columns.

  Returns:
    (train_ds, train_count, val_ds, val_count): the two batched datasets
    (built without caching) and the number of rows in each source frame.
  """
  train_ds = prepare_for_training(
      dataset_from_tensor_slices(train_df).map(process_path, num_parallel_calls=AUTOTUNE),
      cache=False)
  val_ds = prepare_for_validation(
      dataset_from_tensor_slices(val_df).map(process_path, num_parallel_calls=AUTOTUNE),
      cache=False)
  return train_ds, len(train_df), val_ds, len(val_df)

In [0]:
# Build both pipelines; the row counts are used later to derive steps per epoch.
train_ds, train_count, val_ds, val_count = create_datasets(train_df, val_df)

In [21]:
# Sanity check: pull a single batch and print the image tensor shape and the labels.
for image, label in train_ds.take(1):
  print("Image shape: ", image.numpy().shape)
  print("Label: ", label.numpy())

Image shape:  (64, 512, 512, 3)
Label:  [[False  True False False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [False False False False  True]
 [ True False False False False]
 [ True False False False False]
 [False False  True False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [False False  True False False]
 [ True False False False False]
 [ True False False False False]
 [False False  True False False]
 [False False False  True False]
 [False  True False False False]
 [ True False False False False]
 [ True False False False False]
 [False False  True False False]
 [ True False False False False]
 [False False  True False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [ True False False False False]
 [F

In [0]:
# Take one batch from the training pipeline and display it as an image grid.
# `image_batch` / `label_batch` are reused by the prediction cells further down.
image_batch, label_batch = next(iter(train_ds))
show_batch(image_batch.numpy(), label_batch.numpy())

In [22]:
# del model
model = models.Sequential()
model.add(layers.Conv2D(16, (3, 3), activation=tf.keras.layers.LeakyReLU(alpha=0.3), padding='valid',input_shape=(HEIGHT, WIDTH, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(16, (3, 3), activation=tf.keras.layers.LeakyReLU(alpha=0.3)))
# model.add(layers.Conv2D(32, (3, 3), activation='softmax',padding='same'))
# model.add(layers.MaxPooling2D((2, 2)))
# model.add(layers.Conv2D(64, (3, 3), activation='softmax',padding='same'))
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='softmax'))
model.add(layers.Dense(len(CLASS_NAMES), activation='softmax'))
model.summary()
model.compile(optimizer='adadelta',
            loss='categorical_crossentropy',
            metrics=['accuracy'])

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 510, 510, 16)      448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 255, 255, 16)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 253, 253, 16)      2320      
_________________________________________________________________
flatten (Flatten)            (None, 1024144)           0         
_________________________________________________________________
dense (Dense)                (None, 256)               262181120 
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 1285      
Total params: 262,185,173
Trainable params: 262,185,173
Non-trainable params: 0
__________________________________________

In [0]:
# Both datasets repeat indefinitely, so the number of batches per epoch has to
# be given explicitly: one pass over the rows of each frame (integer division
# drops the final partial batch).
model.fit(train_ds, steps_per_epoch=train_count // BATCH_SIZE, epochs=3,
          validation_data=val_ds, validation_steps=val_count // BATCH_SIZE)

Train for 382 steps, validate for 26 steps
Epoch 1/3
 19/382 [>.............................] - ETA: 2:25:54 - loss: 1.6111 - accuracy: 0.0156

In [0]:
# Predicted class index per image. `Sequential.predict_classes` was removed in
# TensorFlow 2.6; taking the argmax over the softmax outputs is the equivalent
# for a multi-class classifier.
np.argmax(model.predict(image_batch), axis=-1)

array([2, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 2, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 0, 2, 2, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0])

In [0]:
# Show the true one-hot labels of the same batch for comparison with the predictions.
label_batch

array([[1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [0., 0., 1., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [1., 0., 0., 0., 0.],
       [0., 1.