In [1]:
import os
import copy
from sklearn.model_selection import train_test_split
from tqdm import tqdm_notebook as tqdm
import glob
import numpy as np
import tensorflow as tf
from skimage.io import imread, imsave
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, plot_confusion_matrix
from sklearn import svm
from PIL import Image
from tensorflow.keras.applications.resnet50 import preprocess_input

In [2]:
# Root folder of the Kvasir dataset. The default is the original author's
# local checkout; export KVASIR_ROOT to run the notebook on another machine
# without editing this cell.
data_root = os.environ.get('KVASIR_ROOT', '/Users/roryliao/Desktop/kvasir-dataset')

# Class-per-subfolder image tree that is globbed with '*/*.jpg' further down.
src = os.path.join(data_root, 'Images')
# Directories handed to flow_from_directory for training/validation and testing.
train_dir = os.path.join(data_root, 'Train')
test_dir = os.path.join(data_root, 'Test')
# Padded variants of the two splits (not referenced by the cells visible here).
train_padded_dir = os.path.join(data_root, 'Train_Padded')
test_padded_dir = os.path.join(data_root, 'Test_Padded')

In [3]:
# Training-time generator: random shifts, flips and rotations, with ResNet50's
# preprocess_input applied to every image. validation_split=0.1 reserves a
# fraction of the images so that 'training' / 'validation' subsets can be
# requested from the same directory.
# TODO: read the ImageDataGenerator documentation and look into creating new
# data using rotations.
train_datagen = ImageDataGenerator(
    rotation_range=90,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    preprocessing_function=preprocess_input,
    validation_split=0.1,
)

# Test-time generator: ResNet50 preprocessing only, no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [4]:
# Validation images must not be randomly augmented: with the augmenting
# train_datagen the validation metrics are computed on shifted/flipped/rotated
# images and are therefore noisy and not comparable to the test metrics.
# This generator applies preprocessing only. Its validation_split must stay
# equal to the one on train_datagen (0.1) so that the 'validation' subset is
# exactly the slice of files excluded from the 'training' subset.
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.1)

# Options shared by all three directory iterators.
flow_kwargs = dict(batch_size=2, target_size=(256, 256))

# Augmented training batches from the 'training' part of train_dir.
train_gen = train_datagen.flow_from_directory(train_dir, seed=42, subset='training', **flow_kwargs)
# Un-augmented validation batches from the held-out part of train_dir.
val_gen = val_datagen.flow_from_directory(train_dir, seed=42, subset='validation', **flow_kwargs)
# Test batches in fixed file order (shuffle=False) so predictions line up with test_gen.classes.
test_gen = test_datagen.flow_from_directory(test_dir, shuffle=False, **flow_kwargs)

Found 2883 images belonging to 8 classes.
Found 317 images belonging to 8 classes.
Found 800 images belonging to 8 classes.


In [5]:
# Collect every JPEG one level below `src`. The list is sorted because glob
# returns files in filesystem order, and the seeded train/test split that
# consumes these arrays is only reproducible when the input order is stable.
img_paths = sorted(glob.glob(os.path.join(src, '*/*.jpg')))

# The class of an image is the name of the folder that contains it.
parent_names = [os.path.basename(os.path.dirname(p)) for p in img_paths]

# Number the classes by alphanumeric folder order, which is the convention
# flow_from_directory uses for class_indices. The previous hand-written chain
# swapped indices 0 and 1 ('dyed-lifted-polyps' / 'dyed-resection-margins')
# and silently mapped any unknown folder to 7.
# NOTE(review): assumes `src` holds the same class sub-folders as train_dir,
# so these indices line up with train_gen.class_indices — confirm.
class_names = sorted(set(parent_names))
class_to_idx = {name: idx for idx, name in enumerate(class_names)}
labels = np.asarray([class_to_idx[name] for name in parent_names])

# Load every image into memory (one imread call per path).
imgs = np.asarray([imread(p) for p in img_paths])
len(img_paths)

4000

In [6]:
# Hold out 20% of the in-memory images/labels as a test set; random_state
# pins the shuffle used for the split.
split_parts = train_test_split(
    imgs,
    labels,
    test_size=0.2,
    random_state=1,
)
x_train, x_test, y_train, y_test = split_parts

In [7]:
# Draw a single batch from the training iterator and display the shapes of
# its image and label arrays as a sanity check.
first_batch = next(train_gen)
x, y = first_batch
x.shape, y.shape

((2, 256, 256, 3), (2, 8))

In [8]:
# Shape of the images fed to the network (matches the generators' target_size
# of (256, 256) plus three colour channels).
img_shape = (256, 256, 3)

# Explicit input tensor for the network.
x_in = tf.keras.layers.Input(shape=img_shape)

# ImageNet-pretrained ResNet50 without its classification top. Passing
# input_tensor=x_in makes x_in the actual input of the backbone; previously
# x_in was created but never connected, and ResNet50 built a second, separate
# input layer of its own.
res_model = tf.keras.applications.ResNet50(
    include_top=False,
    weights='imagenet',
    input_tensor=x_in,
    input_shape=img_shape,
)



In [9]:
# Classification head stacked on the ResNet50 feature maps:
# global average pooling -> dropout (rate 0.7) -> 8-way softmax.
pooled = tf.keras.layers.GlobalAveragePooling2D()(res_model.output)
x = tf.keras.layers.Dropout(0.7)(pooled)
x_out = tf.keras.layers.Dense(8, activation='softmax')(x)

W0817 00:22:33.452571 4520883648 nn_ops.py:4224] Large dropout rate: 0.7 (>0.5). In TensorFlow 2.x, dropout() uses dropout rate instead of keep_prob. Please ensure that this is intended.
W0817 00:22:33.466248 4520883648 deprecation.py:506] From /anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [10]:
# Assemble the end-to-end classifier from the backbone's input to the softmax
# output, then print the layer-by-layer summary.
model = tf.keras.Model(inputs=res_model.input, outputs=x_out)
model.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 256, 256, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 262, 262, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 128, 128, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 128, 128, 64) 256         conv1[0][0]                      
______________________________________________________________________________________________

In [11]:
# Configure training: Adam with its default settings, categorical
# cross-entropy for the one-hot labels produced by the generators, and
# accuracy tracked under the 'acc' key.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

In [13]:
# Train for 50 epochs; each epoch runs len(generator) batches for both the
# training and the validation generator. The returned History object is kept
# for later inspection.
# NOTE(review): fit_generator is deprecated in TensorFlow 2.1+, where
# Model.fit accepts generators directly — switch when the environment is
# upgraded.
n_train_steps = len(train_gen)
n_val_steps = len(val_gen)
history = model.fit_generator(
    train_gen,
    steps_per_epoch=n_train_steps,
    epochs=50,
    validation_data=val_gen,
    validation_steps=n_val_steps,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Mark the ResNet50 backbone as trainable.
# NOTE(review): none of the cells visible above set res_model.trainable to
# False, so this looks like a no-op unless an unseen cell froze the backbone
# first — confirm. Keras also expects model.compile() to be called again
# after `trainable` is changed for the change to take effect.
res_model.trainable = True