In [23]:
# Import necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
%matplotlib inline
import os
import pydicom as dicom
import seaborn as sns
import glob 
import tensorflow as tf

from keras.preprocessing.image import ImageDataGenerator
from keras.applications.densenet import DenseNet121
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras import backend as K
from sklearn.model_selection import train_test_split

from keras.models import load_model
# Switch matplotlib over to seaborn's default theme for every figure drawn
# later in the notebook.
sns.set()

In [24]:
# Metadata table for the images; later cells read the label column from it.
metadata_csv = 'Data_Entry_2017.csv'
data_entry = pd.read_csv(metadata_csv)

In [25]:
# Collect every PNG in the dataset folder. glob returns paths in arbitrary,
# OS-dependent order, so sort them to make the selected subset reproducible.
all_imgs = sorted(glob.glob("dataset/*.png"))
images = []

# Only 1/mem_scale of the files is loaded, to avoid out-of-memory errors and
# to keep experiments fast.
mem_scale = 1000

selection_range = len(all_imgs) // mem_scale
if selection_range == 0:
    # Fail here with a clear message instead of much later inside
    # train_test_split with an empty sample.
    raise ValueError(
        f"Found {len(all_imgs)} image(s) in 'dataset/'; at least {mem_scale} "
        f"are needed to select one with mem_scale={mem_scale}."
    )

for img_path in all_imgs[:selection_range]:
    img = cv2.imread(img_path)
    # cv2.imread reports an unreadable file by returning None rather than
    # raising, which would otherwise surface as an obscure error downstream.
    if img is None:
        raise IOError(f"Could not read image: {img_path}")
    images.append(img)

In [26]:
# Labels must belong to the images that were actually loaded. Taking the first
# N rows of the CSV only lines up with the first N globbed files by
# coincidence (glob order is arbitrary and the folder may hold a subset of the
# CSV), so each label is looked up by image file name instead.
# NOTE(review): assumes column 0 of Data_Entry_2017.csv holds the image file
# name and column 1 the finding labels -- confirm against the CSV header.
loaded_names = [os.path.basename(path) for path in all_imgs[:selection_range]]

labels_by_name = pd.Series(data_entry.iloc[:, 1].to_numpy(),
                           index=data_entry.iloc[:, 0].to_numpy(),
                           name=data_entry.columns[1])
# Keep one row per file name so that .loc returns exactly one label per image.
labels_by_name = labels_by_name[~labels_by_name.index.duplicated(keep='first')]

missing = [name for name in loaded_names if name not in labels_by_name.index]
if missing:
    raise KeyError(
        f"{len(missing)} loaded image(s) have no row in data_entry, "
        f"e.g. {missing[0]!r}"
    )

# Positional 0..N-1 index so that y[i] is the label of images[i].
y = labels_by_name.loc[loaded_names].reset_index(drop=True)
X = images

In [27]:
# Binary target: 0 for 'No Finding', 1 for any other label.
# Vectorised comparison instead of an element-wise loop: the loop assigned
# into a slice of data_entry (chained assignment) and was not idempotent --
# running the cell a second time turned every label into 1, because the
# already-encoded integers never equal 'No Finding'. The dtype guard makes a
# re-run a no-op.
if not pd.api.types.is_numeric_dtype(y):
    y = y.ne('No Finding').astype('int64')

In [28]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# identical between runs.
split_options = {'test_size': 0.2, 'random_state': 1}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)


In [29]:
# Convert all four splits to NumPy arrays in one pass.
X_train, y_train, X_test, y_test = (
    np.array(part) for part in (X_train, y_train, X_test, y_test)
)

In [30]:
# Training-time preprocessing: scale pixel values by 1/255 and apply random
# shear, zoom and horizontal flips as augmentation.
augmentation = dict(rescale = 1./255,
                    shear_range = 0.2,
                    zoom_range = 0.2,
                    horizontal_flip = True)
train_datagen = ImageDataGenerator(**augmentation)

# Batches of 32 augmented (image, label) pairs drawn from the in-memory arrays.
training_set = train_datagen.flow(X_train, y_train, batch_size = 32)

In [31]:
# Evaluation data only gets the 1/255 rescaling -- no augmentation.
pixel_scale = 1./255
test_datagen = ImageDataGenerator(rescale = pixel_scale)
test_set = test_datagen.flow(X_test, y_test, batch_size = 32)

In [32]:
# Part 2 - Building the CNN
#
# The network is declared as a single ordered list of layers:
#   conv -> max-pool -> conv -> conv -> max-pool -> flatten -> dense -> sigmoid

# Settings shared by all three convolutional layers
conv_kwargs = dict(filters=32, kernel_size=3, activation='relu')

cnn = tf.keras.models.Sequential([
    # Convolution on the 1024x1024 three-channel input
    tf.keras.layers.Conv2D(input_shape=[1024, 1024, 3], **conv_kwargs),
    # Pooling
    tf.keras.layers.MaxPool2D(pool_size=4, strides=2),
    # Second and third convolutional layers, followed by another pooling step
    tf.keras.layers.Conv2D(**conv_kwargs),
    tf.keras.layers.Conv2D(**conv_kwargs),
    tf.keras.layers.MaxPool2D(pool_size=2, strides=2),
    # Flattening
    tf.keras.layers.Flatten(),
    # Full connection
    tf.keras.layers.Dense(units=128, activation='relu'),
    # Output layer: one sigmoid unit for the binary target
    tf.keras.layers.Dense(units=1, activation='sigmoid'),
])

In [33]:
# Part 3 - Training the CNN

# Binary cross-entropy goes with the single sigmoid output; accuracy is
# reported for both the training and the validation batches.
cnn.compile(loss = 'binary_crossentropy',
            optimizer = 'adam',
            metrics = ['accuracy'])

# Three passes over the training generator, validating on the test generator
# after each epoch.
cnn.fit(training_set, epochs = 3, validation_data = test_set)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x23a9c13b4f0>

In [34]:
# Part 4 - Making a single prediction

# Pick a file beyond the first `selection_range` entries of all_imgs, i.e. one
# that was not loaded into the train/test arrays.
test_path = all_imgs[selection_range + 1]
test_image = cv2.imread(test_path)
# cv2.imread returns None for an unreadable file instead of raising.
if test_image is None:
    raise IOError(f"Could not read image: {test_path}")

# Apply the same 1/255 rescaling the ImageDataGenerators use for the training
# and validation batches, then add the leading batch axis predict() expects.
test_image = np.asarray(test_image, dtype = 'float32') / 255.0
test_image = np.expand_dims(test_image, axis = 0)
result = cnn.predict(test_image)

# The sigmoid output is a probability, so it is practically never exactly 1;
# comparing with `== 1` reported 'no pathology' for almost every input.
# Threshold at 0.5 instead.
if result[0][0] > 0.5:
    prediction = 'pathology'
else:
    prediction = 'no pathology'

print(prediction)

no pathology
