In [1]:
import urllib.request as request
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.models import Sequential
from tensorflow.keras.activations import sigmoid, relu
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import BinaryAccuracy, Accuracy
from tensorflow.keras.applications.vgg16 import VGG16
import numpy as np

In [2]:
url = 'https://us13.mailchimp.com/mctx/clicks?url=https%3A%2F%2Fcs.stanford.edu%2Fgroup%2Fmlgroup%2FMURA-v1.1.zip&h=f4fd3fb855fc55693dc0852de9c17bbbe39685ab0b4ce274f8d439df66274a85&v=1&xid=0a66f73932&uid=55365305&pool=contact_facing&subject=MURA-v1.1%3A+Link+To+Dataset'
request.urlretrieve(url, 'data.zip')

('data.zip', <http.client.HTTPMessage at 0x7f96cf9e4cd0>)

In [3]:
!unzip -q /content/data.zip

In [4]:
def add_header(path, headers):
  df = pd.read_csv(path, names = headers)
  df.to_csv(path, index=False)

In [5]:
add_header('/content/MURA-v1.1/train_image_paths.csv', ['path'])
add_header('/content/MURA-v1.1/train_labeled_studies.csv', ['path','label'])

add_header('/content/MURA-v1.1/valid_image_paths.csv', ['path'])
add_header('/content/MURA-v1.1/valid_labeled_studies.csv', ['path','label'])

In [6]:
train_paths = pd.read_csv('/content/MURA-v1.1/train_image_paths.csv')
train_labels = pd.read_csv('/content/MURA-v1.1/train_labeled_studies.csv')

valid_paths = pd.read_csv('/content/MURA-v1.1/valid_image_paths.csv')
valid_labels = pd.read_csv('/content/MURA-v1.1/valid_labeled_studies.csv')

In [7]:
def prepare_data(paths_df, labels_df, sub_folder):
  images, labels, folders, images_labels = [], [], [], []

  for idx, row in paths_df.iterrows():
    if sub_folder in row['path']:
      images.append(row['path'])

  for idx, row in labels_df.iterrows():
    if sub_folder in row['path']:  
      folders.append(row['path'])
      labels.append(row['label'])

  for i in range(len(folders)):
    for image in images:
      if folders[i] in image:
        images_labels.append(labels[i])
  
  merged_df = pd.DataFrame({'path': images,
                          'label': images_labels})
  
  merged_df['label'] = merged_df['label'].apply(lambda x: str(x))
  merged_df['path'] = merged_df['path'].apply(lambda x: '/content/' + x)
  return merged_df

In [8]:
train = prepare_data(train_paths, train_labels, 'XR_WRIST')
valid = prepare_data(valid_paths, valid_labels, 'XR_WRIST')

In [9]:
train.head()

Unnamed: 0,path,label
0,/content/MURA-v1.1/train/XR_WRIST/patient06359...,1
1,/content/MURA-v1.1/train/XR_WRIST/patient06359...,1
2,/content/MURA-v1.1/train/XR_WRIST/patient06360...,1
3,/content/MURA-v1.1/train/XR_WRIST/patient06360...,1
4,/content/MURA-v1.1/train/XR_WRIST/patient06360...,1


In [10]:
train_gen =  ImageDataGenerator(
 rescale=1./255,
 rotation_range=50,
 width_shift_range=0.3,
 height_shift_range=0.3,
 shear_range=0.2,
 zoom_range=[0.5,1.0],
 brightness_range=[0.2,1.0],
 horizontal_flip=True,
 fill_mode='nearest'
)

valid_gen = ImageDataGenerator(rescale=1./255)

In [11]:
train_flow = train_gen.flow_from_dataframe(train, x_col ='path', y_col = 'label', class_mode='binary', target_size=(256, 256))
valid_flow = valid_gen.flow_from_dataframe(valid, x_col ='path', y_col = 'label', class_mode='binary', target_size=(256, 256))

Found 9752 validated image filenames belonging to 2 classes.
Found 659 validated image filenames belonging to 2 classes.


In [12]:
pretrained = VGG16(input_shape=(256,256,3),include_top=False, weights="imagenet")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [13]:
pretrained.summary()

Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 256, 256, 3)]     0         
                                                                 
 block1_conv1 (Conv2D)       (None, 256, 256, 64)      1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 256, 256, 64)      36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 128, 128, 64)      0         
                                                                 
 block2_conv1 (Conv2D)       (None, 128, 128, 128)     73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 128, 128, 128)     147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 64, 64, 128)       0     

In [14]:
for layer in pretrained.layers:
 layer.trainable = False

In [15]:
model = Sequential([pretrained,
                    layers.Dropout(0.4),
                    layers.Flatten(),
                    layers.Dense(1024, activation=relu),
                    layers.Dropout(0.3),
                    layers.Dense(1, activation=sigmoid)])

In [16]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 8, 8, 512)         14714688  
                                                                 
 dropout (Dropout)           (None, 8, 8, 512)         0         
                                                                 
 flatten (Flatten)           (None, 32768)             0         
                                                                 
 dense (Dense)               (None, 1024)              33555456  
                                                                 
 dropout_1 (Dropout)         (None, 1024)              0         
                                                                 
 dense_1 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 48,271,169
Trainable params: 33,556,481
No

In [17]:
model.compile(optimizer=Adam(learning_rate=0.001),
              loss=BinaryCrossentropy(),metrics=[BinaryAccuracy()])

In [18]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

history = model.fit(train_flow, epochs=20, validation_data = valid_flow, verbose = 1,callbacks=[callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


In [20]:
history

<keras.callbacks.History at 0x7f96c0211b90>