In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import glob
import re
from tensorflow.keras import datasets, layers, models
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import cv2
import warnings
from keras.applications.mobilenet_v2 import preprocess_input, decode_predictions

In [None]:
warnings.filterwarnings('once')

In [None]:
# Collect every PNG from the two X-ray folders.
# glob returns matches in arbitrary, filesystem-dependent order; sorting makes
# the row order of the resulting DataFrame (and therefore any seeded split
# built from it) identical across machines and re-runs.
filelist_c = sorted(glob.glob('input/china_xrays/*.png'))
filelist_m = sorted(glob.glob('input/montgomery_xrays/*.png'))

# China images first, then Montgomery.
filelist = filelist_c + filelist_m

In [None]:
# The label is whatever sits between "####_" (four digits and an underscore)
# and the trailing ".png" of the file name.
#   - the dot is escaped so only a literal ".png" extension matches
#   - "$" anchors the match to the end of the path
#   - the captured group may not contain a path separator, so a directory
#     named e.g. "2020_scans" can never be mistaken for the start of the label
label_pattern = re.compile(r'[0-9]{4}_([^/\\]+?)\.png$')

labels = []
for file in filelist:
    match = label_pattern.search(file)
    if match is None:
        # Fail with the offending path instead of a bare IndexError.
        raise ValueError(f"Cannot extract a label from file name: {file!r}")
    labels.append(match.group(1))

In [None]:
# `df` holds the image paths, `targetdf` the matching labels; both share the
# same index so they can be split together and re-joined afterwards.
df = pd.DataFrame({'file': filelist})
targetdf = pd.DataFrame({'target': labels}, index=df.index)

# Preview paths next to their labels. This must be the last expression of the
# cell to be rendered; a mid-cell `df.head()` is silently discarded.
df.assign(target=targetdf['target']).head()

In [None]:
# Fixed seed so the train / validation / test partition is the same on every
# Restart & Run All; without it each run trains and evaluates on different images.
SPLIT_SEED = 42

# Hold out 10% of all images as the test set, stratified on the label.
traindf, testdf, ytrain, ytest = train_test_split(
    df, targetdf, stratify=targetdf, test_size=0.1, random_state=SPLIT_SEED
)
# assign() builds new frames rather than writing into the slices returned by
# train_test_split, which avoids pandas' SettingWithCopyWarning.
traindf = traindf.assign(target=ytrain['target'])
testdf = testdf.assign(target=ytest['target'])

# Carve a stratified 10% of the remaining rows off as the validation set.
# valdf is a row subset of traindf, so it already carries the 'target' column.
traindf, valdf, ytrain, yval = train_test_split(
    traindf, traindf['target'], stratify=traindf['target'], test_size=0.1, random_state=SPLIT_SEED
)

In [None]:
# Pixel scaling is done by MobileNetV2's own preprocess_input, which maps raw
# 0-255 values to the [-1, 1] range the pretrained weights expect.
# Do NOT also pass rescale=1./255: ImageDataGenerator applies `rescale` after
# `preprocessing_function`, which would shrink the inputs to roughly +/-0.004.

# Training images get light augmentation (flip, small zoom / rotation / shift);
# pixels exposed by a transform are filled with black (cval=0).
train_generator = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    zoom_range=0.1,
    shear_range=0,
    rotation_range=5,
    width_shift_range=0.05,
    height_shift_range=0.05,
    fill_mode='constant',
    cval=0,
)

# Evaluation images are only preprocessed, never augmented.
test_generator = ImageDataGenerator(preprocessing_function=preprocess_input)

In [None]:
# Training batches: augmented and reshuffled every epoch.
train = train_generator.flow_from_dataframe(
    dataframe=traindf,
    x_col='file',
    y_col='target',
    class_mode='binary',
    target_size=(224, 224),
    batch_size=32,
    shuffle=True,
)

# Validation batches come from the non-augmenting generator and are not
# shuffled, so validation metrics are measured on the unmodified images and
# are comparable from epoch to epoch.
val = test_generator.flow_from_dataframe(
    dataframe=valdf,
    x_col='file',
    y_col='target',
    class_mode='binary',
    target_size=(224, 224),
    batch_size=32,
    shuffle=False,
)

In [None]:
# ImageNet-pretrained MobileNetV2 feature extractor (no classification top)
# followed by a small binary classification head.
model = models.Sequential([
    tf.keras.applications.MobileNetV2(weights='imagenet', input_shape=(224, 224, 3), include_top=False),
    layers.GlobalMaxPooling2D(),               # collapse the spatial feature map to one vector
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),     # single sigmoid unit for the binary target
])
model.summary()

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit.
# Accuracy and recall are both reported during training and validation.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['acc', tf.keras.metrics.Recall()],
)

In [None]:
# len() of a Keras data iterator is the number of batches needed to cover
# every sample once, including the final partial batch. Using it instead of
# `samples // 32` keeps the step counts in sync with the iterators' batch size,
# stops the trailing images being skipped, and cannot collapse to 0
# validation steps when the validation set is smaller than one batch.
# The History object is kept so the training curves can be inspected afterwards.
history = model.fit(
    train,
    epochs=10,
    steps_per_epoch=len(train),
    validation_data=val,
    validation_steps=len(val),
    verbose=1,
)
