In [None]:
import tensorflow as tf
from tensorflow import keras
from keras.applications import resnet50
from PIL import Image
from numpy import asarray

import random
import os
import pandas as pd
import numpy as np
from skimage.io import imread_collection
import skimage.io
import skimage.color
import skimage.transform
from platform import python_version
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.applications.resnet50 import ResNet50
from keras.preprocessing import image
from keras.applications.resnet50 import preprocess_input, decode_predictions

# Report the TensorFlow version and the Python interpreter version, one per line,
# so the run environment is recorded alongside the notebook output.
for version_string in (tf.__version__, python_version()):
    print(version_string)

In [None]:
def list_jpg_filenames(directory):
    """Return the base names of every ``.jpg`` file found under ``directory``.

    The directory tree is walked recursively with ``os.walk``; only the file
    names (not their paths) are returned, in the order the walk yields them.
    A directory that does not exist produces an empty list, because
    ``os.walk`` ignores listing errors by default.
    """
    return [
        name
        for _root, _dirs, names in os.walk(directory)
        for name in names
        if name.endswith('.jpg')
    ]


# extract filenames from the folder of images
filenames = list_jpg_filenames('../input/rsna-hemorrhage-jpg/train_jpg/train_jpg')

# This is simply the number of .jpg files discovered; no labels have been
# consulted yet, so the count says nothing about hemorrhage status.
print("Number of images found: "+"{}".format(len(filenames)))

In [None]:
# Load the label table and keep only the rows whose ID has a matching image file.
labels = pd.read_feather('../input/rsna-hemorrhage-jpg/meta/meta/labels.fth')

# Turn each file name into its ID by dropping the extension.
# os.path.splitext removes exactly the final ".jpg"; str.rstrip(".jpg") would
# instead strip ANY trailing run of the characters '.', 'j', 'p', 'g' and could
# eat the end of an ID that happens to finish with one of those letters.
idstosearch = [os.path.splitext(item)[0] for item in filenames]

#now search the "ID" column for ids that correspond to our filenames
labels = labels[labels['ID'].isin(idstosearch)]

In [None]:
# Assemble the working subset: every row with 3 or more hemorrhage subtypes
# flagged, plus a random draw of 20,000 rows with no hemorrhage ('any' == 0).
random.seed(10)
subtype_cols = ['epidural', 'intraparenchymal', 'intraventricular', 'subarachnoid', 'subdural']
new_hem = labels[labels[subtype_cols].sum(axis=1) >= 3].copy()
new_normal = labels[labels['any'] == 0].copy()
# DataFrame.sample does not draw from Python's `random` module, so the
# random.seed() call above leaves this draw unseeded. Pass the seed explicitly
# so the same 20,000 rows are selected on every run.
new_normal = new_normal.sample(n = 20000, random_state = 10)
frames = [new_normal, new_hem]
new = pd.concat(frames)
new = new.sort_values('ID')
print("Number of images with hemorrhage: "+"{}".format(len(new_hem)))
print("Number of healthy images: "+"{}".format(len(new_normal)))
print("Percent of dataset with 3+ hemorrhage types: "+"{:.2%}".format(len(new_hem)/len(new)))

In [None]:
# Hold out 10% of the subset as a test set and build the image path for every row.
# random_state pins the shuffle so the same rows land in each split on every run.
# NOTE: this cell rebinds `new` to the training portion; re-running it without
# first re-running the cell that builds `new` would split the training set again.
new, test = train_test_split(new, test_size = 0.1, random_state = 10)

image_dir = "../input/rsna-hemorrhage-jpg/train_jpg/train_jpg/"

testlist = test['ID']
testids = [image_dir + x + ".jpg" for x in testlist]
newlist = new['ID']
newids = [image_dir + x + ".jpg" for x in newlist]

In [None]:
# Load the held-out images into ONE dense array of shape (n_images, 224, 224, 3).
# The previous version allocated a 1-D object array and stored a separate ndarray
# in each slot; Keras cannot convert such an array-of-arrays into a tensor.
# load_img's target_size is (height, width) — the channel count is not part of it.
# `first_image` is retained for any later cell that still references it.
first_image = image.load_img(testids[0], target_size=(224, 224))

# uint8 is what the decoded RGB images contain and keeps memory at one byte per channel.
testimages_arr = np.empty((len(testids), 224, 224, 3), dtype = np.uint8)
for i, path in enumerate(testids):
    testimages_arr[i] = asarray(image.load_img(path, target_size=(224, 224)))

In [None]:
# Load the training images into ONE dense array of shape (n_images, 224, 224, 3).
# A 1-D object array holding one ndarray per slot (the previous layout) cannot be
# converted to a tensor by model.fit; a regular 4-D uint8 array can.
# load_img's target_size is (height, width) — the channel count is not part of it.
newimages_arr = np.empty((len(newids), 224, 224, 3), dtype = np.uint8)
for i, path in enumerate(newids):
    newimages_arr[i] = asarray(image.load_img(path, target_size=(224, 224)))

In [None]:
# Bare expression: the notebook renders the training-image array as this cell's output.
# NOTE(review): `newimages_arr.shape` / `.dtype` would likely be a more useful sanity check
# than the array repr.
newimages_arr

In [None]:
# Pull out the binary target for each split and report the positive rate.
# The column is selected by name rather than by position so the cell keeps
# working if the label table's column order changes. 'any' is the same flag
# the subset-building cell uses to identify images without hemorrhage.
# (Assumes position 1 of the label table was the 'any' column — confirm.)
labels_new = new['any']
print("Percent train images hemorrhage positive: "+"{:.2%}".format(labels_new.sum()/len(new)))
labels_test = test['any']
print("Percent test images hemorrhage positive: "+"{:.2%}".format(labels_test.sum()/len(test)))

In [None]:
# Convert the label Series to plain NumPy arrays for Keras.
# np.asarray accepts both a Series and an ndarray, so re-running this cell is a
# no-op; pd.Series.to_numpy(x) raises once x has already been converted.
labels_new = np.asarray(labels_new)
labels_test = np.asarray(labels_test)

In [None]:
# Instantiate ResNet50 initialised with its pretrained ImageNet weights.
# (`resnet50` is also imported in the first cell; repeating the import here is harmless.)
# NOTE(review): with only `weights` given, this presumably keeps the stock ImageNet
# classification head rather than a 2-class head for the 0/1 hemorrhage labels —
# confirm whether `include_top=False` plus a small custom head was intended.
# NOTE(review): `preprocess_input` is imported but not applied to the image arrays
# in the visible cells — confirm whether the inputs should be preprocessed first.
from keras.applications import resnet50

model = resnet50.ResNet50(weights="imagenet")

In [None]:
# Compile the model: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy reported as the training/validation metric.
# NOTE(review): this loss is normally paired with integer class-index labels;
# `labels_new` appears to be a 0/1 column — confirm that matches the model's output layer.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
# Alternative configuration, currently disabled (Adadelta optimizer + binary cross-entropy):
#model.compile(optimizer=keras.optimizers.Adadelta(),
#              loss='binary_crossentropy',
#              metrics=['accuracy'])

In [None]:
# Train model: 5 epochs on the training images and labels, with 10% of the
# samples passed to fit() set aside for validation via `validation_split`.
model.fit(newimages_arr, labels_new, epochs = 5, validation_split = 0.1)

In [None]:
# Previous image-loading approach (superseded by the per-file loading loops; kept for reference)

# images = imread_collection(col_dir)
# images_arr = skimage.io.collection.concatenate_images(images)

In [None]:
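# Validate model (currently disabled)
# NOTE(review): `images_val` / `labels_val` are not defined in the visible cells; the held-out
# split built earlier is `testimages_arr` / `labels_test` — use those when re-enabling this.
#test_loss, test_acc = model.evaluate(images_val, labels_val, verbose=2)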

#print('\nTest accuracy:', test_acc)