In [1]:
import os
import glob
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

from sklearn import svm
from sklearn.ensemble import IsolationForest

In [2]:
# File discovery follows Arnold's data-loading approach: one root folder per split.
train_path = 'MRIscans/Training/'
val_path = 'MRIscans/Validation/'
test_path = 'MRIscans/Testing/'

# Training and validation files come from the no_tumor_* sub-folders.
train_files = glob.glob(os.path.join(train_path, 'no_tumor_train', '*.*'))
val_files = glob.glob(os.path.join(val_path, 'no_tumor_val', '*.*'))

# The test split is read from two sub-folders; tumor scans are listed first.
test_files_pos = glob.glob(os.path.join(test_path, 'tumor_test', '*.*'))
test_files_neg = glob.glob(os.path.join(test_path, 'no_tumor', '*.*'))
test_files = [*test_files_pos, *test_files_neg]

# Labels aligned with test_files: 0 for every tumor file, 1 for every non-tumor file.
test_labels = np.array([0 for _ in test_files_pos] + [1 for _ in test_files_neg])

# Summary of how many files were found per split.
print(f'Number of training files: {len(train_files)}')
print(f'Number of validation files: {len(val_files)}')
print(f'Number of test files: {len(test_files)}')

# Class balance inside the test split.
print(f'\nNumber of tumor images in test set: {len(test_files_pos)}')
print(f'Number of non-tumor images in test set: {len(test_files_neg)}')

Number of training files: 1841
Number of validation files: 399
Number of test files: 867

Number of tumor images in test set: 129
Number of non-tumor images in test set: 738


In [3]:
# Show the first path found in each split as a quick sanity check of the globs.
for split_files in (train_files, val_files, test_files):
    print(split_files[0])

MRIscans/Training/no_tumor_train\1 no.jpeg
MRIscans/Validation/no_tumor_val\Tr-no_0825.jpg
MRIscans/Testing/tumor_test\p (179).jpg


In [4]:
def read_and_prep_imgs(files, target_size=(250, 250)):
    """Load image files and return them as one ResNet50-ready batch.

    Parameters
    ----------
    files : iterable of str
        Paths of the images to load.
    target_size : tuple of int, optional
        ``(height, width)`` each image is resized to while loading.
        Defaults to ``(250, 250)``, the size used throughout this notebook.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_images, height, width, 3)`` that has been passed
        through ResNet50's ``preprocess_input``. For an empty ``files`` an
        empty float32 array with that same trailing shape is returned.
    """
    # Materialise once so generators are supported and emptiness can be checked.
    files = list(files)
    height, width = target_size

    if not files:
        # np.array([]) would be 1-D, which preprocess_input cannot index by
        # channel; return a correctly shaped empty batch instead.
        # float32 is used to match img_to_array's default dtype (Keras floatx).
        return np.empty((0, height, width, 3), dtype='float32')

    # Decode and convert each image in a single pass so the PIL images do not
    # all have to stay in memory alongside their array copies.
    dataset = np.array([
        img_to_array(load_img(img_path, target_size=target_size))
        for img_path in files
    ])
    # appropriately preprocess images for resnet use
    return preprocess_input(dataset)

In [5]:
# Decode and preprocess every split up front: train, then validation, then test.
train_dataset = read_and_prep_imgs(train_files)
val_dataset = read_and_prep_imgs(val_files)
test_dataset = read_and_prep_imgs(test_files)

# Report the resulting array shape of each split.
for split_label, split_array in (
    ('Training Set:    ', train_dataset),
    ('Validation Set:  ', val_dataset),
    ('Testing Set:     ', test_dataset),
):
    print(split_label, split_array.shape)

Training Set:     (1841, 250, 250, 3)
Validation Set:   (399, 250, 250, 3)
Testing Set:      (867, 250, 250, 3)


---

In [None]:
# preprocessing here

---

# One-Class SVM & Isolation Forest

**Basic idea**\
First, apply a pre-trained CNN (ResNet50) to extract a compact, meaningful feature vector from each image.\
Then, use those feature vectors as input to a one-class SVM and an Isolation Forest.

In [None]:
# Feature extraction with ResNet50.
# include_top=False leaves out the fully connected layer that is only used for
# ImageNet predictions, so the pretrained network can serve as a feature
# extractor; pooling='avg' applies global average pooling to its output.
resnet_model = ResNet50(
    include_top=False,
    weights='imagenet',
    pooling='avg',
    input_shape=(250, 250, 3),
)

train_features = resnet_model.predict(train_dataset)
# TODO: this cell has not been run to completion yet (it was taking over 10 minutes).
# Validation/test feature extraction stays disabled until that is resolved:
# val_features = resnet_model.predict(val_dataset)
# test_features = resnet_model.predict(test_dataset)

In [None]:
# Fit both anomaly detectors on the ResNet50 feature vectors of the training
# scans. scikit-learn estimators require a 2-D (n_samples, n_features) matrix,
# so they must be given `train_features` (the pooled CNN output) and not the
# 4-D image tensor `train_dataset`, which fit() rejects.
# TODO: run once the feature-extraction cell that defines `train_features` has finished.
oneclass_svm = svm.OneClassSVM(kernel='rbf')
# Fixed random_state so the randomly built forest is reproducible across runs.
iso_forest = IsolationForest(n_estimators=100, n_jobs=-1, random_state=42)

oneclass_svm.fit(train_features)
iso_forest.fit(train_features)