Skip to content

Commit

Permalink
add machine learning data and option to CBCT
Browse files Browse the repository at this point in the history
  • Loading branch information
jrkerns committed May 18, 2016
1 parent 93607ce commit 8953c4e
Show file tree
Hide file tree
Showing 40 changed files with 327 additions and 27 deletions.
107 changes: 107 additions & 0 deletions machinelearning/cbct/build_cbct_images.py
@@ -0,0 +1,107 @@
import os.path as osp
import os
import math

import dicom
import matplotlib.pyplot as plt
import numpy as np
from pylinac import image
from pylinac.core.io import TemporaryZipDirectory
from scipy.misc import imresize
from sklearn import preprocessing


def is_dicom(path):
    """Whether the file is a readable DICOM file via pydicom.

    Parameters
    ----------
    path : str
        Path of the file to probe.

    Returns
    -------
    bool
        True if ``dicom.read_file`` could read the file and its
        ``pixel_array`` could be accessed; False if either step failed.
    """
    try:
        ds = dicom.read_file(path, force=True)
        # The file is read with force=True, so the pixel data access is the
        # check that actually rejects files without usable image data.
        ds.pixel_array
    except Exception:
        # Any read/decode error means "not a usable DICOM image".
        # KeyboardInterrupt and SystemExit are deliberately not caught so a
        # long folder scan can still be interrupted.
        return False
    return True


def get_files(folder, func):
    """Recursively collect the file paths under `folder` that `func` accepts.

    Parameters
    ----------
    folder : str
        Root directory to walk.
    func : callable
        Called with each full file path; the path is kept when the result is truthy.

    Returns
    -------
    list of str
        The accepted paths, in the order the directory walk produced them.
    """
    candidates = (
        osp.join(parent, name)
        for parent, _, names in os.walk(folder)
        for name in names
    )
    return [candidate for candidate in candidates if func(candidate)]


def identify_images(zip_file):
    """Interactively label the DICOM images of a zip archive and save them for later training.

    Every DICOM image found in the archive is displayed in 5x5 grids, each
    subplot titled with the image's index. The user then enters the indices of
    the HU slices one at a time; those entries get label 1 and all others stay
    0. Each image is resized and flattened by ``process_image``, min-max scaled
    row by row, and the results are written next to the archive as
    ``images_<archive name>.npy`` and ``labels_<archive name>.npy``.

    Parameters
    ----------
    zip_file : str
        Path to the zip archive holding the images.
    """
    split_val = 25  # images per figure; matches the 5x5 subplot grid below
    with TemporaryZipDirectory(zip_file) as zfiles:
        filepaths = get_files(zfiles, is_dicom)
        feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
        labels = np.zeros(len(filepaths))

        # Show the images in pages of `split_val` so the indices can be read off.
        rounds = int(math.ceil(len(filepaths) / split_val))
        for n in range(rounds):
            offset = split_val * n
            _, axes = plt.subplots(5, 5)
            page = filepaths[offset:offset + split_val]
            for axis, (idx, fp) in zip(axes.flatten(), enumerate(page, start=offset)):
                img = image.load(fp)
                plt.sca(axis)
                plt.imshow(img.array, cmap=plt.cm.Greys)
                plt.axis('off')
                plt.title(idx)
            plt.show()

        _prompt_for_hu_labels(labels)

        # Build the features inside the with-block: the extracted files are
        # presumably removed once the temporary directory context exits.
        for idx, fp in enumerate(filepaths):
            feature_array[idx, :] = process_image(fp)

    scaled_features = preprocessing.minmax_scale(feature_array, axis=1)
    dir2write = osp.dirname(zip_file)
    set_name = osp.splitext(osp.basename(zip_file))[0]
    np.save(osp.join(dir2write, 'images_' + set_name), scaled_features)
    np.save(osp.join(dir2write, 'labels_' + set_name), labels)


def _prompt_for_hu_labels(labels):
    """Prompt for HU slice indices until 'done' is entered, setting those entries of `labels` to 1.

    Invalid entries (non-integers or indices outside the array) are reported
    and asked for again instead of aborting the labelling session.
    """
    while True:
        answer = input("Input the HU indices sequentially, one at a time. Type 'done' when finished:").strip()
        if answer == 'done':
            return
        try:
            index = int(answer)
        except ValueError:
            print("'{}' is neither an integer index nor 'done'; try again.".format(answer))
            continue
        if not 0 <= index < len(labels):
            print("Index {} is outside the {} available images; try again.".format(index, len(labels)))
            continue
        labels[index] = 1


def process_image(path):
    """Load the image at `path`, resize it to 100x100 and return it flattened.

    Parameters
    ----------
    path : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        The flattened result of resizing the image array to (100, 100).
    """
    loaded = image.load(path, dtype=np.float32)
    thumbnail = imresize(loaded.array, size=(100, 100), mode='F')
    return thumbnail.flatten()


def load_images():
    """Load the built images and their labels for training.

    Searches the ``data`` folder next to this module for ``images_<name>.npy``
    files and pairs each one with the ``labels_<name>.npy`` file in the same
    folder. Pairing by name (rather than relying on the directory listing
    order of two independent searches) guarantees that row ``i`` of the image
    array belongs to entry ``i`` of the label array.

    Returns
    -------
    img_arr : numpy.ndarray
        All image arrays stacked vertically, in sorted file order.
    labels_arr : numpy.ndarray
        The matching label arrays concatenated in the same order.

    Raises
    ------
    Whatever ``numpy.load`` raises when an image file has no matching label file.
    """
    path = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    img_prefix = 'images_'

    def is_image_set(filepath):
        # Match on the file name only, so folder names cannot cause false hits.
        name = osp.basename(filepath)
        return name.startswith(img_prefix) and name.endswith('.npy')

    img_files = sorted(get_files(path, is_image_set))
    label_files = [
        osp.join(osp.dirname(f), 'labels_' + osp.basename(f)[len(img_prefix):])
        for f in img_files
    ]
    img_arr = np.vstack([np.load(f) for f in img_files])
    labels_arr = np.concatenate([np.load(f) for f in label_files])
    return img_arr, labels_arr


if __name__ == '__main__':
    # Label the selected archives from the `data` folder next to this script.
    data_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    # Only the archives named here are processed in a run. Other sets that have
    # been listed here: 'Elekta_7.zip', 'Elekta_8.zip', 'Elekta_11.zip',
    # 'Elekta_12.zip', 'Standard head.zip'.
    archive_names = ['CBCT_3.zip']
    for archive_name in archive_names:
        identify_images(osp.join(data_dir, archive_name))
Binary file added machinelearning/cbct/cbct_classifier.pkl.gz
Binary file not shown.
35 changes: 35 additions & 0 deletions machinelearning/cbct/cbct_learner.py
@@ -0,0 +1,35 @@
import gzip
import pickle
import time

from sklearn import svm, metrics, cross_validation, grid_search

from machinelearning.cbct.build_cbct_images import load_images

# Load the saved CBCT slice features and their labels.
data, labels = load_images()

# Train on 85% of the samples; the remainder is held out for the final report.
data_train, data_test, y_train, y_test = cross_validation.train_test_split(
    data, labels, train_size=0.85,
)

# Grid of SVC settings to search: a linear kernel at three values of C.
parameters = dict(kernel=['linear'], C=[1, 0.1, 0.05])

start = time.time()
classifier = grid_search.GridSearchCV(svm.SVC(verbose=True), parameters)
classifier.fit(data_train, y_train)
print("Training took:", time.time() - start)

# One line per grid point: mean score, +/- two standard deviations, parameters.
for grid_point in classifier.grid_scores_:
    params, mean_score, scores = grid_point
    spread = scores.std() * 2
    print("%0.3f (+/-%0.03f) for %r" % (mean_score, spread, params))
print()
print(classifier.best_estimator_)
print("Best parameters found:", classifier.best_params_, sep="\n")
print("With a training score of:", classifier.best_score_, sep="\n")
print()

# Evaluate the fitted search on the held-out samples.
predictions = classifier.predict(data_test)
print("Classification report:", metrics.classification_report(y_test, predictions), sep="\n")

# Persist the fitted search object as a gzipped pickle in the working directory.
with gzip.open('cbct_classifier.pkl.gz', mode='wb') as model_file:
    pickle.dump(classifier, model_file)
Binary file added machinelearning/cbct/data/images_CBCT_3.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_CBCT_4.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Elekta_11.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Elekta_12.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Elekta_7.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Elekta_8.npy
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Pelvis-Obese.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Pelvis.npy
Binary file not shown.
Binary file not shown.
Binary file added machinelearning/cbct/data/images_Toshiba.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_UNC120kV.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/images_thorax.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_CBCT_3.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_CBCT_4.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Elekta_11.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Elekta_12.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Elekta_7.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Elekta_8.npy
Binary file not shown.
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Low dose head.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Low dose thorax.npy
Binary file not shown.
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Pelvis-Obese.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Pelvis.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Standard head.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_Toshiba.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_UNC120kV.npy
Binary file not shown.
Binary file added machinelearning/cbct/data/labels_thorax.npy
Binary file not shown.
Binary file added machinelearning/pylinac_classifier.pkl
Binary file not shown.
79 changes: 79 additions & 0 deletions machinelearning/pylinac_images.py
@@ -0,0 +1,79 @@
"""A script to generate thumbnails of pylinac images for machine learning"""
import os.path as osp
import os
import concurrent.futures
import time

import matplotlib.pyplot as plt
import numpy as np
from pylinac import image
from scipy.misc import imresize
from sklearn import preprocessing


def get_image_files(folder):
    """Get a list of the files under `folder` that ``image.is_image`` accepts.

    Each file found by walking `folder` is checked on a thread pool, and the
    elapsed time is printed when all checks are finished.

    Parameters
    ----------
    folder : str
        Root directory to search recursively.

    Returns
    -------
    list of str
        Paths of the accepted files, in the order the checks were yielded by
        ``as_completed`` (not the directory order).
    """
    started = time.time()
    pending = {}  # future -> path it is checking
    with concurrent.futures.ThreadPoolExecutor(max_workers=32) as pool:
        for parent, _, names in os.walk(folder):
            for name in names:
                candidate = osp.join(parent, name)
                pending[pool.submit(image.is_image, candidate)] = candidate
    valid_paths = [
        pending[finished]
        for finished in concurrent.futures.as_completed(pending)
        if finished.result()
    ]
    print("Done with {} in {:.2f}s".format(osp.basename(folder), time.time() - started))
    return valid_paths


def process_image(path):
    """Return the image at `path` as a flattened 100x100 array.

    The image is loaded with a float32 dtype via ``image.load``, resized to
    (100, 100) with ``imresize`` in mode 'F', and flattened.
    """
    source = image.load(path, dtype=np.float32)
    resized = imresize(source.array, size=(100, 100), mode='F')
    flat = resized.flatten()
    return flat


def build_images(path_stub=None):
    """Completely load, resize, and save the images for training. Main function.

    Image files are collected from one sub-folder per image class, resized and
    flattened by ``process_image`` on a thread pool, min-max scaled to (0, 1)
    row by row, and saved next to this module as ``images.npy`` together with
    the integer class labels as ``labels.npy``.

    Class labels: 0 = Picket Fences, 1 = PipsPro, 2 = Leeds, 3 = Winston-Lutz.

    Parameters
    ----------
    path_stub : str, optional
        Root folder containing the class sub-folders. Defaults to the original
        author's local test-file folder, so existing calls are unchanged.
    """
    if path_stub is None:
        path_stub = r'D:\Users\James\Dropbox\Programming\Python\Projects\pylinac test files'

    # One entry per class; the position in this tuple is the class label.
    # (A 'CBCTs' folder was listed here previously but is not included.)
    class_folders = (
        ('Picket Fences',),
        ('2D Image quality phantoms', 'PipsPro'),
        ('2D Image quality phantoms', 'Leeds'),
        ('Winston-Lutz',),
    )
    files_per_class = [get_image_files(osp.join(path_stub, *parts)) for parts in class_folders]
    filepaths = [fp for class_files in files_per_class for fp in class_files]
    print("{} files found".format(len(filepaths)))

    # preallocate: one row of 100*100 values per image
    total_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    print("Training array preallocated")

    # resize each image and add to the training array; each future remembers
    # its row so results can be stored in whatever order they complete
    start = time.time()
    with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
        futures = {executor.submit(process_image, path): idx for idx, path in enumerate(filepaths)}
        for future in concurrent.futures.as_completed(futures):
            total_array[futures[future], :] = future.result()
    print("Training array set in {:.2f}s".format(time.time() - start))

    # feature scale the images
    scaled_array = preprocessing.minmax_scale(total_array, feature_range=(0, 1), axis=1)
    print("Training array scaled")

    # save arrays to disk for future use; labels follow the same class order
    # in which `filepaths` was assembled
    out_dir = osp.dirname(osp.abspath(__file__))
    labels = np.concatenate(
        [np.repeat(label, len(class_files)) for label, class_files in enumerate(files_per_class)])
    np.save(osp.join(out_dir, 'images'), scaled_array)
    np.save(osp.join(out_dir, 'labels'), labels)
    print("Images build")


def load_images():
    """Load the built images and labels for training.

    The arrays are read from the folder containing this module, which is where
    ``build_images`` saves them, so loading works regardless of the current
    working directory.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)`` as stored in ``images.npy`` and ``labels.npy``.
    """
    module_dir = osp.dirname(osp.abspath(__file__))
    images = np.load(osp.join(module_dir, 'images.npy'))
    labels = np.load(osp.join(module_dir, 'labels.npy'))
    return images, labels

52 changes: 52 additions & 0 deletions machinelearning/pylinac_learner.py
@@ -0,0 +1,52 @@
from sklearn import svm, metrics, cross_validation, grid_search
import pickle
import gzip

from pylinac_ml.pylinac_images import load_images

# build_images()
# Load the previously built image features and class labels.
data, labels = load_images()

# Train on 80% of the samples; the remainder is held out for the final report.
data_train, data_test, y_train, y_test = cross_validation.train_test_split(
    data, labels, train_size=0.8,
)

# Single-point grid for an RBF-kernel SVC.
# NOTE: an MLPClassifier grid search was tried here previously and is disabled.
parameters = dict(kernel=['rbf'], C=[10], gamma=[0.001])
classifier = grid_search.GridSearchCV(svm.SVC(verbose=True), parameters)

classifier.fit(data_train, y_train)

# One line per grid point: mean score, +/- two standard deviations, parameters.
for grid_point in classifier.grid_scores_:
    params, mean_score, scores = grid_point
    spread = scores.std() * 2
    print("%0.3f (+/-%0.03f) for %r" % (mean_score, spread, params))
print()
print(classifier.best_estimator_)
print("Best parameters found:", classifier.best_params_, sep="\n")
print("With a training score of:", classifier.best_score_, sep="\n")
print()

# Evaluate the fitted search on the held-out samples.
predictions = classifier.predict(data_test)
print("Classification report:", metrics.classification_report(y_test, predictions), sep="\n")

# Persist the fitted search object as a gzipped pickle in the working directory.
# NOTE: a pickle round-trip check of the saved model was tried here previously
# and is disabled.
with gzip.open('pylinac_model.pkl.gz', mode='wb') as model_file:
    pickle.dump(classifier, model_file)

0 comments on commit 8953c4e

Please sign in to comment.