Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add machine learning data and option to CBCT
- Loading branch information
Showing
40 changed files
with
327 additions
and
27 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
import os.path as osp | ||
import os | ||
import math | ||
|
||
import dicom | ||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
from pylinac import image | ||
from pylinac.core.io import TemporaryZipDirectory | ||
from scipy.misc import imresize | ||
from sklearn import preprocessing | ||
|
||
|
||
def is_dicom(path):
    """Return whether ``path`` is a DICOM file whose pixel data can be read.

    The file is parsed with ``dicom.read_file(path, force=True)`` and its
    ``pixel_array`` attribute is accessed; if either step fails the file is
    treated as "not an image".

    Parameters
    ----------
    path : str
        Path of the file to probe.

    Returns
    -------
    bool
        ``True`` when the file was parsed and ``pixel_array`` was accessible,
        ``False`` when any ordinary exception was raised along the way.
    """
    try:
        # Touching ``pixel_array`` is the actual check: a file can parse
        # (especially with ``force=True``) and still carry no usable pixels.
        dicom.read_file(path, force=True).pixel_array
    except Exception:
        # ``Exception`` instead of a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit still propagate while a large
        # folder is being scanned.
        return False
    return True
|
||
|
||
def get_files(folder, func):
    """Recursively collect the files under ``folder`` that ``func`` accepts.

    Parameters
    ----------
    folder : str
        Root directory; it is traversed with ``os.walk``.
    func : callable
        Predicate called with each full file path; the path is kept when the
        result is truthy.

    Returns
    -------
    list of str
        Accepted file paths, in the order ``os.walk`` produced them.
    """
    return [
        candidate
        for root, _dirs, names in os.walk(folder)
        for candidate in (osp.join(root, name) for name in names)
        if func(candidate)
    ]
|
||
|
||
def identify_images(zip_file):
    """Interactively label the images of a zip archive and save training arrays.

    The archive is extracted to a temporary directory and every file accepted
    by ``is_dicom`` is displayed, 25 at a time, on a 5x5 grid in which each
    image is titled with its index. After ``plt.show()`` returns, the user
    types the indices of the HU slices one at a time and ``done`` to continue
    with the next grid. Every image is then reduced to a feature row by
    ``process_image`` and each row is min-max scaled.

    Two arrays are written next to ``zip_file`` with ``np.save``:
    ``images_<archive name>`` (scaled features, one row per image) and
    ``labels_<archive name>`` (1 for the indices entered, 0 otherwise).

    Parameters
    ----------
    zip_file : str
        Path of the zip archive holding the images.
    """
    split_val = 25  # images per figure; matches the 5x5 subplot grid below

    with TemporaryZipDirectory(zip_file) as zfiles:
        filepaths = get_files(zfiles, is_dicom)
        # One row per image; 10000 columns to hold the flattened 100x100
        # thumbnail returned by ``process_image``.
        feature_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
        labels = np.zeros(len(filepaths))

        rounds = int(math.ceil(len(filepaths) / split_val))
        # NOTE(review): the prompt is assumed to run once per displayed grid;
        # confirm against the intended workflow.
        for n in range(rounds):
            offset = split_val * n
            _, axes = plt.subplots(5, 5)
            batch = filepaths[offset:offset + split_val]
            for axis, (idx, fp) in zip(axes.flatten(), enumerate(batch)):
                img = image.load(fp)
                plt.sca(axis)
                plt.imshow(img.array, cmap=plt.cm.Greys)
                plt.axis('off')
                # Title with the global index so it can be typed at the prompt.
                plt.title(idx + offset)
            plt.show()

            while True:
                label = input("Input the HU indices sequentially, one at a time. Type 'done' when finished:")
                if label == 'done':
                    break
                try:
                    labels[int(label)] = 1
                except (ValueError, IndexError):
                    # A typo must not throw away the labels entered so far.
                    print("Not a valid image index: {!r}".format(label))

        # Must happen inside the ``with`` block: the extracted files only
        # exist while the temporary directory is alive.
        for idx, fp in enumerate(filepaths):
            feature_array[idx, :] = process_image(fp)

    # axis=1 scales each image (row) independently.
    scaled_features = preprocessing.minmax_scale(feature_array, axis=1)

    dir2write = osp.dirname(zip_file)
    stem = osp.splitext(osp.basename(zip_file))[0]
    np.save(osp.join(dir2write, 'images_' + stem), scaled_features)
    np.save(osp.join(dir2write, 'labels_' + stem), labels)
|
||
|
||
def process_image(path):
    """Load the image at ``path`` and return it as a flat 100x100 thumbnail.

    Parameters
    ----------
    path : str
        Path of the image file, passed to ``image.load``.

    Returns
    -------
    numpy.ndarray
        The image array resized to ``(100, 100)`` and flattened to 1-D.
    """
    loaded = image.load(path, dtype=np.float32)
    # mode='F' asks imresize to work on a floating-point image.
    # NOTE: scipy.misc.imresize was deprecated in SciPy 1.0 and removed in
    # 1.3, so this requires an older SciPy.
    thumbnail = imresize(loaded.array, size=(100, 100), mode='F')
    return thumbnail.flatten()
|
||
|
||
def load_images():
    """Load the built images and labels for training.

    Every file under the ``data`` directory next to this module whose file
    name contains ``images`` is loaded and stacked row-wise; every file whose
    name contains ``labels`` is loaded and concatenated.

    Returns
    -------
    img_arr : numpy.ndarray
        All image feature rows, stacked with ``np.vstack``.
    labels_arr : numpy.ndarray
        All labels, joined with ``np.concatenate``.

    Raises
    ------
    ValueError
        Raised by NumPy when no matching files are found (nothing to stack).
    """
    path = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    # Match on the file name only: testing the whole path would accept every
    # file whenever a parent directory happens to contain 'images'/'labels'.
    # Sort both lists: ``os.walk`` yields files in arbitrary order, and the
    # rows of ``images_X`` must line up with the entries of ``labels_X``.
    imgs = sorted(get_files(path, lambda x: 'images' in osp.basename(x)))
    labels = sorted(get_files(path, lambda x: 'labels' in osp.basename(x)))
    img_arr = np.vstack([np.load(f) for f in imgs])
    labels_arr = np.concatenate([np.load(f) for f in labels])
    return img_arr, labels_arr
|
||
|
||
if __name__ == '__main__':
    # Archives live in the ``data`` directory next to this script.
    data_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'data')
    # Archives to label on this run; comment entries in or out as needed.
    zip_names = [
        # 'Elekta_7.zip',
        # 'Elekta_8.zip',
        # 'Elekta_11.zip',
        # 'Elekta_12.zip',
        'CBCT_3.zip',
        # 'Standard head.zip',
        # 'thorax.zip',
    ]
    for zip_name in zip_names:
        identify_images(osp.join(data_dir, zip_name))
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import gzip | ||
import pickle | ||
import time | ||
|
||
from sklearn import svm, metrics, cross_validation, grid_search | ||
|
||
from machinelearning.cbct.build_cbct_images import load_images | ||
|
||
# Feature matrix and label vector returned by build_cbct_images.load_images.
data, labels = load_images()

# 85% of the samples go to the grid search; the rest is held out for the
# classification report at the end.
data_train, data_test, y_train, y_test = cross_validation.train_test_split(
    data, labels, train_size=0.85,
)

# Linear SVM; only the regularisation strength C is searched.
parameters = dict(kernel=['linear'], C=[1, 0.1, 0.05])

start = time.time()
classifier = grid_search.GridSearchCV(svm.SVC(verbose=True), parameters)
classifier.fit(data_train, y_train)
elapsed = time.time() - start
print("Training took:", elapsed)

# One line per parameter combination: mean score (+/- two standard
# deviations of the per-fold scores).
for candidate, mean, fold_scores in classifier.grid_scores_:
    spread = fold_scores.std() * 2
    print("%0.3f (+/-%0.03f) for %r" % (mean, spread, candidate))

# The empty strings reproduce the blank separator lines of the report.
print(
    "",
    classifier.best_estimator_,
    "Best parameters found:",
    classifier.best_params_,
    "With a training score of:",
    classifier.best_score_,
    "",
    "Classification report:",
    sep="\n",
)
predictions = classifier.predict(data_test)
print(metrics.classification_report(y_test, predictions))

# Persist the fitted grid search as a gzip-compressed pickle in the current
# working directory.
with gzip.open('cbct_classifier.pkl.gz', mode='wb') as archive:
    pickle.dump(classifier, archive)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
"""A script to generate thumbnails of pylinac images for machine learning""" | ||
import os.path as osp | ||
import os | ||
import concurrent.futures | ||
import time | ||
|
||
import matplotlib.pyplot as plt | ||
import numpy as np | ||
from pylinac import image | ||
from scipy.misc import imresize | ||
from sklearn import preprocessing | ||
|
||
|
||
def get_image_files(folder):
    """Return the paths under ``folder`` that ``image.is_image`` accepts.

    Every file found by ``os.walk`` is checked on a pool of 32 worker
    threads. The elapsed time is printed when the scan is finished.

    Parameters
    ----------
    folder : str
        Root directory to scan recursively.

    Returns
    -------
    list of str
        Paths of the accepted files, in the order their checks completed.
    """
    start = time.time()
    candidates = [
        osp.join(root, name)
        for root, _dirs, names in os.walk(folder)
        for name in names
    ]
    with concurrent.futures.ThreadPoolExecutor(max_workers=32) as pool:
        # Map each pending check back to the path it is checking.
        pending = {pool.submit(image.is_image, candidate): candidate for candidate in candidates}
        filepaths = [
            pending[task]
            for task in concurrent.futures.as_completed(pending)
            if task.result()
        ]
    print("Done with {} in {:.2f}s".format(osp.basename(folder), time.time() - start))
    return filepaths
|
||
|
||
def process_image(path):
    """Load the image at ``path`` and return it as a flat 100x100 thumbnail.

    Parameters
    ----------
    path : str
        Path of the image file, passed to ``image.load``.

    Returns
    -------
    numpy.ndarray
        The image array resized to ``(100, 100)`` and flattened to 1-D.
    """
    loaded = image.load(path, dtype=np.float32)
    # mode='F' asks imresize to work on a floating-point image.
    # NOTE: scipy.misc.imresize was deprecated in SciPy 1.0 and removed in
    # 1.3, so this requires an older SciPy.
    thumbnail = imresize(loaded.array, size=(100, 100), mode='F')
    return thumbnail.flatten()
|
||
|
||
def build_images(path_stub=None):
    """Completely load, resize, and save the images for training. Main function.

    Image files are collected from four sub-folders of ``path_stub`` (picket
    fences, PipsPro, Leeds and Winston-Lutz), each image is reduced to a
    feature row by ``process_image``, rows are min-max scaled to (0, 1), and
    two arrays are saved next to this module with ``np.save``: ``images``
    (the scaled features) and ``labels`` (0 = picket fence, 1 = PipsPro,
    2 = Leeds, 3 = Winston-Lutz, in the same row order).

    Parameters
    ----------
    path_stub : str, optional
        Root folder of the test files. Defaults to the original author's
        local test-file directory, so existing calls behave as before.
    """
    if path_stub is None:
        path_stub = r'D:\Users\James\Dropbox\Programming\Python\Projects\pylinac test files'

    # get image file paths for each image type
    pf_files = get_image_files(osp.join(path_stub, 'Picket Fences'))
    pipspro_files = get_image_files(osp.join(path_stub, '2D Image quality phantoms', 'PipsPro'))
    leeds_files = get_image_files(osp.join(path_stub, '2D Image quality phantoms', 'Leeds'))
    wl_files = get_image_files(osp.join(path_stub, 'Winston-Lutz'))
    # The position of each group in this tuple is its class label.
    groups = (pf_files, pipspro_files, leeds_files, wl_files)
    filepaths = [path for group in groups for path in group]
    print("{} files found".format(len(filepaths)))

    # preallocate: one row per image, 10000 columns for the flattened
    # 100x100 thumbnail returned by process_image
    total_array = np.zeros((len(filepaths), 10000), dtype=np.float32)
    print("Training array preallocated")

    # resize each image on a thread pool and add it to the training array
    start = time.time()
    with concurrent.futures.ThreadPoolExecutor(max_workers=32) as pool:
        # Remember the destination row of every task: tasks finish out of order.
        row_of = {pool.submit(process_image, path): row for row, path in enumerate(filepaths)}
        for task in concurrent.futures.as_completed(row_of):
            total_array[row_of[task], :] = task.result()
    print("Training array set in {:.2f}s".format(time.time() - start))

    # feature scale the images; axis=1 scales each image (row) independently
    scaled_array = preprocessing.minmax_scale(total_array, feature_range=(0, 1), axis=1)
    print("Training array scaled")

    # save arrays to disk for future use
    module_dir = osp.dirname(osp.abspath(__file__))
    labels = np.concatenate([np.repeat(label, len(group)) for label, group in enumerate(groups)])
    np.save(osp.join(module_dir, 'images'), scaled_array)
    np.save(osp.join(module_dir, 'labels'), labels)
    print("Images build")
|
||
|
||
def load_images():
    """Load the built images for training.

    ``build_images`` saves ``images.npy`` and ``labels.npy`` next to this
    module, so they are looked up there first. If a file is not found there,
    the bare file name is used, i.e. it is resolved against the current
    working directory as before.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)`` as stored on disk.
    """
    module_dir = osp.dirname(osp.abspath(__file__))

    def _locate(filename):
        # Prefer the copy written by build_images; fall back to the old
        # working-directory lookup for backward compatibility.
        candidate = osp.join(module_dir, filename)
        return candidate if osp.isfile(candidate) else filename

    return np.load(_locate('images.npy')), np.load(_locate('labels.npy'))
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
from sklearn import svm, metrics, cross_validation, grid_search | ||
import pickle | ||
import gzip | ||
|
||
from pylinac_ml.pylinac_images import load_images | ||
|
||
# build_images()
# Feature matrix and label vector returned by pylinac_images.load_images.
data, labels = load_images()

# 80% of the samples go to the grid search; the rest is held out for the
# classification report at the end.
data_train, data_test, y_train, y_test = cross_validation.train_test_split(
    data, labels, train_size=0.8,
)

# RBF-kernel SVM with a single parameter combination.
parameters = dict(kernel=['rbf'], C=[10], gamma=[0.001])
classifier = grid_search.GridSearchCV(svm.SVC(verbose=True), parameters)
# parameters = {
#     'hidden_layer_sizes': [(2500,)],
#     'activation': ['relu'],
#     'alpha': [0.1, 0.01, 10],
#     'algorithm': ['adam'],
#     'tol': [0.01],
#     'learning_rate': ['invscaling']
# }
# classifier = model_selection.GridSearchCV(neural_network.MLPClassifier(verbose=True), parameters)

classifier.fit(data_train, y_train)

# One line per parameter combination: mean score (+/- two standard
# deviations of the per-fold scores).
for candidate, mean, fold_scores in classifier.grid_scores_:
    spread = fold_scores.std() * 2
    print("%0.3f (+/-%0.03f) for %r" % (mean, spread, candidate))

# The empty strings reproduce the blank separator lines of the report.
print(
    "",
    classifier.best_estimator_,
    "Best parameters found:",
    classifier.best_params_,
    "With a training score of:",
    classifier.best_score_,
    "",
    "Classification report:",
    sep="\n",
)
predictions = classifier.predict(data_test)
print(metrics.classification_report(y_test, predictions))
# print(metrics.f1_score(y_train, classifier.predict(iris_train), average='binary'))
# print("And test score of")
# print(metrics.f1_score(y_test, classifier.predict(data_test), average='binary'))

# Persist the fitted grid search as a gzip-compressed pickle in the current
# working directory.
with gzip.open('pylinac_model.pkl.gz', mode='wb') as archive:
    pickle.dump(classifier, archive)

# with gzip.open('pylinac_model.pkl.gz', mode='rb') as m:
#     unp_clf = pickle.load(m)

# print("Classification report after pickling/unpickling:")
# print(metrics.classification_report(y_test, unp_clf.predict(data_test)))
Oops, something went wrong.