In [1]:
from matplotlib import pyplot as plt
from skimage.feature import hog
from skimage import exposure
from skimage.transform import resize

from sklearn.svm import SVC, NuSVC
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.cluster import KMeans
from sklearn import tree
from sklearn.linear_model import SGDClassifier

import os
import numpy as np
import nibabel as nib

import pandas as pd


In [2]:
def process_img(filepath, resize_l=250, resize_w=250, resize_d=64):
    scan = nib.load(filepath)
    scan = scan.get_fdata()
    
    scan = resize(scan, output_shape=(resize_l, resize_w, resize_d))
    scan = hog(scan, orientations=16, pixels_per_cell=(20,20), cells_per_block=(1,1), channel_axis=-1)
    return scan


def process_with_aug(filepath, resize_l=250, resize_w=250, resize_d=64):
    scan = nib.load(filepath)
    scan = scan.get_fdata()
    
    scan = resize(scan, output_shape=(resize_l, resize_w, resize_d))
    aug1 = [ele.T for ele in scan]
    aug2 = [ele.T for ele in aug1]
    scan = hog(scan, orientations=16, pixels_per_cell=(20,20), cells_per_block=(1,1), channel_axis=-1)
    aug1 = hog(aug1, orientations=16, pixels_per_cell=(20,20), cells_per_block=(1,1), channel_axis=-1)
    aug2 = hog(aug2, orientations=16, pixels_per_cell=(20,20), cells_per_block=(1,1), channel_axis=-1)
    return scan, aug1, aug2


In [3]:

data_folder = f'{os.getcwd()}/labeled_data/data/'

length = 0
for label in os.listdir(data_folder):
    length += len(os.listdir(f'{data_folder}/{label}/'))


# X = np.empty(shape=(length, resize_l, resize_w, 64,), dtype=np.float16)
X = []
Y = []

j = 0
print('Loading Images')
x,y,z = 0,0,0
for label in os.listdir(data_folder):
    img_directory = f'{data_folder}/{label}/'
    for img in os.listdir(img_directory): 
        
        img_path = '{}{}'.format(img_directory, img)
        img = nib.load(img_path)
        img 

Loading Images


In [4]:
# # test 
# resize_l = 250
# resize_w = 250
# resize_d = 64

# data_folder = f'{os.getcwd()}/labeled_data/data/'

# length = 0
# for label in os.listdir(data_folder):
#     length += len(os.listdir(f'{data_folder}/{label}/'))


# # X = np.empty(shape=(length, resize_l, resize_w, 64,), dtype=np.float16)
# X = []
# Y = []

# j = 0
# print('Loading Images')
# for label in os.listdir(data_folder):
#     img_directory = f'{data_folder}/{label}/'
#     for img in os.listdir(img_directory): 
#         if int(label) == 0:
#             img_path = '{}{}'.format(img_directory, img)
#             img = process_img(img_path, resize_l, resize_w, resize_d)
#             X += [img]
#             Y += [label]
#         else:
#             img_path = '{}{}'.format(img_directory, img)
#             img, aug1, aug2 = process_with_aug(img_path, resize_l, resize_w, resize_d)
#             X += [img, aug1, aug2]
#             Y += [label, label, label]
        

Loading Images


In [5]:
resize_l = 250
resize_w = 250
resize_d = 64

data_folder = f'{os.getcwd()}/labeled_data/data/'

length = 0
for label in os.listdir(data_folder):
    length += len(os.listdir(f'{data_folder}/{label}/'))


# X = np.empty(shape=(length, resize_l, resize_w, 64,), dtype=np.float16)
X = []
Y = []

print('Loading Images')
for label in os.listdir(data_folder):
    img_directory = f'{data_folder}/{label}/'
    for img in os.listdir(img_directory): 
        
        img_path = '{}{}'.format(img_directory, img)
        img = process_img(img_path, resize_l, resize_w, resize_d)
        X += [img]
        Y += [label]
        

Loading Images


In [6]:
train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.3, random_state=777)


In [7]:
classifier = tree.DecisionTreeClassifier()
classifier.fit(train_x, train_y)
y_pred = classifier.predict(test_x)
acc = np.mean(y_pred == test_y)
acc

0.6192052980132451

In [14]:
# classifier = SVC(kernel='poly', degree=3, gamma='scale', probability=True, class_weight='balanced')
params = {
    'C': np.logspace(-4, 4, 20),
    'degree': [1, 5],
    'kernel': ['linear','poly','rbf','sigmoid'],
    'coef0': np.logspace(-10, 10, 30)

    
}

weights ={
    '0': 1,
    '1': 2,
    '2': 2,
    '3': 2
}
classifier = SVC(class_weight=weights)
classifier = RandomizedSearchCV(classifier, params,  cv=10, n_jobs=-1, n_iter=100)
classifier.fit(train_x, train_y)
y_pred = classifier.predict(test_x)

acc = np.mean(y_pred == test_y)
acc

In [13]:
classifier.best_estimator_

# SVC(C=1438.44988828766, class_weight={'0': 1, '1': 2, '2': 2, '3': 2},
#     coef0=2043359717.8569396, degree=5)

In [10]:
data_folder = f'{os.getcwd()}/ATR_data/data/'

ground_truth = pd.read_excel('testing_filenames.xlsx', names=['filename'])
gt_x = []

for i in ground_truth.index:
    filename = ground_truth['filename'].iloc[i]

    img_path = '{}{}.nii.gz'.format(data_folder,filename[1:-1])
    gt_x += [process_img(img_path, resize_l, resize_w, resize_d)]

gt_y = classifier.predict(gt_x)




FileNotFoundError: No such file or no access: 'c:\Users\pjame\OneDrive\Documents\GitHub\ATR_Project/ATR_data/data/I135_2.nii.gz'

In [None]:
df = pd.DataFrame([ground_truth['filename'], gt_y]).T

df.columns = ['filename', 'label']
df.to_excel('y_predictions.xlsx', )


Unnamed: 0,filename
0,'I135_2'
1,'I135_3'
2,'I135_4'
3,'I135_5'
4,'I135_6'
...,...
461,'I193_18'
462,'I193_19'
463,'I193_20'
464,'I193_21'
