In [4]:
from matplotlib import pyplot as plt
from skimage.feature import hog
from skimage import exposure
from skimage.transform import resize

from sklearn.svm import SVC, NuSVC
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV,cross_val_score
from sklearn.cluster import KMeans
from sklearn import tree
from sklearn.linear_model import SGDClassifier
from sklearn.utils import shuffle

import os

import numpy as np
import nibabel as nib
import pandas as pd
from sklearn.decomposition import PCA

def process_img(filepath, resize_l=250, resize_w=250, resize_d=64):
    scan = nib.load(filepath)
    scan = scan.get_fdata()
    
    # scan = resize(scan, output_shape=(resize_l, resize_w, resize_d))
    # scan = hog(scan, orientations=16, pixels_per_cell=(20,20), cells_per_block=(1,1), channel_axis=-1)
    return scan

def create_padding(filepath, x=378, y=335, z=297):
    scan = nib.load(filepath)
    scan = scan.get_fdata()
    final_shape = [x,y,z]
    padding = []
    for i, dim in enumerate(final_shape):
        padding += [(0, dim - scan.shape[i])]

    return np.pad(scan, padding, mode='constant')

def process_with_pad(filepath, pca_model):
    img = create_padding(filepath)
    new_img = img.reshape((-1, img.shape[-1]))
    pca_model.fit(new_img)
    # return img

def transform(filepath, pca_model):
    img = create_padding(filepath)
    img = img.reshape((-1, img.shape[-1]))
    img = pca_model.transform(img)
    return img


In [5]:
# X = np.empty(shape=(1005,4608))
X = np.zeros(shape=(1005, 126630, 5), dtype=np.float16)
Y = []
data_folder = 'labeled_data/data/'
pca = PCA(n_components=5)

j = 0
for label in os.listdir(data_folder):
    img_directory = f'{data_folder}/{label}/'
    pca = PCA(n_components=5)

    for img in os.listdir(img_directory): 
        img_path = '{}{}'.format(img_directory, img)
        process_with_pad(img_path, pca)
        
    for img in os.listdir(img_directory):
        img_path = '{}{}'.format(img_directory, img)
        X[j] = transform(img_path, pca)
        Y += [label]
        j += 1

    print(pca.explained_variance_ratio_)

# np.save('X.npy', X)
# np.save('Y.npy', Y)

[0.88596636 0.03328481 0.01241224 0.00844529 0.00676744]
[0.86824759 0.0412706  0.0158113  0.01431913 0.00701977]
[0.72417956 0.09967993 0.07420282 0.04260554 0.0155832 ]
[0.85132592 0.04378721 0.02837231 0.01546959 0.01007566]


In [4]:
# # test 
# resize_l = 250
# resize_w = 250
# resize_d = 64

# data_folder = f'{os.getcwd()}/labeled_data/data/'

# length = 0
# for label in os.listdir(data_folder):
#     length += len(os.listdir(f'{data_folder}/{label}/'))


# # X = np.empty(shape=(length, resize_l, resize_w, 64,), dtype=np.float16)
# X = []
# Y = []

# j = 0
# print('Loading Images')
# for label in os.listdir(data_folder):
#     img_directory = f'{data_folder}/{label}/'
#     for img in os.listdir(img_directory): 
#         if int(label) == 0:
#             img_path = '{}{}'.format(img_directory, img)
#             img = process_img(img_path, resize_l, resize_w, resize_d)
#             X += [img]
#             Y += [label]
#         else:
#             img_path = '{}{}'.format(img_directory, img)
#             img, aug1, aug2 = process_with_aug(img_path, resize_l, resize_w, resize_d)
#             X += [img, aug1, aug2]
#             Y += [label, label, label]
        

Loading Images


Loading Images


In [24]:
train_x, test_x, train_y, test_y = train_test_split(X, Y, test_size=0.3, random_state=777)
X, Y = shuffle(X, Y, random_state=0)

In [53]:
# classifier = SVC(kernel='poly', degree=3, gamma='scale', probability=True, class_weight='balanced')
params = {
    'C': np.logspace(-4, 4, 10),
    'degree': [2,3,4],
    'kernel': ['poly'],
    'coef0': np.logspace(-4, 4, 10),
    # 'class_weight': [{0: 1, 1: w, 2: w, 3: w} for w in [2,3,4,5,6]]
}

weights ={
    '0': 1,
    '1': 2,
    '2': 2,
    '3': 2
}
classifier = SVC(class_weight=weights)
classifier = RandomizedSearchCV(classifier, params,  cv=5, n_jobs=-1)
classifier.fit(X,Y)

In [54]:
classifier.best_estimator_

In [63]:
weights ={
    '0': 0.001,
    '1': 2,
    '2': 2,
    '3': 2
}
# classifier = SVC(C=2.782559402207126, coef0=0.0001, class_weight=weights)
classifier = SVC(C=10000.0, coef0=10000.0, class_weight='balanced')
scores = cross_val_score(classifier, X, Y, cv=5)
np.mean(scores)

0.8258706467661691

In [57]:
# poly kernel
classifier = SVC(C=1291.5496650148827, class_weight={'0': 1, '1': 1, '2': 1, '3': 1},coef0=0.000774263682681127, degree=2, kernel='poly')
# classifier = SVC(C=1438.44988828766, class_weight={'0': 1, '1': 1, '2': 1, '3': 1}, coef0=2043359717.8569396, degree=5)
scores = cross_val_score(classifier, X, Y, cv=10)
print(np.mean(scores))

0.809930693069307


In [None]:
0.801910891089109
0.8039009900990098

In [13]:
classifier.best_estimator_

# SVC(C=1438.44988828766, class_weight={'0': 1, '1': 2, '2': 2, '3': 2},
#     coef0=2043359717.8569396, degree=5)

In [9]:
classifier = SVC(C=1438.44988828766, class_weight={'0': 1, '1': 2, '2': 2, '3': 2}, coef0=2043359717.8569396, degree=5)
classifier.fit(X, Y)

In [140]:
data_folder = f'{os.getcwd()}/unlabeled_data/'

ground_truth = pd.read_excel('testing_filenames.xlsx', names=['filename'], index_col=None, header=None)
gt_x = []

for i in ground_truth.index:
    filename = ground_truth['filename'].iloc[i]
    img_path = '{}{}.nii.gz'.format(data_folder,filename[1:-1])
    gt_x += [process_img(img_path, resize_l, resize_w, resize_d)]




In [165]:
classifier = SVC(C=1438.44988828766, class_weight={'0': 0.1, '1': 2, '2': 2, '3': 2}, coef0=2043359717.8569396, degree=5)
classifier.fit(X, Y)
gt_y = classifier.predict(gt_x)
gt_y

array(['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '3', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '2', '0', '3', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '3', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '3', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '2', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '1', '0', '0', '0', '0', '0', '0', '0', '0', '0', '2', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0

In [166]:
x = np.unique(gt_y, return_counts=True)
x

(array(['0', '1', '2', '3'], dtype='<U1'),
 array([445,   5,  11,   6], dtype=int64))

In [168]:
df = pd.DataFrame([ground_truth['filename'], gt_y]).T
df.columns = ['filename', 'label']
df.to_excel('y_predictions1.xlsx', )
df

Unnamed: 0,filename,label
0,'I135_1',0
1,'I135_2',0
2,'I135_3',0
3,'I135_4',0
4,'I135_5',0
...,...,...
462,'I193_18',0
463,'I193_19',0
464,'I193_20',0
465,'I193_21',0


array(['0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '3', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '2', '0', '3', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '3', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '3',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0',
       '0', '2', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '1',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '2', '0', '0', '0',
       '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0