In [13]:
# db = int(input('Database ID (2 for 4 chamber and 17 for short axis): '))
# basedir = input('Base directory (e.g. D:/ML_data/PAH): ')
# scale = int(input('Scale (16, 8, 4, or -1): '))
# mask_id = int(input('Mask ID (1-5): '))
# level = int(input('Preprocessing level (1-4): '))

In [1]:
import os
import h5py
import numpy as np
import pandas as pd

### Step 0: Converting .mat file to .npy (ndarray) file

In [3]:
def mat2npy(basedir, db):
    """Convert ``PAH1DB<db>.mat`` into a ``.npy`` array plus a label CSV.

    The .mat file is opened with h5py. Its ``data`` dataset is transposed and
    saved as ``PAH1DB<db>.npy``; its ``labels`` dataset is flattened and
    written to ``info_DB<db>.csv`` under the column ``Label``. Both outputs go
    into ``basedir``.

    Args:
        basedir: Directory containing the .mat file; outputs are written here.
        db: Database identifier used to build the file names.
    """
    fname = 'PAH1DB%s.mat' % db
    print('Converting %s to ndarray' % fname)
    data_path = os.path.join(basedir, fname)

    # The context manager releases the HDF5 handle even if a dataset is
    # missing or reading fails part-way through.
    with h5py.File(data_path, 'r') as f:
        data = f['data'][()].transpose()
        labels = f['labels'][()].reshape(-1)

    out_path = os.path.join(basedir, 'PAH1DB%s.npy' % db)
    np.save(out_path, data)

    df = pd.DataFrame(data={'Label': labels})
    csv_path = os.path.join(basedir, 'info_DB%s.csv' % db)
    df.to_csv(csv_path)
    print('Completed!')

In [10]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Manual-run form for mat2npy. The base directory is read once from stdin when
# this cell executes; the database is chosen from the dropdown.
# (Was `interact_matmul`, which is not defined anywhere; `interact_manual` is
# the name imported from ipywidgets.)
interact_manual(mat2npy, db=widgets.Dropdown(
    options=[('Four chamber', 2), ('Short axis',17)],
    value=2,
    description='Database',
    disabled=False,
), basedir=input('Base directory (e.g. D:/ML_data/PAH): '))


Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.mat2npy(basedir, db)>

## Step 1: Registration

In [88]:
def load_data(basedir, db):
    """Return the array stored in ``<basedir>/PAH1DB<db>.npy``."""
    npy_name = 'PAH1DB%s.npy' % db
    return np.load(os.path.join(basedir, npy_name))

def load_landmark(basedir, db):
    """Read the landmark table for one database from ``regMRI4ChFull.xlsx``.

    Args:
        basedir: Directory containing ``regMRI4ChFull.xlsx``.
        db: Database identifier; 2 selects sheet ``4ch`` and 17 selects
            sheet ``SA``.

    Returns:
        A DataFrame restricted to the ID, Group and landmark coordinate
        columns configured for the chosen sheet.

    Raises:
        ValueError: If ``db`` is neither 2 nor 17.
    """
    # Sheet name and columns to read, keyed by database id.
    sheets = {
        2: ('4ch',
            ['ID', 'Group', 'mitral ann X', 'mitral ann Y',
             'LVEDV apex X', 'LVEDV apex Y', 'Spinal X', 'Spinal Y']),
        17: ('SA',
             ['ID', 'Group', 'inf insertion point X', 'insertion point Y',
              'sup insertion point X', 'sup insertion point Y', 'RV inf X', 'RV inf Y']),
    }
    if db not in sheets:
        # Previously this fell through and failed later with an
        # UnboundLocalError on sheet_name.
        raise ValueError('Unsupported database ID %r (expected 2 or 17)' % (db,))

    sheet_name, col_names = sheets[db]
    reg_fpath = os.path.join(basedir, 'regMRI4ChFull.xlsx')
    reg_df = pd.read_excel(reg_fpath, sheet_name=sheet_name, usecols=col_names)

    return reg_df

In [2]:
import sys
sys.path.append('..')
from kale.prepdata.prep_cmr import regMRI

def proc_reg(basedir, db, sample_id=1007):
    """Register the data of one database against a target sample.

    Loads ``PAH1DB<db>.npy`` and the landmark table, calls ``regMRI`` with the
    row position of ``sample_id``, saves the first returned value as
    ``RegPAH1DB<db>.npy`` and writes ``info_DB<db>.csv`` with the columns
    ``ID``, ``Label`` and ``Max Dist``.

    Args:
        basedir: Directory holding the inputs; outputs are written here too.
        db: Database identifier (2 or 17).
        sample_id: Value in the landmark table's ``ID`` column identifying the
            registration target.
    """
    print('Performing registration...')
    data = load_data(basedir, db)
    reg_df = load_landmark(basedir, db)

    # Positional row of the target sample; IndexError if the ID is absent.
    reg_id = np.where(reg_df['ID'] == sample_id)[0][0]
    data_reg, max_dist = regMRI(data, reg_df, reg_id)
    out_path = os.path.join(basedir, 'RegPAH1DB%s.npy' % db)
    np.save(out_path, data_reg)

    info_file = os.path.join(basedir, 'info_DB%s.csv' % db)
    if os.path.exists(info_file):
        info_df = pd.read_csv(info_file, index_col=0)
    else:
        # `reg_file` was never defined; the landmark table is `reg_df`.
        info_df = pd.DataFrame(data={'Label': reg_df['Group'].values})
    # Assign by position: once this function has rewritten the CSV without an
    # index, index_col=0 turns the ID column into the index and an index-aligned
    # assignment would fill the column with NaN.
    info_df['ID'] = reg_df['ID'].values
    info_df['Max Dist'] = max_dist
    info_df.to_csv(info_file, columns=['ID', 'Label', 'Max Dist'], index=False)

    print('Registration Completed')

In [3]:
from ipywidgets import interact_manual
import ipywidgets as widgets

# Manual-run form for proc_reg. basedir and sample_id are read once from stdin
# when this cell executes; the database is chosen from the dropdown.
interact_manual(proc_reg, db=widgets.Dropdown(
    options=[('Four chamber (2)', 2), ('Short axis (17)',17)],
    value=2,
    description='Database',
    disabled=False,), 
         basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
         # A blank answer falls back to the default advertised in the prompt
         # instead of crashing on int('').
         sample_id=int(input('Target sample ID used for regisitration (1007): ') or 1007)
        )

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH
Target sample ID used for regisitration (1007): 1007


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.proc_reg(basedir, db, sample_id=1007)>

## Step 2: Rescaling

In [4]:
import sys
sys.path.append('..')
from kale.prepdata.prep_cmr import rescale_cmr

def proc_rescale(basedir, db, scale=-1):
    """Rescale the registered data and save one array per scale factor.

    Loads ``RegPAH1DB<db>.npy`` from ``basedir`` and, for each requested
    factor, passes the data to ``rescale_cmr`` and saves the result as
    ``DB<db>/NoPrs<factor>DB<db>.npy``.

    Args:
        basedir: Directory holding the registered data.
        db: Database identifier used in the file names.
        scale: Scale factor handed to ``rescale_cmr``; ``-1`` processes
            16, 8 and 4 in turn.
    """
    registered = np.load(os.path.join(basedir, 'RegPAH1DB%s.npy' % db))
    out_dir = os.path.join(basedir, 'DB%s' % db)
    print('Rescaling data ...')
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    # -1 is the "all scales" sentinel; anything else is a single factor.
    factors = [16, 8, 4] if scale == -1 else [scale]
    for factor in factors:
        print('Scale: 1/%s' % factor)
        rescaled = rescale_cmr(registered, scale=factor)
        target = os.path.join(out_dir, 'NoPrs%sDB%s.npy' % (factor, db))
        np.save(target, rescaled)

    print('Completed!')

In [5]:
from ipywidgets import interact_manual
import ipywidgets as widgets

# Manual-run form for proc_rescale; basedir is read from stdin when the cell runs.
interact_manual(
    proc_rescale,
    db=widgets.Dropdown(
        options=[('Four chamber (2)', 2), ('Short axis (17)', 17)],
        value=2,
        description='Database',
        disabled=False,
    ),
    basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
    scale=widgets.Dropdown(
        options=[('16', 16), ('8', 8), ('4', 4), ('-1 (All of above)', -1)],
        value=4,
        description='Scale',
        disabled=False,
    ),
)

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.proc_rescale(basedir, db, scale=-1)>

## Step 3: Preprocessing

In [6]:
import sys
sys.path.append('..')
from kale.prepdata.prep_cmr import cmr_proc

# if scale == -1:
#     for scale_ in [16, 8, 4]:
#         cmr_proc(basedir, db, scale_, mask_id, level, save_data=True)
# else:
#     cmr_proc(basedir, db, scale, mask_id, level, save_data=True)

# Manual-run form for cmr_proc; basedir is read from stdin when the cell runs.
interact_manual(
    cmr_proc,
    db=widgets.Dropdown(
        options=[('Four chamber (2)', 2), ('Short axis (17)', 17)],
        value=2,
        description='Database',
        disabled=False,
    ),
    basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
    scale=widgets.Dropdown(
        options=[('16', 16), ('8', 8), ('4', 4), ('-1 (All of above)', -1)],
        value=4,
        description='Scale',
        disabled=False,
    ),
    mask_id=widgets.Dropdown(
        options=[('1', 1), ('2', 2), ('3', 3), ('4', 4), ('5', 5), ('6', 6), ('7', 7), ('8', 8)],
        value=5,
        description='Mask ID:',
        disabled=False,
    ),
    level=widgets.Dropdown(
        options=[('1', 1), ('2', 2), ('3', 3), ('4', 4)],
        value=1,
        description='Preprocssing level:',
        disabled=False,
    ),
)

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function kale.prepdata.prep_cmr.cmr_proc(basedir, db, scale, mask_id, level, save_data=True, return_data=False)>

# Classification

In [26]:
import sys
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.feature_selection import SelectFromModel
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import LogisticRegression, RidgeClassifier, Lasso
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV


# Default hyper-parameter grids handed to GridSearchCV. Keys address pipeline
# steps by name ('select' and 'clf') using scikit-learn's double-underscore
# convention; each dict is searched as an independent grid.
default_grid = [
    # C of the estimator wrapped by the 'select' step.
    dict(select__estimator__C=np.logspace(-2, 2, 5)),
    # Linear kernel: only C is varied.
    dict(clf__C=np.logspace(-3, 2, 6), clf__kernel=['linear']),
    # RBF kernel: C and gamma are varied together.
    dict(clf__C=np.logspace(-3, 2, 6),
         clf__gamma=np.logspace(-4, -1, 3),
         clf__kernel=['rbf']),
]

# clf = Pipeline([
#   ('feature_selection', SelectFromModel(LinearSVC(penalty="l1"))),
#   ('classification', RandomForestClassifier())
# ])


class _Classifier(BaseEstimator, TransformerMixin):
    """Feature-selection + classifier pipeline tuned with a grid search.

    The pipeline has two steps, ``select`` (a ``SelectFromModel``) and ``clf``
    (the final classifier), wrapped in a ``GridSearchCV``.

    Args:
        clf: Which pipeline to build: ``'SVC'``, ``'LR'`` or ``'Ridge'``.
        param_grid: Grid(s) passed to ``GridSearchCV``. The default grid uses
            parameters of the ``'SVC'`` pipeline (``clf__kernel``,
            ``select__estimator__C``); pass a matching grid for other choices.
        cv: Cross-validation splitter; when ``None`` a
            ``StratifiedShuffleSplit`` is built from ``n_split``/``test_size``.
        n_split: Number of shuffle splits for the default splitter.
        test_size: Test fraction for the default splitter.
        n_jobs: Parallel jobs for the grid search.

    Raises:
        ValueError: If ``clf`` is not one of the supported names.
    """

    def __init__(self, clf='SVC', param_grid=default_grid, cv=None, n_split=10, test_size=0.2, n_jobs=1):
        if clf == 'SVC':
            _clf = Pipeline([('select', SelectFromModel(estimator=LogisticRegression(penalty='l1', solver='liblinear'))),
                             ('clf', SVC(max_iter=10000, probability=True))])
        elif clf == 'LR':
            _clf = Pipeline([('select', SelectFromModel(Lasso())),
                             ('clf', LogisticRegression(max_iter=10000))])
        elif clf == 'Ridge':
            _clf = Pipeline([('select', SelectFromModel(Lasso())),
                             ('clf', RidgeClassifier(max_iter=10000))])
        else:
            # Raise instead of sys.exit(): a bad argument should surface as a
            # normal, catchable error rather than a SystemExit in the kernel.
            raise ValueError("Invalid Classifier: %r (expected 'SVC', 'LR' or 'Ridge')" % (clf,))

        print(param_grid)
        if cv is None:
            cv = StratifiedShuffleSplit(n_splits=n_split, test_size=test_size,
                                        train_size=1 - test_size, random_state=144)
        # The `iid` argument was removed from GridSearchCV; the current
        # behaviour is what iid=False used to select.
        self.search = GridSearchCV(_clf, param_grid, n_jobs=n_jobs, cv=cv)

    def fit(self, X, y):
        """Run the grid search on ``(X, y)`` and keep the best pipeline."""
        self.search.fit(X, y)
        # GridSearchCV (refit=True by default) has already refit the best
        # pipeline on all of X, y, so no second fit is needed.
        self.clf = self.search.best_estimator_

        return self

    def predict(self, X):
        """Predict labels with the best pipeline found by ``fit``."""
        return self.clf.predict(X)

    def predict_proba(self, X):
        """Return class probabilities from the best pipeline found by ``fit``."""
        return self.clf.predict_proba(X)

In [28]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score, roc_auc_score
from tensorly.base import fold, unfold


def label_binarizer(y):
    """Collapse labels to two classes: 0 stays 0.0, any other value becomes 1.0.

    Returns a new float array with the same shape as ``y``; ``y`` is not modified.
    """
    return np.where(y != 0, 1.0, 0.0)


def evaluate_(X, y, kfold=10, random_state=144, return_auc=True):
    """Evaluate ``_Classifier`` with stratified k-fold cross-validation.

    Args:
        X: Feature matrix, indexed by sample along the first axis.
        y: Label vector with one entry per sample.
        kfold: Number of folds.
        random_state: Seed for shuffling samples before splitting into folds.
        return_auc: When true, also collect class-1 probabilities and compute
            ROC AUC per fold and overall.

    Returns:
        dict with ``fold_accs`` and ``fold_aucs`` (per-fold lists), and ``acc``
        and ``auc`` computed over the pooled out-of-fold predictions
        (``auc`` stays ``None`` when ``return_auc`` is false).
    """
    # random_state only has meaning when shuffling; current scikit-learn
    # raises ValueError if it is set while shuffle is left False.
    skf = StratifiedKFold(n_splits=kfold, shuffle=True, random_state=random_state)
    res = {'fold_accs': [], 'fold_aucs': [], 'acc': None,'auc': None}
    y_pred = np.zeros(y.shape)
    y_dec = np.zeros(y.shape)
    for train, test in skf.split(X, y):
        # A fresh model per fold so that no state leaks between folds.
        clf = _Classifier()
        clf.fit(X[train], y[train])
        y_pred[test] = clf.predict(X[test])
        res['fold_accs'].append(accuracy_score(y[test], y_pred[test]))
        if return_auc:
            y_dec[test] = clf.predict_proba(X[test])[:, 1]
            res['fold_aucs'].append(roc_auc_score(y[test], y_dec[test]))
    res['acc'] = accuracy_score(y, y_pred)
    if return_auc:
        res['auc'] = roc_auc_score(y, y_dec)

    return res

In [16]:
import sys
# sys.path.append('...')
from kale.embed.mpca import MPCA

def main_(basedir, db, scale, mask_id, level):
    """Run MPCA + classification on one preprocessed dataset and print scores.

    Loads ``<basedir>/DB<db>/PrepData/PrS<scale>M<mask_id>L<level>DB<db>.npy``
    and the ``Label`` column of ``info_DB<db>.csv``, binarises the labels,
    reduces the data with MPCA, unfolds the result and evaluates it with
    ``evaluate_``.

    Args:
        basedir: Root data directory.
        db: Database identifier used in the file names.
        scale: Scale factor in the file name; ``-1`` runs the experiment for
            16, 8 and 4 in turn (the "All of above" option in the UI).
        mask_id: Mask identifier in the file name.
        level: Preprocessing level in the file name.
    """
    if scale == -1:
        # There is no 'PrS-1...' file; -1 is the sentinel for "every scale",
        # as in proc_rescale.
        for scale_ in [16, 8, 4]:
            main_(basedir, db, scale_, mask_id, level)
        return

    print('Main Experiemnts for Scale: 1/%s, Mask ID: %s, Processing level: %s' % (scale, mask_id, level))
    data_path = os.path.join(basedir, 'DB%s' % db, 'PrepData')
    fname = 'PrS%sM%sL%sDB%s.npy' % (scale, mask_id, level, db)
    X = np.load(os.path.join(data_path, fname))
    info_df = pd.read_csv(os.path.join(basedir, 'info_DB%s.csv' % db))
    y = info_df['Label'].values
    y_ = label_binarizer(y)

    # Perform MPCA dimension reduction
    mpca = MPCA()
    mpca.fit(X)
    Xmpc = mpca.transform(X)
    # Unfold along the last mode and keep the real part of the result.
    X_ = unfold(Xmpc, mode=-1).real

    # Evaluating
    res = evaluate_(X_, y_)

    print('Accuracy:', res['acc'], 'AUC:', res['auc'])

In [29]:
from ipywidgets import interact_manual
import ipywidgets as widgets

# Manual-run form for main_; basedir is read from stdin when the cell runs.
interact_manual(
    main_,
    db=widgets.Dropdown(
        options=[('Four chamber (2)', 2), ('Short axis (17)', 17)],
        value=2,
        description='Database',
        disabled=False,
    ),
    basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
    scale=widgets.Dropdown(
        options=[('16', 16), ('8', 8), ('4', 4), ('-1 (All of above)', -1)],
        value=4,
        description='Scale',
        disabled=False,
    ),
    mask_id=widgets.Dropdown(
        options=[('1', 1), ('2', 2), ('3', 3), ('4', 4), ('5', 5), ('6', 6), ('7', 7), ('8', 8)],
        value=5,
        description='Mask ID:',
        disabled=False,
    ),
    level=widgets.Dropdown(
        options=[('1', 1), ('2', 2), ('3', 3), ('4', 4)],
        value=1,
        description='Preprocssing level:',
        disabled=False,
    ),
)

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.main_(basedir, db, scale, mask_id, level)>

## Landmark Visualisation

In [51]:
import tkinter

from matplotlib.backends.backend_tkagg import (
    FigureCanvasTkAgg, NavigationToolbar2Tk)
# Implement the default Matplotlib key bindings.
from matplotlib.backend_bases import key_press_handler
from matplotlib.figure import Figure
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np

In [85]:
def sub_img_mark(basedir, db, sub, slice_):
    """Fetch one image slice of a subject together with its landmark row.

    Args:
        basedir: Directory holding the data array and the landmark workbook.
        db: Database identifier (2 or 17).
        sub: Subject ID as listed in the landmark table's ``ID`` column.
        slice_: Index along the second-to-last axis of the data array.

    Returns:
        ``(sub_img, land_marks)``: the selected image and a Series with the
        subject's landmark columns (every column after ``ID`` and ``Group``).

    Raises:
        IndexError: If ``sub`` does not occur in the landmark table.
    """
    data = load_data(basedir, db)
    reg_df = load_landmark(basedir, db)

    # Row position of the subject; the same position indexes the last data axis.
    sub_idx = np.where(reg_df['ID'] == sub)[0][0]
    sub_img = data[..., slice_, sub_idx]
    # `reg_file` was never defined; the landmark table is `reg_df`.
    land_marks = reg_df.iloc[sub_idx, 2:]

    return sub_img, land_marks

### Display landmarks

In [80]:
def disp_mark(basedir, db, sub, slice_):
    """Show one image slice with its stored landmarks drawn as red circles.

    The landmark row is read as consecutive (X, Y) column pairs; each pair is
    printed with its column names and plotted on top of the image.

    Args:
        basedir: Directory holding the data array and the landmark workbook.
        db: Database identifier (2 or 17).
        sub: Subject ID to display.
        slice_: Slice index to display.
    """
    sub_img, land_marks = sub_img_mark(basedir, db, sub, slice_)
    marks = land_marks.values.reshape((-1, 2))
    mark_name = land_marks.index.values.reshape((-1, 2))

    # Plain matplotlib figure: the previous version also created a tkinter
    # root window that was never used or destroyed, leaving a stray window
    # (and failing outright where no display is available for Tk).
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    ax.set_title("Subject %s Slice %s" % (sub, slice_))
    ax.imshow(sub_img)
    for (name_x, name_y), (ix, iy) in zip(mark_name, marks):
        print('%s: %s, %s: %s' % (name_x, ix, name_y, iy))
        ax.plot(ix, iy, marker='o', markersize=8, markerfacecolor=(1, 1, 1, 0.1),
                markeredgewidth=1.5, markeredgecolor='r')
    plt.show()

In [81]:
from ipywidgets import interact_manual
import ipywidgets as widgets

# Manual-run form for disp_mark; basedir, subject and slice are read from
# stdin (in that order) when the cell runs.
interact_manual(
    disp_mark,
    db=widgets.Dropdown(
        options=[('Four chamber (2)', 2), ('Short axis (17)', 17)],
        value=2,
        description='Database',
        disabled=False,
    ),
    basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
    sub=int(input('Subject ID (e.g. 1005):')),
    slice_=int(input('Slice:')),
)

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH
Subject ID (e.g. 1005):1005
Slice:0


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.disp_mark(basedir, db, sub, slice_)>

### Interactive Marking (Get coords manually)

In [82]:
def onclick(event):
    """Mouse-press callback: record the clicked data coordinates and mark them.

    Relies on the module-level ``ax``, ``canvas`` and ``coords`` set up by
    ``hand_mark``. The clicked point is printed, drawn as a red circle,
    appended to ``coords`` and also stored in the globals ``ix``/``iy``.

    Args:
        event: Matplotlib mouse event (``xdata``, ``ydata``, ``button`` and
            ``dblclick`` are read).

    Returns:
        The global ``coords`` list (unchanged when the click is ignored).
    """
    global ix, iy, coords

    # Clicks outside the axes carry no data coordinates (xdata/ydata are None);
    # formatting them with %f used to raise TypeError.
    if event.xdata is None or event.ydata is None:
        return coords

    ix, iy = event.xdata, event.ydata
    print('%s click: button=%d, x=%f, y=%f' %
          ('double' if event.dblclick else 'single',
           event.button, event.xdata, event.ydata))
    ax.plot(ix, iy, marker='o', markersize=8, markerfacecolor=(1, 1, 1, 0.1),
            markeredgewidth=1.5, markeredgecolor='r')
    canvas.draw()

    coords.append((ix, iy))

    return coords


def _quit():
    """Stop the Tk main loop and destroy the module-level ``root`` window.

    Used as the command of the "Quit" button created in ``hand_mark``.
    """
    root.quit()     # stops mainloop
    root.destroy()  # this is necessary on Windows to prevent
                    # Fatal Python Error: PyEval_RestoreThread: NULL tstate


def hand_mark(basedir, db, sub, slice_):
    """Open a Tk window showing one slice and collect clicked coordinates.

    Every mouse press on the image is handled by ``onclick``, which appends
    the data coordinates to the module-level ``coords`` list. The function
    blocks in the Tk main loop until the window is closed or the "Quit"
    button is pressed.

    The window objects are published as module-level globals (``root``,
    ``fig``, ``im``, ``ax``, ``canvas``, ``coords``) because ``onclick`` and
    ``_quit`` read them.

    Args:
        basedir: Directory holding the data array and the landmark workbook.
        db: Database identifier (2 or 17).
        sub: Subject ID to display.
        slice_: Slice index to display.
    """
    sub_img, _ = sub_img_mark(basedir, db, sub, slice_)
    global root, fig, im, ax, canvas, coords
    root = tkinter.Tk()
    root.wm_title("Subject %s Slice %s" % (sub, slice_))

    root.image = sub_img
    # Use a bare Figure rather than plt.figure(): a pyplot-managed figure
    # stays registered with pyplot after the Tk window is gone (one leaked
    # figure per call) and is unnecessary when the canvas is embedded by hand.
    fig = Figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
    im = ax.imshow(root.image)

    canvas = FigureCanvasTkAgg(fig, master=root)  # A tk.DrawingArea.
    canvas.draw()

    toolbar = NavigationToolbar2Tk(canvas, root)
    toolbar.update()
    canvas.get_tk_widget().pack(side=tkinter.TOP, fill=tkinter.BOTH, expand=1)

    # Fresh list for this session; onclick appends to it.
    coords = []
    canvas.mpl_connect('button_press_event', onclick)

    button = tkinter.Button(master=root, text="Quit", command=_quit)
    button.pack(side=tkinter.TOP)

    tkinter.mainloop()

In [83]:
from ipywidgets import interact_manual
import ipywidgets as widgets

# Manual-run form for hand_mark; basedir, subject and slice are read from
# stdin (in that order) when the cell runs.
interact_manual(
    hand_mark,
    db=widgets.Dropdown(
        options=[('Four chamber (2)', 2), ('Short axis (17)', 17)],
        value=2,
        description='Database',
        disabled=False,
    ),
    basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
    sub=int(input('Subject ID (e.g. 1005):')),
    slice_=int(input('Slice:')),
)

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH
Subject ID (e.g. 1005):1005
Slice:0


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.hand_mark(basedir, db, sub, slice_)>

### Update coords and save to file

In [92]:
def update_coords(basedir, db, sub, mark_names, mark_values):
    """Overwrite landmark values of one subject and save the table as CSV.

    The full landmark table is loaded, the named columns of the subject's row
    are replaced, and the result is written to ``<basedir>/new_regDB<db>.csv``.

    Args:
        basedir: Directory holding the landmark workbook; the CSV goes here too.
        db: Database identifier (2 or 17).
        sub: Subject ID whose row is updated.
        mark_names: Comma-separated landmark column names,
            e.g. ``'Spinal X,Spinal Y'``.
        mark_values: Comma-separated integer values, one per name.

    Raises:
        ValueError: If the numbers of names and values differ, if a name is
            not a column of the landmark table, or if a value is not an integer.
        IndexError: If ``sub`` does not occur in the landmark table.
    """
    # Strip surrounding blanks so that 'Spinal X, Spinal Y' addresses the
    # existing columns instead of silently creating a new ' Spinal Y' column.
    names = [name.strip() for name in mark_names.split(',')]
    values = [value.strip() for value in mark_values.split(',')]
    if len(names) != len(values):
        raise ValueError('Number of landmark names and values are not consistant!!')

    reg_df = load_landmark(basedir, db)
    unknown = [name for name in names if name not in reg_df.columns]
    if unknown:
        raise ValueError('Unknown landmark name(s): %s' % ', '.join(unknown))

    sub_idx = np.where(reg_df['ID'] == sub)[0][0]
    for name, value in zip(names, values):
        reg_df.loc[sub_idx, name] = int(value)
    out_fname = 'new_regDB%s.csv' % db
    reg_df.to_csv(os.path.join(basedir, out_fname))
    print('Completed, new landmark file %s saved to %s' % (out_fname, basedir))

In [93]:
from ipywidgets import interact_manual
import ipywidgets as widgets

# Manual-run form for update_coords; basedir, subject, landmark names and
# values are read from stdin (in that order) when the cell runs.
interact_manual(
    update_coords,
    db=widgets.Dropdown(
        options=[('Four chamber (2)', 2), ('Short axis (17)', 17)],
        value=2,
        description='Database',
        disabled=False,
    ),
    basedir=input('Base directory (e.g. D:/ML_data/PAH): '),
    sub=int(input('Subject ID (e.g. 1005):')),
    mark_names=input('Landmark Names (separate by comma, e.g. Spinal X,Spinal Y): '),
    mark_values=input('New landmark values (separate by comma): '),
)

Base directory (e.g. D:/ML_data/PAH): D:/ML_data/PAH
Subject ID (e.g. 1005):1005
Landmark Names (separate by comma, e.g. Spinal X,Spinal Y): Spinal X,Spinal Y
New landmark values (separate by comma): 50,50


interactive(children=(Text(value='D:/ML_data/PAH', description='basedir'), Dropdown(description='Database', op…

<function __main__.update_coords(basedir, db, sub, mark_names, mark_values)>