# PyKale Tutorial: A Machine Learning Pipeline for PAH Diagnosis
|[Launch Binder](https://mybinder.org/v2/gh/pykale/pykale/HEAD?filepath=examples%2Fcmri_mpca%2FCMR_PAH.ipynb) | [Open in Colab](https://colab.research.google.com/github/pykale/pykale/blob/main/examples/cmri_mpca/CMR_PAH.ipynb) |

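The figure below gives a brief introduction to the pipeline, which diagnoses pulmonary arterial hypertension (PAH) from cardiac magnetic resonance (CMR) images.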

![A brief introduction to the pipeline](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/ehjcimaging/22/2/10.1093_ehjci_jeaa001/2/m_jeaa001f2.jpeg?Expires=1631272906&Signature=PKl6KLDSoNyiTy~GNtXayJCucGKhweXvGz~svHY~ThjqjbokMVCnyJMMjoGQ4C81HdUcdnJA-rcNaqmDjspUs5eAX7avG~ckkIGXqGbPWrfnaMfwywWG3EXvvH0tw9ZcFeelnWgF4lkT0RFDVgzvzhHBvefNgS0ZGwLqiGJduANJFmWIXvYgNiU6M6kRbdpOJZBltknUO~Jv43-ghqmEX7dTfOKAx6~14quDC5cgzVFfwNFRKSn0P5JZRf~wIhQ6GQ4tprl7eXuzuRHcKnFbM~UkZOtcQvVhJofCCgSDnExyS6bns9Dop39OlfQHUdY4cwn1WaSnMKEqAqQaKZ715w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA)

## Setup

In [None]:
# Detect a Google Colab runtime by looking for 'google.colab' in the string form of the IPython shell.
if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    # Install PyKale from GitHub and clone the repository.
    !pip install git+https://github.com/pykale/pykale.git
    !git clone https://github.com/pykale/pykale.git
    # Uncomment the following two lines and comment the above two lines for checking out another branch
    # !pip install git+https://github.com/pykale/pykale.git@branch_name
    # !git clone -b branch_name https://github.com/pykale/pykale.git
    # Work from the example directory: later cells import the local `config` module
    # and open "tutorial.yaml" by relative path.
    %cd pykale/examples/cmri_mpca
else:
    print('Not running on CoLab')

Import the required modules.

In [None]:
import os

from config import get_cfg_defaults, read_dicom_imgs
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from kale.utils.download import download_file_by_url

## Get CMR Images, Landmark Locations, and Labels

In [None]:
cfg_path = "tutorial.yaml" # Path to `.yaml` config file

# Start from the default configuration, overlay the values from the YAML file,
# then freeze the result and print it for inspection.
# NOTE(review): `freeze()` presumably makes the config read-only — confirm against `config.py`.
cfg = get_cfg_defaults()
cfg.merge_from_file(cfg_path)
cfg.freeze()
print(cfg)

Download data

In [None]:
# Dataset name and file format come from the config.
# NOTE(review): the key is spelled `FILE_FORAMT`; presumably this matches the key
# declared in `config.py` — confirm before renaming it in either place.
file_name = cfg.DATASET.FILENAME
file_format = cfg.DATASET.FILE_FORAMT

# The file to download is named "<file_name>.<file_format>".
archive_name = "%s.%s" % (file_name, file_format)
download_file_by_url(cfg.DATASET.SOURCE, cfg.DATASET.ROOT, archive_name, file_format)

Read DICOM Images

In [None]:
# Both the images and the mask live under <ROOT>/<file_name>.
data_dir = os.path.join(cfg.DATASET.ROOT, file_name)

# Image stack.
img_path = os.path.join(data_dir, cfg.DATASET.IMG_DIR)
imgs = read_dicom_imgs(img_path)

# Mask, read with the same DICOM reader.
mask_path = os.path.join(data_dir, cfg.DATASET.MASK_FILE)
mask = read_dicom_imgs(mask_path)

Read landmarks and derive binary labels

In [None]:
# Load the landmark table, indexed by the 'Subject' column.
df_file = os.path.join(cfg.DATASET.ROOT, file_name, cfg.DATASET.LANDMARK_FILE)
df = pd.read_csv(df_file, index_col='Subject')

# The first six columns are used as landmark coordinates
# (presumably three (x, y) pairs per subject — confirm against the CSV).
landmarks = df.iloc[:, :6]

# Binarise the labels: group 0 stays 0, every other group becomes 1.
# Take an explicit copy so that `df` is not modified through a shared buffer and
# the in-place assignment still works when pandas returns read-only arrays
# (Copy-on-Write mode).
y = df['Group'].to_numpy(copy=True)
y[y != 0] = 1

Visualising Data and Landmarks

In [None]:
def visualise_imgs(imgs, landmarks=None, columns=10, figsize=(20, 36)):
    """Show one image per sample of a stack as a grid of subplots.

    Args:
        imgs: Image stack with a ``shape`` attribute and NumPy-style indexing;
            ``imgs[i, 0, ...]`` is drawn for sample ``i``.
        landmarks: Optional per-sample landmark coordinates, one row per sample
            laid out as ``x0, y0, x1, y1, ...``. A pandas ``DataFrame`` or any
            2-D array-like is accepted. When given, every landmark is overlaid
            on its image as a red-edged circle.
        columns: Number of subplots per row.
        figsize: Figure size passed to ``plt.figure``.
    """
    n_imgs = imgs.shape[0]
    # Ceiling division: exactly as many rows as needed, so no spare empty row
    # is reserved when ``n_imgs`` is a multiple of ``columns``.
    rows = max(1, (n_imgs + columns - 1) // columns)

    # Convert once up front so DataFrames and plain arrays are handled alike.
    all_coords = None if landmarks is None else np.asarray(landmarks)

    fig = plt.figure(figsize=figsize)

    for i in range(n_imgs):
        # Draw on the Axes object directly instead of relying on pyplot's
        # implicit "current axes" state.
        ax = fig.add_subplot(rows, columns, i + 1)
        ax.axis('off')
        ax.imshow(imgs[i, 0, ...])
        if all_coords is not None:
            # Each row is a flat list of (x, y) pairs.
            coords = all_coords[i].reshape((-1, 2))
            # Markers only: linestyle 'none' keeps the landmarks unconnected.
            ax.plot(
                coords[:, 0],
                coords[:, 1],
                linestyle='none',
                marker='o',
                markersize=5,
                markerfacecolor=(1, 1, 1, 0.1),
                markeredgewidth=1.5,
                markeredgecolor='r',
            )
        # Title each subplot with its 1-based position in the stack.
        ax.set_title(str(i + 1))

    plt.show()

In [None]:
# Plot the images as read from disk, with their landmarks overlaid.
visualise_imgs(imgs, landmarks=landmarks)

### CMR Pre-processing

In [None]:
from kale.prepdata.image_transform import mask_img_stack, reg_img_stack, rescale_img_stack, normalize_img_stack

Image Registration

In [None]:
# Register the image stack, passing the landmark coordinates as a plain array.
# NOTE(review): `max_dist` is not used in the cells visible here; presumably it is a
# registration distance measure — confirm against `reg_img_stack`.
img_reg, max_dist = reg_img_stack(imgs, landmarks.values)

In [None]:
# Plot the registered images (no landmark overlay).
visualise_imgs(img_reg)

Masking

In [None]:
# Mask the registered stack with a single slice of the mask data, `mask[0, 0, ...]`.
img_masked = mask_img_stack(img_reg, mask[0, 0, ...])

In [None]:
# Plot the masked images.
visualise_imgs(img_masked)

Rescaling

In [None]:
# Rescale the masked stack with `scale=2`.
# NOTE(review): whether 2 enlarges or shrinks the images depends on `rescale_img_stack` — confirm.
img_rescaled = rescale_img_stack(img_masked, scale=2)

In [None]:
# Plot the rescaled images.
visualise_imgs(img_rescaled)

Normalization

In [None]:
# Normalise the rescaled stack; the result is the classifier input used below.
img_norm = normalize_img_stack(img_rescaled)

In [None]:
# Plot the normalised images.
visualise_imgs(img_norm)

### PAH Classification

In [None]:
from kale.interpret import model_weights
from kale.pipeline.mpca_trainer import MPCATrainer

In [None]:
from sklearn.model_selection import cross_validate

# The pre-processed image stack is the model input.
x = img_norm

# Metrics to report for each fold.
scoring_metrics = ['accuracy', 'roc_auc']

# Evaluate an MPCA-based trainer (200 features) with cv=10, in a single job.
trainer = MPCATrainer(n_features=200)
cv_results = cross_validate(trainer, x, y, cv=10, scoring=scoring_metrics, n_jobs=1)

In [None]:
# Display the raw cross-validation results as the cell output.
cv_results

In [None]:
# Report each metric averaged over the cross-validation folds.
mean_accuracy = np.mean(cv_results['test_accuracy'])
print("Accuracy: ", mean_accuracy)
mean_auc = np.mean(cv_results['test_roc_auc'])
print("AUC: ", mean_auc)

### Model Interpretation

In [None]:
from kale.interpret import model_weights

# Fit the trainer on the full data set (x, y).
trainer.fit(x, y)

# Send the classifier coefficients through the MPCA inverse transform and subtract
# the MPCA mean, giving weights that can be drawn over an input image below.
mpca = trainer.mpca
weights = mpca.inverse_transform(trainer.clf.coef_) - mpca.mean_

# Select the top weights with a ratio of 0.1 and plot them over the first input image.
top_weights = model_weights.select_top_weight(weights, select_ratio=0.1)
fig = model_weights.plot_weights(top_weights[0][0], background_img=x[0][0])