# PyKale Tutorial: A Machine Learning Pipeline for PAH Diagnosis
| [Open in Colab](https://colab.research.google.com/github/pykale/pykale/blob/cmr_example/examples/cmri_mpca/CMR_PAH.ipynb) |  [Launch Binder](https://mybinder.org/v2/gh/pykale/pykale/HEAD?filepath=examples%2Fcmri_mpca%2FCMR_PAH.ipynb) |

## Introduction

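This tutorial builds a machine learning pipeline for diagnosing pulmonary arterial hypertension (PAH) from cardiac magnetic resonance (CMR) images, following the reference below. It covers:

- Pre-processing: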
    - Registration
    - Masking
    - Rescaling
    - Normalization
- Machine learning pipeline:
    - Multilinear principal component analysis
    - Discriminative feature selection
    - Linear classification model training    

[Figure](https://oup.silverchair-cdn.com/oup/backfile/Content_public/Journal/ehjcimaging/22/2/10.1093_ehjci_jeaa001/2/m_jeaa001f2.jpeg?Expires=1631272906&Signature=PKl6KLDSoNyiTy~GNtXayJCucGKhweXvGz~svHY~ThjqjbokMVCnyJMMjoGQ4C81HdUcdnJA-rcNaqmDjspUs5eAX7avG~ckkIGXqGbPWrfnaMfwywWG3EXvvH0tw9ZcFeelnWgF4lkT0RFDVgzvzhHBvefNgS0ZGwLqiGJduANJFmWIXvYgNiU6M6kRbdpOJZBltknUO~Jv43-ghqmEX7dTfOKAx6~14quDC5cgzVFfwNFRKSn0P5JZRf~wIhQ6GQ4tprl7eXuzuRHcKnFbM~UkZOtcQvVhJofCCgSDnExyS6bns9Dop39OlfQHUdY4cwn1WaSnMKEqAqQaKZ715w__&Key-Pair-Id=APKAIE5G5CRDK6RD3PGA)

**Reference:**

Swift, A. J., Lu, H., Uthoff, J., Garg, P., Cogliano, M., Taylor, J., ... & Kiely, D. G. (2020). A machine learning cardiac magnetic resonance approach to extract disease features and automate pulmonary arterial hypertension diagnosis. European Heart Journal-Cardiovascular Imaging. [[Link](https://academic.oup.com/ehjcimaging/article/22/2/236/5717931)]

## Setup

In [None]:
# Colab-only bootstrap. This cell relies on IPython: `get_ipython()` and the
# `!` / `%` magics are not plain Python.
if 'google.colab' in str(get_ipython()):
    print('Running on CoLab')
    # To use the main branch instead, uncomment the following two lines ...
    # !pip install git+https://github.com/pykale/pykale.git
    # !git clone https://github.com/pykale/pykale.git
    # ... and comment out the two active lines below, which install and clone the `cmr_example` branch.
    !pip install git+https://github.com/pykale/pykale.git@cmr_example
    !git clone -b cmr_example https://github.com/pykale/pykale.git   
    # Move into the example folder; presumably so that the relative `config` import
    # and the `configs/tutorial_svc.yaml` path used later resolve (confirm).
    %cd pykale/examples/cmri_mpca
else:
    print('Not running on CoLab')

Import the required modules.

In [None]:
import os

%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from config import get_cfg_defaults

from kale.utils.download import download_file_by_url
from kale.loaddata.get_dicom import read_dicom_images
from kale.interpret import visualize

## Get CMR Images, Landmark Locations, and Labels

In [None]:
# Build the experiment configuration used by every later cell.
cfg_path = "configs/tutorial_svc.yaml" # Path to `.yaml` config file

# Start from the defaults provided by the example's `config` module,
# then overlay the values stored in the YAML file.
cfg = get_cfg_defaults()
cfg.merge_from_file(cfg_path)
# NOTE(review): `freeze()` presumably makes the config read-only from here on (confirm against the config class).
cfg.freeze()
# Print the merged configuration so the settings in effect are visible in the notebook output.
print(cfg)

Download data

In [None]:
# Download the dataset archive. Its file name is "<BASE_DIR>.<file format>", and the
# source / root locations both come from the DATASET section of the config.
# NOTE(review): `FILE_FORAMT` looks like a misspelling of "FILE_FORMAT"; it must match the key
# declared in the example's config module, so it is kept as-is here. Confirm before renaming.
base_dir = cfg.DATASET.BASE_DIR
file_format = cfg.DATASET.FILE_FORAMT
download_file_by_url(
    cfg.DATASET.SOURCE,
    cfg.DATASET.ROOT,
    f"{base_dir}.{file_format}",
    file_format,
)

Read DICOM Images

In [None]:
# The image and mask folders both live under <ROOT>/<BASE_DIR>.
dataset_dir = os.path.join(cfg.DATASET.ROOT, base_dir)

# CMR image stack, read with both instance and patient sorting enabled.
img_path = os.path.join(dataset_dir, cfg.DATASET.IMG_DIR)
images = read_dicom_images(img_path, sort_instance=True, sort_patient=True)

# Mask stack, read with instance sorting only.
mask_path = os.path.join(dataset_dir, cfg.DATASET.MASK_DIR)
mask = read_dicom_images(mask_path, sort_instance=True)

Read Landmarks and get labels

In [None]:
# Load the per-subject table (indexed by the "Subject" column), which holds the
# landmark values and the group label of each subject.
df_file = os.path.join(cfg.DATASET.ROOT, base_dir, cfg.DATASET.LANDMARK_FILE)
df = pd.read_csv(df_file, index_col="Subject")
# The first six columns are used as landmarks
# (presumably three (x, y) points per subject; confirm against the CSV header).
landmarks = df.iloc[:, :6].values
# Take an explicit copy of the labels before editing them: `.values` can be a view of
# `df`, so writing into it would silently change the DataFrame, and under pandas
# Copy-on-Write that view is read-only and the assignment raises ValueError.
y = df["Group"].to_numpy(copy=True)
y[y != 0] = 1  # convert to binary classification problem, i.e. no PH vs PAH

Visualizing Data and Landmarks

In [None]:
# Plot the raw image stack with the landmark values passed in, then call `.show()` on the returned object.
visualize.plot_multi_images(images, landmarks=landmarks).show()

### CMR Pre-processing

In [None]:
from kale.prepdata.image_transform import mask_img_stack, normalize_img_stack, reg_img_stack, rescale_img_stack

Image Registration

In [None]:
# Register the image stack using the landmarks. The call returns two values; only
# `img_reg` is used later in this notebook.
# NOTE(review): `max_dist` is presumably a landmark-distance statistic from registration; confirm in the kale docs.
img_reg, max_dist = reg_img_stack(images, landmarks)

In [None]:
# Plot the registered images for a visual check of the registration step.
visualize.plot_multi_images(img_reg).show()

Masking

In [None]:
# Apply one mask to the registered stack: `mask[0, 0, ...]` takes the entry at index 0
# along the first two axes of the mask array
# (presumably first subject, first frame; confirm against `read_dicom_images`).
img_masked = mask_img_stack(img_reg, mask[0, 0, ...])

In [None]:
# Plot the masked images for a visual check of the masking step.
visualize.plot_multi_images(img_masked).show()

Rescaling

In [None]:
# Rescale the masked images.
# NOTE(review): `scale=2` is presumably a down-sampling factor (each spatial side halved); confirm in the kale docs.
img_rescaled = rescale_img_stack(img_masked, scale=2)

In [None]:
# Plot the rescaled images for a visual check of the rescaling step.
visualize.plot_multi_images(img_rescaled).show()

Normalization

In [None]:
# Normalize the rescaled images; `img_norm` is the input to the classification pipeline below.
img_norm = normalize_img_stack(img_rescaled)

In [None]:
# Plot the normalized images, i.e. the final pre-processed data.
visualize.plot_multi_images(img_norm).show()

### PAH Classification

In [None]:
from sklearn.model_selection import cross_validate
from kale.pipeline.mpca_trainer import MPCATrainer

# Work on a copy so `img_norm` itself is left untouched by the pipeline.
x = img_norm.copy()
# The classifier type comes from the config (PIPELINE.CLF).
# NOTE(review): `n_features=200` is presumably the number of features kept by feature selection; confirm in the kale docs.
trainer = MPCATrainer(classifier=cfg.PIPELINE.CLF, n_features=200)
# 10-fold cross-validation scored by accuracy and ROC AUC; `n_jobs=1` runs the folds sequentially.
cv_results = cross_validate(trainer, x, y, cv=10, scoring=["accuracy", "roc_auc"], n_jobs=1)

In [None]:
# Echo the raw cross-validation results (per-fold fit/score times and test scores) as the cell output.
cv_results

In [None]:
# Summarise the cross-validation run: fold-averaged timings first, then fold-averaged test metrics.
print(f"Averaged training time: {np.mean(cv_results['fit_time']):.4f} seconds")
print(f"Averaged testing time: {np.mean(cv_results['score_time']):.4f} seconds")
print(f"Averaged Accuracy: {np.mean(cv_results['test_accuracy']):.4f}")
print(f"Averaged AUC: {np.mean(cv_results['test_roc_auc']):.4f}")

### Model Interpretation

In [None]:
from kale.interpret import model_weights

# Fit `trainer` on the full dataset: scikit-learn's `cross_validate` fits clones of the
# estimator, so the `trainer` object itself has not been fitted yet.
trainer.fit(x, y)

# Map the classifier coefficients back through the MPCA inverse transform, then subtract the MPCA mean.
# NOTE(review): presumably `inverse_transform` adds the mean back, hence the subtraction; confirm in the kale docs.
weights = trainer.mpca.inverse_transform(trainer.clf.coef_) - trainer.mpca.mean_
# NOTE(review): `select_ratio=0.1` presumably keeps the top 10% of weights by magnitude; confirm in the kale docs.
top_weights = model_weights.select_top_weight(weights, select_ratio=0.1)

# Overlay the selected weights on a background image; `[0][0]` takes index 0 along the first
# two axes of each array (presumably first sample, first frame; confirm).
visualize.plot_weights(top_weights[0][0], background_img=x[0][0]).show()