In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import os

from cvmt.utils import (load_yaml_params, nested_dict_to_easydict)
from cvmt.utils import (
    img_coord_2_cartesian_coord,
    translate_landmarks,
    rotate_landmarks,
    plot_landmarks,
    normalize_coords,
    plot_image_and_vertebral_landmarks,
)

from cvmt.ml.utils import download_wandb_model_checkpoint
from cvmt.ml.trainer import create_dataloader, max_indices_4d_tensor
from cvmt.inference.inference import load_pretrained_model_eval_mode
import torch
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Raise pandas' display limits so that long / wide frames are rendered
# without being truncated in the notebook output.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 100

In [4]:
def cartesian_to_polar(coords):
    """Transform cartesian coordinates to polar coordinates.

    Args:
        coords: Array-like whose last axis holds the cartesian pair, with
            index 0 read as ``x`` and index 1 read as ``y``. Any number of
            leading axes is allowed.

    Returns:
        np.ndarray with the leading shape of ``coords`` and a last axis of
        length 2 holding ``(r, theta)``; ``theta`` is the ``np.arctan2(y, x)``
        angle in radians.
    """
    # Accept plain lists / tuples too: the ellipsis indexing below needs an ndarray.
    coords = np.asarray(coords)
    x, y = coords[..., 0], coords[..., 1]
    # hypot computes sqrt(x**2 + y**2) without the intermediate overflow of
    # squaring first (relevant for integer inputs or very large magnitudes).
    r = np.hypot(x, y)
    theta = np.arctan2(y, x)
    return np.stack((r, theta), axis=-1)

In [5]:
def _align_vertebra(points, swap_x_y, tr_index, rot_index, nr_index, plot):
    """Convert, translate, rotate and width-normalize the landmarks of one vertebra.

    Returns a tuple ``(cartesian, rotated, normalized)`` so that callers can
    also reach the intermediate (un-normalized) coordinates.
    """
    cart = img_coord_2_cartesian_coord(points, swap_x_y)
    translated = translate_landmarks(cart, ref_index=tr_index)
    rotated = rotate_landmarks(translated, ref_index=rot_index)
    normalized = normalize_coords(
        landmarks=rotated,
        ref_index=nr_index,
        height_wise=False,
    )
    if plot:
        plot_landmarks(normalized)
    return cart, rotated, normalized


def post_process_vertebral_landmarks(
    landmarks: np.ndarray,
    swap_x_y: bool = False,
    plot: bool = False,
    retrieve_orig_position: bool = False,
) -> np.ndarray:
    """Rotate, translate, and normalize the vertebral landmarks. The
    normalization is done with respect to the distance between the 2
    points at the left and right of the base of each shape.

    The input rows are split into three groups that are processed
    independently: rows 0-2 (``c2``), rows 3-7 (``c3``) and rows 8 onward
    (``c4``). Each group is converted with ``img_coord_2_cartesian_coord``,
    translated, rotated and normalized around its own reference landmarks.

    Args:
        landmarks: Landmark coordinates, one landmark per row. The input is
            not modified; every group is copied before processing.
        swap_x_y: Forwarded to ``img_coord_2_cartesian_coord``.
        plot: If True, ``plot_landmarks`` is called on each normalized group.
        retrieve_orig_position: If True, the normalized ``c2`` and ``c3``
            groups are shifted by their original distance to ``c4``,
            expressed in units of the ``c4`` base width, instead of all three
            groups sharing the same origin.

    Returns:
        The three processed groups stacked row-wise with ``np.vstack`` in the
        order ``c2``, ``c3``, ``c4``.
    """
    c2, c3, c4 = landmarks[0:3].copy(), landmarks[3:8].copy(), landmarks[8:].copy()

    # Reference landmark used as the translation origin of each group.
    c2_ref_index_tr, c3_ref_index_tr, c4_ref_index_tr = 0, 1, 1
    # Reference landmark of c4 used for normalization (other end of its base).
    c4_ref_index_nr = 3

    c2_cart, _, c2_trns_rot_n = _align_vertebra(
        c2, swap_x_y, tr_index=c2_ref_index_tr, rot_index=2, nr_index=2, plot=plot,
    )
    c3_cart, _, c3_trns_rot_n = _align_vertebra(
        c3, swap_x_y, tr_index=c3_ref_index_tr, rot_index=3, nr_index=3, plot=plot,
    )
    c4_cart, c4_trns_rot, c4_trns_rot_n = _align_vertebra(
        c4, swap_x_y, tr_index=c4_ref_index_tr, rot_index=3, nr_index=c4_ref_index_nr,
        plot=plot,
    )

    # retrieve the original distance of the vertebrae
    if retrieve_orig_position:
        # This branch previously referenced undefined names (c2_ref_index,
        # c3_ref_index, c4_ref_index) and therefore always raised NameError.
        # The translation reference of each group is its origin, so those are
        # the points whose original separation is restored here.
        #
        # The scale is the length of the c4 base (translated origin ->
        # normalization reference). A scalar is used on purpose: dividing by
        # the coordinate pair element-wise would divide by a (near-)zero
        # component once the reference point lies on an axis.
        # NOTE(review): np.abs discards the direction of the offset; kept as
        # in the original expression - confirm this is intended.
        c4_base_width = np.linalg.norm(c4_trns_rot[c4_ref_index_nr])
        c4_origin = c4_cart[c4_ref_index_tr]
        c24_cart_dist = np.abs(c2_cart[c2_ref_index_tr] - c4_origin) / c4_base_width
        c34_cart_dist = np.abs(c3_cart[c3_ref_index_tr] - c4_origin) / c4_base_width
        # Out-of-place addition so that the result dtype can widen if needed.
        c2_trns_rot_n = c2_trns_rot_n + c24_cart_dist
        c3_trns_rot_n = c3_trns_rot_n + c34_cart_dist

    normalized_landmarks = np.vstack((c2_trns_rot_n, c3_trns_rot_n, c4_trns_rot_n))
    return normalized_landmarks

# Stage Clustering

In this notebook, we show how the nominal patterns of the different stages reported by McNamara and
Franchi can be used in a clustering task. The nominal patterns serve as the characteristics of the
cluster centers.

In [6]:
# Move to the repository root so that the relative paths used below
# ("configs/...") resolve. Guarded so that re-running this cell does not
# climb another two directory levels.
if not os.path.isdir("configs"):
    os.chdir("../../")

# `!source configs/.env` executed in a throw-away subshell, so none of the
# variables it defined ever reached this kernel. Load the file into
# os.environ directly instead. Only plain `KEY=VALUE` / `export KEY=VALUE`
# lines are understood; shell expansions inside values are not evaluated.
ENV_FILE = os.path.join("configs", ".env")
if os.path.isfile(ENV_FILE):
    with open(ENV_FILE, encoding="utf-8") as env_fh:
        for env_line in env_fh:
            env_line = env_line.strip()
            if not env_line or env_line.startswith("#") or "=" not in env_line:
                continue
            env_key, env_value = env_line.split("=", 1)
            env_key = env_key.strip()
            if env_key.startswith("export "):
                env_key = env_key[len("export "):].strip()
            env_value = env_value.strip()
            # Drop one pair of matching surrounding quotes, as a shell would.
            if len(env_value) >= 2 and env_value[0] == env_value[-1] and env_value[0] in "\"'":
                env_value = env_value[1:-1]
            # Variables already set for the kernel process take precedence.
            os.environ.setdefault(env_key, env_value)
else:
    # Never print values here: the file may hold credentials.
    print(f"{ENV_FILE} not found; environment left unchanged")

## Load parameters

In [7]:
# Project-wide YAML configuration, relative to the current working directory.
CONFIG_PARAMS_PATH = "configs/params.yaml"

# Read the YAML file first, then wrap the nested dict so that the cells below
# can use attribute access (e.g. `params.TRAIN.LOSS_NAME`).
raw_params = load_yaml_params(CONFIG_PARAMS_PATH)
params = nested_dict_to_easydict(raw_params)

## Load model

In [8]:
# Fetch the checkpoint named in the config; the helper hands back the local
# checkpoint path together with a model id.
wandb_download = download_wandb_model_checkpoint(
    wandb_checkpoint_uri=params.VERIFY.WANDB_CHECKPOINT_REFERENCE_NAME,
)
checkpoint_path, model_id = wandb_download
print(checkpoint_path)

[34m[1mwandb[0m: Downloading large artifact model-urt7dgbp:v47, 100.36MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.4


./artifacts/model-urt7dgbp:v47/model.ckpt


In [9]:
# Settings consumed by the model-loading and dataloader cells below.
use_pretrain = True

# Vertebral-landmark task entry of the training config, and its identifier.
task_config = params.TRAIN.V_LANDMARK_TASK
task_id = task_config.TASK_ID

# Loss name, model hyper-parameters and inference-time transforms.
loss_name, model_params, transforms_params = (
    params.TRAIN.LOSS_NAME,
    params.MODEL.PARAMS,
    params.INFERENCE.TRANSFORMS,
)

In [10]:
# Collect the loader arguments in one place, then build the model from the
# downloaded checkpoint. The helper also returns the device it selected.
model_load_kwargs = dict(
    model_params=model_params,
    use_pretrain=use_pretrain,
    checkpoint_path=checkpoint_path,
    task_id=task_id,
    loss_name=loss_name,
)
model, device = load_pretrained_model_eval_mode(**model_load_kwargs)

  rank_zero_warn(


## Load and process cluster centers

In [11]:
# Folder holding one header-less CSV of nominal landmark coordinates per
# stage, named cs1.csv ... cs6.csv.
patterns_dir = os.path.join(
    params.INTERMEDIATE_DATA_DIRECTORY, "stages_nominal_patterns"
)

cluster_centers = []
for i in range(1, 7):
    cs_df = pd.read_csv(
        os.path.join(patterns_dir, f"cs{i}.csv"),
        header=None,
        names=['index', 'x', 'y'],
    )
    # Keep only the coordinate columns; the leading index column is dropped.
    cs = cs_df.loc[:, ['x', 'y']].to_numpy()
    # Bring the nominal pattern into the same normalized frame that is used
    # for the predicted landmarks further down.
    normalized_landmarks = post_process_vertebral_landmarks(
        landmarks=cs,
        swap_x_y=False,
        plot=False,
    )
    cluster_centers.append(normalized_landmarks)

## Load training and validation set data, predict, and process the landmarks

In [12]:
# Arguments common to both splits: one sample per batch, dataset order
# preserved (shuffle off), sampler_n_samples left unset.
loader_kwargs = dict(
    task_id=task_id,
    batch_size=1,
    shuffle=False,
    params=params,
    sampler_n_samples=None,
)

# train dataloader
train_dataloader = create_dataloader(split='train', **loader_kwargs)
# val dataloader
val_dataloader = create_dataloader(split='val', **loader_kwargs)

In [13]:
# Predict the vertebral landmarks of every training image and collect their
# normalized coordinates, one array per sample.
train_set = []
for i, batch in enumerate(train_dataloader):
    images = batch['image'].to(device)
    targets = batch['v_landmarks'].to(device)
    # Forward pass only; no gradients are needed.
    with torch.no_grad():
        heatmaps = model(images, task_id=task_id)
    # Heatmaps -> coordinates, drop the singleton axes, hand over to numpy.
    lmks = max_indices_4d_tensor(heatmaps).squeeze().cpu().numpy()
    # Same post-processing as for the nominal patterns, with x/y swapped.
    normalized_landmarks = post_process_vertebral_landmarks(
        landmarks=lmks,
        swap_x_y=True,
        plot=False,
    )
    train_set.append(normalized_landmarks)

In [14]:
# Predict the vertebral landmarks of every validation image and collect
# their normalized coordinates, one array per sample.
val_set = []
for i, batch in enumerate(val_dataloader):
    images = batch['image'].to(device)
    targets = batch['v_landmarks'].to(device)
    # Forward pass only; no gradients are needed.
    with torch.no_grad():
        heatmaps = model(images, task_id=task_id)
    # Heatmaps -> coordinates, drop the singleton axes, hand over to numpy.
    lmks = max_indices_4d_tensor(heatmaps).squeeze().cpu().numpy()
    # Same post-processing as for the nominal patterns, with x/y swapped.
    normalized_landmarks = post_process_vertebral_landmarks(
        landmarks=lmks,
        swap_x_y=True,
        plot=False,
    )
    val_set.append(normalized_landmarks)

## Flatten original coordinates

In [15]:
# Turn the per-sample lists into single arrays (samples along the first axis).
train_set, val_set, cluster_centers = (
    np.array(samples) for samples in (train_set, val_set, cluster_centers)
)

In [16]:
# Collapse everything after the sample axis so that each sample / cluster
# center becomes one flat feature vector.
train_set_fl = train_set.reshape(len(train_set), -1)
val_set_fl = val_set.reshape(len(val_set), -1)
cluster_centers_fl = cluster_centers.reshape(len(cluster_centers), -1)

# Polar coordinates

# Fit to PCA

# Fit KMeans on the PCA coordinates

# Fit KMeans on the raw flattened coordinates

In [30]:
# Use the flattened nominal stage patterns as the starting cluster centers.
init_centers = np.array(cluster_centers_fl)

# With explicit starting centers a single initialization run is enough
# (n_init=1). `fit` returns the estimator itself, so the calls are chained.
kmeans = KMeans(n_clusters=6, init=init_centers, n_init=1).fit(train_set_fl)

# Show the fitted estimator as the cell output.
kmeans

KMeans(init=array([[ 0.  ,  0.  ,  0.5 ,  0.01,  1.  ,  0.  , -0.05,  0.77,  0.  ,
         0.  ,  0.51, -0.01,  1.  ,  0.  ,  0.96,  0.33, -0.05,  0.77,
         0.  ,  0.  ,  0.52, -0.01,  1.  ,  0.  ,  0.98,  0.33],
       [ 0.  ,  0.  ,  0.41,  0.17,  1.  ,  0.  , -0.03,  0.77,  0.  ,
         0.  ,  0.48, -0.01,  1.  ,  0.  ,  0.96,  0.33, -0.06,  0.78,
         0.  ,  0.  ,  0.5 ,  0.01,  1.  , -0.  ,  0.96,  0.32],
       [ 0.  ,  0.  ,  0.45,  0.19,  1.  ,  0.  , -0.06,  0.78,  0.  ,
         0.  ,  0.5 ,  0.08,  1.  , -0.  ,  0.9 ,  0.55, -0.06,  0.78,
         0.  ,  0.  ,  0.52, -0.02,  1.  , -0.  ,  0.97,  0.33],
       [ 0.  ,  0.  ,  0.47,  0.17,  1.  ,  0.  , -0.01,  0.81,  0.  ,
         0.  ,  0.5 ,  0.16,  1.  , -0.  ,  1.02,  0.71, -0.03,  0.77,
         0.  ,  0.  ,  0.48,  0.14,  1.  ,  0.  ,  1.  ,  0.7 ],
       [ 0.  ,  0.  ,  0.46,  0.2 ,  1.  , -0.  , -0.02,  1.03,  0.  ,
         0.  ,  0.49,  0.18,  1.  ,  0.  ,  0.98,  0.97,  0.  ,  1.02,
         0.  ,  0.

In [31]:
# Assign every training sample to one of the fitted clusters, then display
# the distinct cluster labels together with how many samples each received.
train_set_clusters = kmeans.predict(train_set_fl)
np.unique(train_set_clusters, return_counts=True)

(array([0, 1, 2, 3, 4, 5], dtype=int32), array([132, 138,  97,  72,  60,   2]))

In [32]:
# Assign every validation sample to one of the fitted clusters, then display
# the distinct cluster labels together with how many samples each received.
val_set_clusters = kmeans.predict(val_set_fl)
np.unique(val_set_clusters, return_counts=True)

(array([0, 1, 2, 3, 4, 5], dtype=int32), array([ 2, 43,  3, 40, 75,  1]))

## Plot landmarks and corresponding clusters

In [None]:
# Only every PLOT_EVERY-th validation sample is visualised.
PLOT_EVERY = 10

# val dataloader
# Built again here with shuffle=False so that the iteration order lines up
# with the positions in `val_set_clusters`.
val_dataloader = create_dataloader(
    task_id=task_id,
    batch_size=1,
    split='val',
    shuffle=False,
    params=params,
    sampler_n_samples=None,
)

for i, batch in enumerate(val_dataloader):
    # Skip the samples that are not displayed *before* running the model;
    # previously every batch was pushed through the network and discarded.
    if i % PLOT_EVERY != 0:
        continue
    images = batch['image'].to(device)
    with torch.no_grad():
        lmks = model(images, task_id=task_id)
    # First sample, first channel of the batch as a plain 2-D array.
    image = images.detach().cpu().numpy()[0, 0, ...]
    # turn heatmaps to coordinates
    lmks = max_indices_4d_tensor(lmks).squeeze().cpu().numpy()
    # Copy with the two coordinate columns swapped and the second one
    # negated, used for the stand-alone landmark plot.
    lmks_flipped = np.flip(lmks.copy(), 1)
    lmks_flipped[:, 1] = -1 * lmks_flipped[:, 1]
    # Cluster labels are 0-based; print them 1-based to match cs1 ... cs6.
    clss = val_set_clusters[i]
    print(clss+1)
    plot_landmarks(lmks_flipped)
    plot_image_and_vertebral_landmarks(
        img_name="",
        model_id="",
        landmarks=lmks,
        image=image,
    )


In [None]:
# Show the predicted cluster label of every validation sample; rows follow
# the order of `val_set_clusters`.
val_clusters_df = pd.DataFrame(val_set_clusters)
display(val_clusters_df)