In [1]:
import pandas as pd
from reload_recursive import reload_recursive
import os
from loguru import logger
from pathlib import Path
import json
import numpy as np
from tqdm.notebook import tqdm
import re
import sys

import mri_data
import monai_training

In [31]:
reload_recursive(mri_data)
reload_recursive(monai_training)

from mri_data.file_manager import DataSet, scan_3Tpioneer_bids
from mri_data import file_manager as fm
from mri_data import utils
from monai_training.preprocess import DataSetProcesser

In [3]:
logger.remove()

In [4]:
drive_root = fm.get_drive_root()
msmri_home = Path("/home/srs-9/Projects/ms_mri")
training_work_dirs = msmri_home / "training_work_dirs"
dataroot = drive_root / "3Tpioneer_bids"
clinical_data_root = drive_root / "Secure_Data" / "Large"
project_dataroot = msmri_home / "data"

### Automatic Segmentations

In [5]:
work_dir_names = ["choroid_pineal_pituitary3", "choroid_pineal_pituitary3-2", "choroid_pineal_pituitary3-3", "choroid_pineal_pituitary3-4"]
work_dirs = [training_work_dirs / name / "ensemble_output" for name in work_dir_names]
ensemble_datasets = [fm.scan_3Tpioneer_bids(work_dir, label="flair.t1_ensemble.nii.gz") for work_dir in work_dirs]

In [6]:
def get_volumes(scan):
    vol_stats = utils.compute_volume(scan.label_path, index_mask_file=scan.label_path)
    
    return tuple([stat[1] for stat in vol_stats])

In [7]:
vols = []
for dataset in ensemble_datasets:
    for scan in dataset:
        try:
            vol = get_volumes(scan)
        except Exception:
            print(scan.subid)
            continue

        try:
            assert len(vol) == 3
        except AssertionError:
            vol = [None, None, None]

        vols.append((scan.subid, vol))


    vols_arr = np.stack([vol[1] for vol in vols])
    vol_sub_arr = np.array([vol[0] for vol in vols])

1038
1540


In [8]:
keep_cols = ["ms_type", "flair_contrast"]
df_full = pd.read_csv(project_dataroot / "clinical_data_full.csv", index_col="subid")
df_full = df_full[keep_cols]
df_full.index.name = "subject_id"
try:
    df_full.insert('dz_type', df_full['ms_type'])
except Exception:
    pass

df_full.loc[:, 'dz_type'] = df_full['ms_type']

df_full.loc[df_full['ms_type'] == 'CIS', 'dz_type'] = 'RRMS'
df_full.loc[df_full['ms_type'].isin(['PPMS', 'SPMS', 'RPMS', 'PRMS']), 'dz_type'] = 'PMS'
df_full.loc[df_full['ms_type'].isin(['NIND', 'OIND', 'HC']), 'dz_type'] = '!MS'

df_full.loc[:, 'dz_type2'] = df_full['dz_type']
df_full.loc[df_full['dz_type'].isin(['RRMS', 'PMS']), 'dz_type2'] = 'MS'

In [9]:
df = pd.DataFrame(vols_arr, 
             index=[int(sub) for sub in vol_sub_arr], 
             columns=["choroid_volume", "pineal_volume", "pituitary_volume"])

df['dz_type'] = df_full['dz_type']
df['dz_type2'] = df_full['dz_type2']
df['flair_contrast'] = df_full['flair_contrast']

### Manual Segmentations

In [18]:
subjects = []
for dataset in ensemble_datasets:
    for scan in dataset:
        subjects.append(int(scan.subid))

In [11]:
def has_subject(scan, subjects: list[int]) -> bool:
    if int(scan.subid) in subjects:
        return True
    else:
        return False

In [12]:
orig_dataset_proc = DataSetProcesser.new_dataset(dataroot, fm.scan_3Tpioneer_bids, filters=fm.filter_first_ses)
orig_dataset_proc.filter([has_subject], [(subjects,)])
orig_dataset_proc.prepare_labels(["choroid_t1_flair", "pineal", "pituitary"], ["CH", "SRS", "DT", "ED"])
orig_dataset = orig_dataset_proc.dataset

100%|██████████| 40/40 [00:00<00:00, 121.68it/s]


In [13]:
orig_vols = []
for scan in orig_dataset:
    try:
        vol = get_volumes(scan)
    except Exception:
        print(scan.subid)
        continue

    try:
        assert len(vol) == 3
    except AssertionError:
        vol = [None, None, None]

    orig_vols.append((scan.subid, vol))


orig_vols_arr = np.stack([vol[1] for vol in orig_vols])
orig_vol_sub_arr = np.array([vol[0] for vol in orig_vols])

In [14]:
orig_df = pd.DataFrame(orig_vols_arr, 
             index=[int(sub) for sub in orig_vol_sub_arr], 
             columns=["choroid_volume", "pineal_volume", "pituitary_volume"])

orig_df['dz_type'] = df_full['dz_type']
orig_df['dz_type2'] = df_full['dz_type2']
orig_df['flair_contrast'] = df_full['flair_contrast']

### Automatic Segmentation Volumes

In [15]:
print("Choroid:")
choroid_ms_mean = df[df['dz_type'] == "RRMS"].choroid_volume.mean()
choroid_ms_mean_w = df[(df['dz_type'] == "RRMS") & (df['flair_contrast'] == "WITH")].choroid_volume.mean()
choroid_ms_mean_wo = df[(df['dz_type'] == "RRMS") & (df['flair_contrast'] == "WITHOUT")].choroid_volume.mean()
choroid_notms_mean = df[df['dz_type'] == "!MS"].choroid_volume.mean()
print("MS mean:             {:0.2f}".format(choroid_ms_mean))
print("MS mean w contrast:  {:0.2f}".format(choroid_ms_mean_w))
print("MS mean wo contrast: {:0.2f}".format(choroid_ms_mean_wo))
print("!MS mean:            {:0.2f}".format(choroid_notms_mean))

print("\nPineal")
pineal_ms_mean = df[df['dz_type'] == "RRMS"].pineal_volume.mean()
pineal_ms_mean_w = df[(df['dz_type'] == "RRMS") & (df['flair_contrast'] == "WITH")].pineal_volume.mean()
pineal_ms_mean_wo = df[(df['dz_type'] == "RRMS") & (df['flair_contrast'] == "WITHOUT")].pineal_volume.mean()
pineal_notms_mean = df[df['dz_type'] == "!MS"].pineal_volume.mean()
print("MS mean:             {:0.2f}".format(pineal_ms_mean))
print("MS mean w contrast:  {:0.2f}".format(pineal_ms_mean_w))
print("MS mean wo contrast: {:0.2f}".format(pineal_ms_mean_wo))
print("!MS mean:            {:0.2f}".format(pineal_notms_mean))

print("\nPituitary")
pituitary_ms_mean = df[df['dz_type'] == "RRMS"].pituitary_volume.mean()
pituitary_ms_mean_w = df[(df['dz_type'] == "RRMS") & (df['flair_contrast'] == "WITH")].pituitary_volume.mean()
pituitary_ms_mean_wo = df[(df['dz_type'] == "RRMS") & (df['flair_contrast'] == "WITHOUT")].pituitary_volume.mean()
pituitary_notms_mean = df[df['dz_type'] == "!MS"].pituitary_volume.mean()
print("MS mean:             {:0.2f}".format(pituitary_ms_mean))
print("MS mean w contrast:  {:0.2f}".format(pituitary_ms_mean_w))
print("MS mean wo contrast: {:0.2f}".format(pituitary_ms_mean_wo))
print("!MS mean:            {:0.2f}".format(pituitary_notms_mean))

Choroid:
MS mean:             1766.81
MS mean w contrast:  1892.98
MS mean wo contrast: 1609.09
!MS mean:            1922.68

Pineal
MS mean:             228.35
MS mean w contrast:  269.92
MS mean wo contrast: 176.39
!MS mean:            254.34

Pituitary
MS mean:             700.47
MS mean w contrast:  677.11
MS mean wo contrast: 729.67
!MS mean:            716.21


### Manual Segmentation Volumes

In [16]:
print("Choroid:")
choroid_ms_mean = orig_df[orig_df['dz_type'] == "RRMS"].choroid_volume.mean()
choroid_ms_mean_w = orig_df[(orig_df['dz_type'] == "RRMS") & (orig_df['flair_contrast'] == "WITH")].choroid_volume.mean()
choroid_ms_mean_wo = orig_df[(orig_df['dz_type'] == "RRMS") & (orig_df['flair_contrast'] == "WITHOUT")].choroid_volume.mean()
choroid_notms_mean = orig_df[orig_df['dz_type'] == "!MS"].choroid_volume.mean()
print("MS mean:             {:0.2f}".format(choroid_ms_mean))
print("MS mean w contrast:  {:0.2f}".format(choroid_ms_mean_w))
print("MS mean wo contrast: {:0.2f}".format(choroid_ms_mean_wo))
print("!MS mean:            {:0.2f}".format(choroid_notms_mean))

print("\nPineal")
pineal_ms_mean = orig_df[orig_df['dz_type'] == "RRMS"].pineal_volume.mean()
pineal_ms_mean_w = orig_df[(orig_df['dz_type'] == "RRMS") & (orig_df['flair_contrast'] == "WITH")].pineal_volume.mean()
pineal_ms_mean_wo = orig_df[(orig_df['dz_type'] == "RRMS") & (orig_df['flair_contrast'] == "WITHOUT")].pineal_volume.mean()
pineal_notms_mean = orig_df[orig_df['dz_type'] == "!MS"].pineal_volume.mean()
print("MS mean:             {:0.2f}".format(pineal_ms_mean))
print("MS mean w contrast:  {:0.2f}".format(pineal_ms_mean_w))
print("MS mean wo contrast: {:0.2f}".format(pineal_ms_mean_wo))
print("!MS mean:            {:0.2f}".format(pineal_notms_mean))

print("\nPituitary")
pituitary_ms_mean = orig_df[orig_df['dz_type'] == "RRMS"].pituitary_volume.mean()
pituitary_ms_mean_w = orig_df[(orig_df['dz_type'] == "RRMS") & (orig_df['flair_contrast'] == "WITH")].pituitary_volume.mean()
pituitary_ms_mean_wo = orig_df[(orig_df['dz_type'] == "RRMS") & (orig_df['flair_contrast'] == "WITHOUT")].pituitary_volume.mean()
pituitary_notms_mean = orig_df[orig_df['dz_type'] == "!MS"].pituitary_volume.mean()
print("MS mean:             {:0.2f}".format(pituitary_ms_mean))
print("MS mean w contrast:  {:0.2f}".format(pituitary_ms_mean_w))
print("MS mean wo contrast: {:0.2f}".format(pituitary_ms_mean_wo))
print("!MS mean:            {:0.2f}".format(pituitary_notms_mean))

Choroid:
MS mean:             1954.47
MS mean w contrast:  1956.55
MS mean wo contrast: 1951.61
!MS mean:            1880.28

Pineal
MS mean:             230.64
MS mean w contrast:  260.88
MS mean wo contrast: 189.06
!MS mean:            371.61

Pituitary
MS mean:             767.78
MS mean w contrast:  755.84
MS mean wo contrast: 784.20
!MS mean:            679.33


### Inspect Segmentations

In [17]:
auto_segs = {}
for dataset in ensemble_datasets:
    for scan in dataset:
        auto_segs[scan.subid] = scan.label_path

man_segs = {}
for scan in orig_dataset:
    man_segs[scan.subid] = scan.label_path

In [23]:
df_inference = df_full[df_full.index.isin(subjects)]
for subid, _ in df_inference.iterrows():
    df_inference.loc[subid, 'manual_label'] = man_segs[str(subid)]
    df_inference.loc[subid, 'auto_label'] = auto_segs[str(subid)]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_inference.loc[subid, 'manual_label'] = man_segs[str(subid)]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_inference.loc[subid, 'auto_label'] = auto_segs[str(subid)]


In [28]:
for scan in orig_dataset:
    df_inference.loc[int(scan.subid), 'scan_folder'] = scan.root

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_inference.loc[int(scan.subid), 'scan_folder'] = scan.root


In [24]:
df_inference.index.name = "subject_id"
df_inference.sort_values(by=['dz_type2', "subject_id"], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_inference.sort_values(by=['dz_type2', "subject_id"], inplace=True)


In [32]:
for i, row in df_inference.iterrows():
    images = [Path(row['scan_folder']) / "flair.nii.gz", Path(row['scan_folder']) / "t1.nii.gz"]
    labels = [row['auto_label'], row['manual_label']]
    cmd = utils.open_itksnap_workspace_cmd(images, labels)
    df_inference.loc[i, "itksnap_cmd"] = cmd

In [33]:
import pyperclip

pyperclip.copy(df_inference.loc[1029, "itksnap_cmd"])

In [25]:
lines = ["# Notes", "\n\n"]
dz_type = ""
for i, row in df_inference.iterrows():
    if row['dz_type2'] != dz_type:
        lines.extend([f"## {row['dz_type2']}", "\n\n"])
        dz_type = row['dz_type2']
    lines.extend([f"### {i}", "\n\n"])

In [26]:
import os

if not os.path.exists("seg_notes.md"):
    with open("seg_notes.md", 'w') as f:
        f.writelines(lines)