In [1]:
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from cell_phenotyping import Predictor
sys.path.append('../../')
from utils import conf_matrix, accuracy_plots, performance_plots, micro_average_pr_curve, average_pr_curve_per_class

In [2]:
# Configuration: dataset, checkpoint and result locations for both cohorts.
#
# Every path is passed through os.path.expanduser here. pandas expands a leading
# `~` by itself, but os.makedirs / os.path.join treat it as a literal directory
# name, so leaving the paths unexpanded makes the two disagree about where the
# results live.
cHL2_MIBI_dataset_dir = os.path.expanduser('~/MAPS/cHL2_MIBI/datasets/')
cHL2_MIBI_class_names = pd.read_csv(os.path.join(cHL2_MIBI_dataset_dir, 'class_names.csv'))
cHL2_MIBI_marker_names = pd.read_csv(os.path.join(cHL2_MIBI_dataset_dir, 'marker_names.csv'))['marker_name'].tolist()

cHL1_MIBI_dataset_dir = os.path.expanduser('~/MAPS/cHL1_MIBI/datasets/')
cHL1_MIBI_checkpoint_dir = os.path.expanduser('~/MAPS/cHL1_MIBI/results/train_valid_test/')
cHL1_MIBI_class_names = pd.read_csv(os.path.join(cHL1_MIBI_dataset_dir, 'class_names.csv'))
cHL1_MIBI_marker_names = pd.read_csv(os.path.join(cHL1_MIBI_dataset_dir, 'marker_names.csv'))['marker_name'].tolist()

# Colours handed to the accuracy / performance plotting helpers.
color_palette = ['#bed8d9', '#2bb8b2', '#fed53f', '#f98866', '#fe0000']
# Where the cHL1_MIBI-model predictions on the cHL2_MIBI data are written.
result_dir = os.path.expanduser('~/MAPS/cHL1_MIBI/results/cHL2_MIBI_test/')

## Class Mapping

In [3]:
# Per-dataset lookup tables between numeric class ids and class names, built
# from the `class_id` / `class_name` columns of each class table.
cHL2_ids = cHL2_MIBI_class_names['class_id'].tolist()
cHL2_names = cHL2_MIBI_class_names['class_name'].tolist()
cHL2_MIBI_id_to_class_name_mapping = dict(zip(cHL2_ids, cHL2_names))
cHL2_MIBI_class_to_id_name_mapping = dict(zip(cHL2_names, cHL2_ids))

cHL1_ids = cHL1_MIBI_class_names['class_id'].tolist()
cHL1_names = cHL1_MIBI_class_names['class_name'].tolist()
cHL1_MIBI_id_name_to_class_mapping = dict(zip(cHL1_ids, cHL1_names))
cHL1_MIBI_class_name_to_id_mapping = dict(zip(cHL1_names, cHL1_ids))

# Hand-written translation of class names between the two label vocabularies.
cHL2_MIBI_to_cHL1_MIBI_class_name_mapping = {
    'B': 'B',
    'CD4 T': 'CD4',
    'CD8 T': 'CD8',
    'DC': 'DC',
    'Endothelial': 'Endothelial',
    'M1': 'M1',
    'M2': 'M2',
    'NK': 'NK',
    'Neutrophil': 'Neutrophil',
    'Other': 'Other',
    'CD4 Treg': 'Treg',
    'Tumor': 'Tumor',
}
# Many-to-one in this direction: both 'CD4' and 'Cytotoxic CD4' land on 'CD4 T'.
cHL1_MIBI_to_cHL2_MIBI_class_name_mapping = {
    'B': 'B',
    'CD4': 'CD4 T',
    'CD8': 'CD8 T',
    'Cytotoxic CD4': 'CD4 T',
    'DC': 'DC',
    'Endothelial': 'Endothelial',
    'M1': 'M1',
    'M2': 'M2',
    'NK': 'NK',
    'Neutrophil': 'Neutrophil',
    'Other': 'Other',
    'Treg': 'CD4 Treg',
    'Tumor': 'Tumor',
}

# The same translations expressed on class ids instead of class names.
cHL2_MIBI_to_cHL1_MIBI_class_id_mapping = {
    cHL2_MIBI_class_to_id_name_mapping[cHL2_name]: cHL1_MIBI_class_name_to_id_mapping[cHL1_name]
    for cHL2_name, cHL1_name in cHL2_MIBI_to_cHL1_MIBI_class_name_mapping.items()
}

cHL1_MIBI_to_cHL2_MIBI_class_id_mapping = {
    cHL1_MIBI_class_name_to_id_mapping[cHL1_name]: cHL2_MIBI_class_to_id_name_mapping[cHL2_name]
    for cHL1_name, cHL2_name in cHL1_MIBI_to_cHL2_MIBI_class_name_mapping.items()
}

## Prediction on cHL2_MIBI Dataset using cHL1_MIBI Model

In [4]:
# Columns of each per-fold result table: one probability column per cHL2_MIBI
# class (in class_names.csv row order), then the predicted and ground-truth labels.
# NOTE(review): probabilities are stored below at column index == class_id, so
# these headers only line up if class_id equals the row position - confirm.
class_names = cHL2_MIBI_class_names['class_name'].tolist()
res_df_cols = ['%s_prob' % class_name for class_name in class_names]
res_df_cols.append('pred_label')
res_df_cols.append('gt_label')

# pandas expands a leading `~` by itself, but os.makedirs does not (it would
# create a literal "~" directory under the working directory while to_csv writes
# into the home directory). Resolve the roots that reach non-pandas code.
checkpoint_root = os.path.expanduser(cHL1_MIBI_checkpoint_dir)
result_root = os.path.expanduser(result_dir)

fold_count = 5
for i in range(fold_count):
    fold_name = 'fold_%d' % i

    # Load the cHL1_MIBI model trained on this fold.
    # NOTE(review): num_features / num_classes presumably have to match the
    # checkpoint (13 cHL1_MIBI classes are mapped below) - confirm.
    pretrained_model_checkpoint_path = os.path.join(checkpoint_root, fold_name, 'best_checkpoint.pt')
    model = Predictor(model_checkpoint_path=pretrained_model_checkpoint_path, num_features=42, num_classes=13, batch_size=128, normalization=True)

    # The complete cHL2_MIBI fold (train + valid + test) serves as test data.
    train_data_path = os.path.join(cHL2_MIBI_dataset_dir, 'splits', fold_name, 'train.csv')
    valid_data_path = os.path.join(cHL2_MIBI_dataset_dir, 'splits', fold_name, 'valid.csv')
    test_data_path = os.path.join(cHL2_MIBI_dataset_dir, 'splits', fold_name, 'test.csv')

    df = pd.concat([pd.read_csv(train_data_path), pd.read_csv(valid_data_path), pd.read_csv(test_data_path)])
    gt_labels = df['cell_label'].to_numpy()

    # Markers the cHL1_MIBI model expects but cHL2_MIBI lacks are zero-filled.
    for marker_name in cHL1_MIBI_marker_names:
        if marker_name not in df.columns:
            df[marker_name] = 0.0
    # 'cellSize' is appended as the last feature column; the guard keeps the
    # shared marker list from growing on later folds or cell re-runs.
    if 'cellSize' not in cHL1_MIBI_marker_names:
        cHL1_MIBI_marker_names.append('cellSize')
    # Select the features in the cHL1_MIBI column order.
    df = df[cHL1_MIBI_marker_names]
    # Predictor.predict is given a CSV path, so the features go through a
    # scratch file in the working directory (overwritten on every fold).
    df.to_csv('test.csv', index=False)

    pred_labels, pred_probs = model.predict('test.csv')

    # Translate predictions from cHL1_MIBI class ids to cHL2_MIBI class ids.
    # Labels default to -1 for any predicted class without a mapping.
    cHL2_MIBI_pred_labels = np.full((pred_probs.shape[0],), -1.0)
    cHL2_MIBI_pred_probs = np.zeros((pred_probs.shape[0], cHL2_MIBI_class_names.shape[0]))
    for cHL1_MIBI_class_id, cHL2_MIBI_class_id in cHL1_MIBI_to_cHL2_MIBI_class_id_mapping.items():
        # Accumulate rather than assign: several cHL1_MIBI classes can map to
        # the same cHL2_MIBI class (e.g. 'CD4' and 'Cytotoxic CD4' -> 'CD4 T'),
        # and plain assignment would keep only the last one's probability.
        cHL2_MIBI_pred_probs[:, cHL2_MIBI_class_id] += pred_probs[:, cHL1_MIBI_class_id]

        I = pred_labels == cHL1_MIBI_class_id
        cHL2_MIBI_pred_labels[I] = cHL2_MIBI_class_id

    # Assemble [probabilities | pred_label | gt_label] and save it for this fold.
    gt_labels = np.expand_dims(gt_labels, axis=1)
    cHL2_MIBI_pred_labels = np.expand_dims(cHL2_MIBI_pred_labels, axis=1)
    res_df = pd.DataFrame(np.concatenate((cHL2_MIBI_pred_probs, cHL2_MIBI_pred_labels, gt_labels), axis=1), columns=res_df_cols)
    fold_result_dir = os.path.join(result_root, fold_name)
    os.makedirs(fold_result_dir, exist_ok=True)
    res_df.to_csv(os.path.join(fold_result_dir, 'results_test.csv'), index=False)

Batch 1804/1804            

## Evaluation

In [5]:
# Inputs for the evaluation helpers. The `~` paths are expanded explicitly:
# pandas resolves a leading `~` by itself, but the plotting helpers' path
# handling is not visible here, and os-level file functions take `~` literally.
class_name_csv_path = os.path.expanduser('~/MAPS/cHL2_MIBI/datasets/class_names.csv')
result_csv_name = 'results_test.csv'
result_dir = os.path.expanduser('~/MAPS/cHL1_MIBI/results/cHL2_MIBI_test/')

# Display order for classes; only the names present in the class table are kept.
class_names_all = ['B', 'CD4 T', 'CD4 CTL', 'CD8 T', 'DC', 'Endothelial', 'M1', 'M2', 'NK', 'Neutrophil', 'Other', 'Treg', 'Tumor', 'Epithelial', 'Granulocyte', 'Lymphatic', 'Mast', 'Monocyte', 'Macrophage', 'Plasma Cell', 'Smooth Muscle', 'Stroma']

# Class-table names -> display names used in class_names_all.
# A class-table name missing from this dict raises KeyError below.
class_name_mapping = {
    "B": "B",
    "CD4 T": "CD4 T",
    "CD4 Treg": "Treg",
    "CD8 T": "CD8 T",
    "DC": "DC",
    "Endothelial": "Endothelial",
    "M1": "M1",
    "M2": "M2",
    "NK": "NK",
    "Neutrophil": "Neutrophil",
    "Other": "Other",
    "Tumor": "Tumor"
}

color_palette = ['#bed8d9', '#2bb8b2', '#fed53f', '#f98866', '#fe0000']

# Read the class table once (it was previously parsed twice for its two columns).
class_table = pd.read_csv(class_name_csv_path)
class_ids = class_table['class_id'].tolist()
class_names = [class_name_mapping[c_name] for c_name in class_table['class_name'].tolist()]

# Classes in display order, paired with the class id each one has in the table.
class_names_ordered = [c_name for c_name in class_names_all if c_name in class_names]
class_id_mapping = [class_ids[class_names.index(c_name)] for c_name in class_names_ordered]

### Quantitative Results

In [None]:
# Run every evaluation helper over the same five folds. Each call gets its own
# copy of the fold list and of the ordered class names, as before, so a helper
# that mutates its arguments cannot affect the next call.
fold_ids = [0, 1, 2, 3, 4]
csv_kwargs = {'result_csv_name': result_csv_name}
pr_curve_colors = plt.get_cmap('tab20').colors

conf_matrix(result_dir, list(fold_ids), list(class_names_ordered), class_id_mapping, **csv_kwargs)
accuracy_plots(result_dir, list(fold_ids), list(class_names_ordered), class_id_mapping, color_palette, **csv_kwargs)
performance_plots(result_dir, list(fold_ids), list(class_names_ordered), class_id_mapping, color_palette, **csv_kwargs)
micro_average_pr_curve(result_dir, list(fold_ids), list(class_names_ordered), class_id_mapping, **csv_kwargs)
average_pr_curve_per_class(result_dir, list(fold_ids), list(class_names_ordered), class_id_mapping, pr_curve_colors, **csv_kwargs)