In [1]:
import os
import pickle
from src.util import plotting_util as plot_util

REPO_DIR = os.path.join(os.getcwd(), "../../")
os.chdir(REPO_DIR)
METIENT_OUTPUT_DIR = os.path.join(REPO_DIR, 'src/data/tracerx_nsclc/metient_outputs')
ORCHARD_OUTPUT_DIR = os.path.join(METIENT_OUTPUT_DIR, 'pairtree_clustered_orchard_trees')
CONIPHER_OUTPUT_DIR = os.path.join(METIENT_OUTPUT_DIR, 'pyclone_clustered_conipher_trees')


NOTE: Redirects are currently not supported in Windows or MacOs.


In [2]:
def get_patients(pickle_files_dir):
    patients = []
    for file in os.listdir(pickle_files_dir):
        if ".pickle" in file:
            patients.append(file.split(".")[0])
    print(f"{len(patients)} patients")
    return patients

def get_info(pickle_files_dir):
    patients = get_patients(pickle_files_dir)
    seeding_patterns = dict()
    monoclonal_ct = 0
    for patient in patients:
        file = open(os.path.join(pickle_files_dir, f"{patient}.pickle"),'rb')
        pckl = pickle.load(file)
        V = pckl['ancestral_labelings'][0]
        A = pckl['full_adjacency_matrices'][0]
        sites = pckl['ordered_anatomical_sites']
        G = plot_util.get_migration_graph(V, A)
        #print(G, sites)
        pattern = plot_util.get_seeding_pattern_from_migration_graph(G)
        if "monoclonal" in pattern:
            monoclonal_ct += 1
        seeding_patterns[patient] = pattern
    print(f"# monoclonal cases: {monoclonal_ct}, # polyclonal cases: {len(patients)-monoclonal_ct}")
    return seeding_patterns, monoclonal_ct



### Load metient outputs using various combinations of clustering alg + tree inference alg + metient hyperparams

In [3]:
# (1) pyclone clusters + conipher trees + metient (maximum parsimony)
conipher_mp_seeding_patterns, monoclonal_ct = get_info(os.path.join(CONIPHER_OUTPUT_DIR, 'max_pars'))

# (2) pyclone clusters + conipher trees + metient (maximum parsimony + genetic distance)
conipher_mp_gd_seeding_patterns, monoclonal_ct = get_info(os.path.join(CONIPHER_OUTPUT_DIR, 'max_pars_genetic_distance'))

# (3) pairtree clusters + orchard trees + metient (maximum parsimony)
conipher_mp_seeding_patterns, monoclonal_ct = get_info(os.path.join(ORCHARD_OUTPUT_DIR, 'max_pars'))

# (4) pairtree clusters + orchard trees + metient (maximum parsimony + genetic distance)
conipher_mp_gd_seeding_patterns, monoclonal_ct = get_info(os.path.join(ORCHARD_OUTPUT_DIR, 'max_pars_genetic_distance'))


128 patients
# monoclonal cases: 64, # polyclonal cases: 64
128 patients
# monoclonal cases: 64, # polyclonal cases: 64
128 patients
# monoclonal cases: 6, # polyclonal cases: 122
128 patients
# monoclonal cases: 7, # polyclonal cases: 121


In [4]:
conipher_mp_gd_seeding_patterns

{'CRUK0100_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0519_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0666_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0035_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0698_clustered_primary_SU_T1': 'polyclonal multi-source seeding',
 'CRUK0097_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0543_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0236_clustered_primary_SU_T1': 'monoclonal single-source seeding',
 'CRUK0476_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0465_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0636_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0625_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0497_clustered_primary_SU_T1': 'polyclonal single-source seeding',
 'CRUK0484_clustered_primary_SU_T1': 'polyclonal sin

### How consistent are the dissemination patterns?

In [5]:
import seaborn as sns
