In [None]:
import pandas as pd
import numpy as np
from scipy.cluster import hierarchy
from scipy.cluster.hierarchy import dendrogram, linkage
from matplotlib import pyplot as plt
import seaborn as sns
import collections
import itertools

In [None]:
# Define methods

def read_values(parent_folder, min_num_shared_clones, dataset_name):
    """
    Load the four result tables of one lineage coupling analysis run.

    Every table is read from a CSV file located in ``parent_folder`` whose
    name is built from ``min_num_shared_clones`` and ``dataset_name``; the
    first CSV column becomes the index. The shape of each table is printed
    right after it has been loaded.

    Returns the tuple
    ``(metric_values, num_cells_shared_clones, num_shared_clones, z_scores)``
    of DataFrames, where ``num_shared_clones`` has been cast to ``int``.
    """
    def load_table(path_template, label, dtype=None):
        # Fill in the file name, read the CSV, optionally cast, then report.
        csv_path = path_template.format(
            parent_folder, min_num_shared_clones, dataset_name)
        table = pd.read_csv(csv_path, index_col=0)
        if dtype is not None:
            table = table.astype(dtype)
        print("Shape of {}: {}".format(label, table.shape))
        return table

    metric_values = load_table(
        "{}/metric_values_min_{}_{}.csv", "metric_values")
    num_cells_shared_clones = load_table(
        "{}/num_cells_of_shared_clones_min_{}_{}.csv", "num_cells_shared_clones")
    # Counts are cast to integers before they are handed back.
    num_shared_clones = load_table(
        "{}/num_shared_clones_min_{}_{}.csv", "num_shared_clones", dtype='int')

    z_scores = load_table(
        "{}/zscore_min_{}_{}.csv", "z_scores")

    return (metric_values, num_cells_shared_clones, num_shared_clones,
            z_scores)

def plot_clustermap_zscores(title
                            , lineage_coupling_scores_matrix_output_file
                            , array_2D, annot, method
                            , labels_array_order, vmin, vmax
                            , figsize
                            , font_scale, annot_font_size):
    """
    Draw a clustered heatmap of array_2D, save it to a file and show it.

    array_2D is passed to seaborn's clustermap with the "bwr" colormap,
    the colour limits vmin/vmax, the linkage ``method`` and the cell
    annotations ``annot`` (formatted as integers, at annot_font_size).
    The current figure is then written to
    lineage_coupling_scores_matrix_output_file and displayed.

    ``title`` and ``labels_array_order`` are accepted but not used here.

    Returns the object produced by ``sns.clustermap``.
    """
    # Cell mask for seaborn: cells whose entry is truthy are not drawn.
    hidden_cells = np.zeros_like(array_2D)
    # Toggle: leave this False to draw the whole matrix, switch it to True
    # to draw only the lower triangle together with the diagonal.
    hide_upper_triangle = False
    hidden_cells[np.triu_indices_from(hidden_cells)] = hide_upper_triangle
    # The diagonal is always kept visible.
    hidden_cells[np.diag_indices_from(hidden_cells)] = False

    clustermap_options = dict(
        figsize=figsize,
        annot=annot,
        annot_kws={"size": annot_font_size},
        method=method,
        mask=hidden_cells,
        vmin=vmin,
        vmax=vmax,
        cmap="bwr",
        fmt="d",
    )
    with sns.axes_style("white"):
        # NOTE(review): sns.set() also (re)applies seaborn's default theme,
        # which may override the "white" style of this context - confirm.
        sns.set(font_scale=font_scale)
        cluster_grid = sns.clustermap(array_2D, **clustermap_options)
        # Keep the row labels horizontal.
        row_tick_labels = cluster_grid.ax_heatmap.yaxis.get_majorticklabels()
        plt.setp(row_tick_labels, rotation=0)
    plt.savefig(lineage_coupling_scores_matrix_output_file)
    plt.show()
    return cluster_grid

    
def plot_correlation_clustermap(title
                                , lineage_coupling_scores_correlation_matrix_output_file
                                , array_2D, annot, method
                                , labels_array_order
                                , figsize
                                , font_scale, annot_font_size):
    """
    Draw a clustered heatmap of the column correlations of array_2D.

    The correlation matrix ``array_2D.corr()`` is passed to seaborn's
    clustermap with the "bwr" colormap and the fixed colour range
    [-1.0, 1.0], using the linkage ``method`` and the cell annotations
    ``annot`` (formatted as integers, at annot_font_size). The current
    figure is then written to
    lineage_coupling_scores_correlation_matrix_output_file and displayed.

    ``title`` and ``labels_array_order`` are accepted but not used here.

    Returns the object produced by ``sns.clustermap``.
    """
    correlations = array_2D.corr()
    # Cell mask for seaborn: cells whose entry is truthy are not drawn.
    hidden_cells = np.zeros_like(correlations)
    # Toggle: leave this False to draw the whole matrix, switch it to True
    # to draw only the lower triangle together with the diagonal.
    hide_upper_triangle = False
    hidden_cells[np.triu_indices_from(hidden_cells)] = hide_upper_triangle
    # The diagonal is always kept visible.
    hidden_cells[np.diag_indices_from(hidden_cells)] = False

    clustermap_options = dict(
        figsize=figsize,
        annot=annot,
        annot_kws={"size": annot_font_size},
        method=method,
        mask=hidden_cells,
        vmin=-1.0,
        vmax=1.0,
        cmap="bwr",
        fmt="d",
    )
    with sns.axes_style("white"):
        # NOTE(review): sns.set() also (re)applies seaborn's default theme,
        # which may override the "white" style of this context - confirm.
        sns.set(font_scale=font_scale)
        cluster_grid = sns.clustermap(correlations, **clustermap_options)
        # Keep the row labels horizontal.
        row_tick_labels = cluster_grid.ax_heatmap.yaxis.get_majorticklabels()
        plt.setp(row_tick_labels, rotation=0)
    plt.savefig(lineage_coupling_scores_correlation_matrix_output_file)
    plt.show()
    return cluster_grid
    
def plot_clustermap_custom_cmap(array_2D
                            , lineage_coupling_scores_correlation_matrix_output_file
                            , annot, method
                            , labels_array_order, vmin, vmax
                            , figsize
                            , font_scale, annot_font_size
                            , cmap="YlOrRd"):
    """
    Draw a clustered heatmap of array_2D with a caller-chosen colormap.

    array_2D is passed to seaborn's clustermap with the colormap ``cmap``
    ("YlOrRd" unless given), the colour limits vmin/vmax, the linkage
    ``method`` and the cell annotations ``annot`` (formatted as integers,
    at annot_font_size). The current figure is then written to
    lineage_coupling_scores_correlation_matrix_output_file and displayed.

    ``labels_array_order`` is accepted but not used here.

    Returns the object produced by ``sns.clustermap``.
    """
    # Cell mask for seaborn: cells whose entry is truthy are not drawn.
    hidden_cells = np.zeros_like(array_2D)
    # Toggle: leave this False to draw the entire matrix, switch it to True
    # to draw only the lower triangle together with the diagonal.
    hide_upper_triangle = False
    hidden_cells[np.triu_indices_from(hidden_cells)] = hide_upper_triangle
    # The diagonal is always kept visible.
    hidden_cells[np.diag_indices_from(hidden_cells)] = False

    clustermap_options = dict(
        figsize=figsize,
        annot=annot,
        annot_kws={"size": annot_font_size},
        method=method,
        mask=hidden_cells,
        vmin=vmin,
        vmax=vmax,
        cmap=cmap,
        fmt="d",
    )
    with sns.axes_style("white"):
        # NOTE(review): sns.set() also (re)applies seaborn's default theme,
        # which may override the "white" style of this context - confirm.
        sns.set(font_scale=font_scale)
        cluster_grid = sns.clustermap(array_2D, **clustermap_options)
        # Keep the row labels horizontal.
        row_tick_labels = cluster_grid.ax_heatmap.yaxis.get_majorticklabels()
        plt.setp(row_tick_labels, rotation=0)
    plt.savefig(lineage_coupling_scores_correlation_matrix_output_file)
    plt.show()
    return cluster_grid

def plot_heatmap(array_2D, reference_ax
                , heatmap_output_file
                , annot, labels_array_order, vmin, vmax
                , font_scale, figsize, annot_font_size
                , cmap):
    """
    Draw a plain (non-clustered) heatmap of array_2D in a reference order.

    The columns and rows of array_2D are first rearranged to follow the
    column and row order of ``reference_ax.data2d``. The rearranged table
    is drawn with seaborn's heatmap on a new figure of size ``figsize``,
    using ``cmap``, the colour limits vmin/vmax and the cell annotations
    ``annot`` (formatted as integers, at annot_font_size). The current
    figure is then written to heatmap_output_file and displayed.

    ``labels_array_order`` is accepted but not used here.

    Returns the Axes produced by ``sns.heatmap``.
    """
    column_order = reference_ax.data2d.columns.tolist()
    # Row labels are converted to str before re-indexing.
    # NOTE(review): assumes array_2D's index holds string labels - confirm.
    row_order = [str(label) for label in reference_ax.data2d.index.tolist()]
    reordered = array_2D[column_order].reindex(row_order)

    # Cell mask for seaborn: cells whose entry is truthy are not drawn.
    hidden_cells = np.zeros_like(reordered)
    # Toggle: leave this False to draw the entire matrix, switch it to True
    # to draw only the lower triangle together with the diagonal.
    hide_upper_triangle = False
    hidden_cells[np.triu_indices_from(hidden_cells)] = hide_upper_triangle
    # The diagonal is always kept visible.
    hidden_cells[np.diag_indices_from(hidden_cells)] = False

    with sns.axes_style("white"):
        _, target_axes = plt.subplots(figsize=figsize)
        sns.set(font_scale=font_scale)
        heatmap_axes = sns.heatmap(
            reordered,
            annot=annot,
            annot_kws={"size": annot_font_size},
            mask=hidden_cells,
            vmin=vmin,
            vmax=vmax,
            cmap=cmap,
            ax=target_axes,
            fmt="d",
        )
        # Keep the row labels horizontal.
        plt.setp(heatmap_axes.yaxis.get_majorticklabels(), rotation=0)
    plt.savefig(heatmap_output_file)
    plt.show()
    return heatmap_axes

def print_output_filenames(dataset_folders, min_num_shared_clones_list
                      , suffixes, annot, parent_folder):
    """
    Print the z-score clustermap PDF path of every requested analysis.

    One entry is printed per element of the Cartesian product
    dataset_folders x min_num_shared_clones_list x suffixes: the running
    index of the combination, followed by the path
    <parent_folder>/<dataset_folder>/analysis/
    zscore_min_<min_num_shared_clones>_<dataset_folder>_<suffix>_<annot>.pdf
    """
    combinations = itertools.product(
        dataset_folders, min_num_shared_clones_list, suffixes)
    for position, combination in enumerate(combinations):
        dataset_folder, min_num_shared_clones, suffix = combination
        results_dir = "{}/analysis".format(
            "{}/{}".format(parent_folder, dataset_folder))
        clustermap_pdf = "{}/zscore_min_{}_{}_{}.pdf".format(
            results_dir,
            min_num_shared_clones,
            "{}_{}".format(dataset_folder, suffix),
            annot)
        print("{}:\n{}\n".format(position, clustermap_pdf))
            

def print_zscore_commands(
        dataset_folders, min_num_shared_clones_list
        , suffixes, vmin, vmax
        , num_shufflings, parent_folder
        , script_folder = "."
        , script_name = "lineage_coupling_analysis.py"
        , enum_bool=False):
    """
    Print one analysis-script command line per requested analysis.

    A command is printed for every element of the Cartesian product
    dataset_folders x min_num_shared_clones_list x suffixes. For each one:

    * the input CSV (``-f``) is
      <parent_folder>/<dataset_folder>/lb_pool/<dataset_folder>_<suffix>.csv
    * all output files (``-Z -C -M -S -T -U -V``) are placed in
      <parent_folder>/<dataset_folder>/analysis/
    * ``-N`` receives num_shufflings, ``-u``/``-v`` receive vmin/vmax and
      ``-s`` receives the current min_num_shared_clones.

    Every file path is wrapped in single quotes. When enum_bool is true,
    the running index of the combination is printed before its command.
    """
    command_template = ("python {}/{} -N {}"
                        " -u {} -v {} -s {} -f {} -Z {} -C {} -M {} -S {} -T {} -U {} -V {}")
    # Output-file templates, listed in the order of the flags they fill.
    output_templates = (
        "'{}/zscore_min_{}_{}.csv'",                      # -Z
        "'{}/zscore_correl_min_{}_{}.csv'",               # -C
        "'{}/metric_values_min_{}_{}.csv'",               # -M
        "'{}/num_shared_clones_min_{}_{}.csv'",           # -S
        "'{}/num_cells_of_shared_clones_min_{}_{}.csv'",  # -T
        "'{}/zscore_min_{}_{}.pdf'",                      # -U
        "'{}/zscore_correl_min_{}_{}.pdf'",               # -V
    )
    combinations = itertools.product(
        dataset_folders, min_num_shared_clones_list, suffixes)
    for position, combination in enumerate(combinations):
        dataset_folder, min_num_shared_clones, suffix = combination
        dataset_dir = "{}/{}".format(parent_folder, dataset_folder)
        input_dir = "{}/lb_pool".format(dataset_dir)
        results_dir = "{}/analysis".format(dataset_dir)
        dataset_name = "{}_{}".format(dataset_folder, suffix)

        input_csv = "'{}/{}.csv'".format(input_dir, dataset_name)
        output_files = [
            template.format(results_dir, min_num_shared_clones, dataset_name)
            for template in output_templates
        ]
        command = command_template.format(
            script_folder, script_name, num_shufflings, vmin, vmax,
            min_num_shared_clones, input_csv, *output_files)

        if enum_bool:
            print("{}:\n".format(position))
        print("{}\n".format(command))

def print_copy_zscore_commands_to_Windows(dataset_folders, min_num_shared_clones_list
                      , suffixes, annot, parent_folder, parent_folder_destination):
    """
    Print mkdir/pscp commands that fetch the z-score results to Windows.

    For every element of the Cartesian product
    dataset_folders x min_num_shared_clones_list x suffixes this prints:

    * one ``mkdir`` for <parent_folder_destination>\\<suffix>
    * one ``pscp`` line per result file found under
      <parent_folder>/<dataset_folder>/analysis/ : the three CSV tables
      (z-scores, number of shared clones, z-score correlations) followed
      by the three PDFs of the same names carrying the ``annot`` tag
    * a blank separator.

    Remote paths are wrapped in single quotes, the destination in double
    quotes.
    """
    csv_templates = (
        "'{}/zscore_min_{}_{}.csv'",
        "'{}/num_shared_clones_min_{}_{}.csv'",
        "'{}/zscore_correl_min_{}_{}.csv'",
    )
    pdf_templates = (
        "'{}/zscore_min_{}_{}_{}.pdf'",
        "'{}/num_shared_clones_min_{}_{}_{}.pdf'",
        "'{}/zscore_correl_min_{}_{}_{}.pdf'",
    )
    combinations = itertools.product(
        dataset_folders, min_num_shared_clones_list, suffixes)
    for dataset_folder, min_num_shared_clones, suffix in combinations:
        results_dir = "{}/analysis".format(
            "{}/{}".format(parent_folder, dataset_folder))
        dataset_name = "{}_{}".format(dataset_folder, suffix)

        # CSV tables first, annotated PDFs afterwards.
        remote_files = [
            template.format(results_dir, min_num_shared_clones, dataset_name)
            for template in csv_templates
        ]
        remote_files.extend(
            template.format(results_dir, min_num_shared_clones,
                            dataset_name, annot)
            for template in pdf_templates)

        print('mkdir "{}\\{}"'.format(parent_folder_destination, suffix))
        for remote_file in remote_files:
            print('pscp -P 22 username@host:{} "{}\\{}"'.format(
                remote_file, parent_folder_destination, suffix))
        print("\n")

def print_copy_Dendrogram_zscore_commands_to_Windows(
        dataset_folders, min_num_shared_clones_list
        , clusterizations, lb_calling_methods
        , parent_folder, parent_folder_destination):
    """
    Print mkdir/pscp commands that fetch the dendrogram PDFs to Windows.

    For every element of the Cartesian product dataset_folders x
    min_num_shared_clones_list x clusterizations x lb_calling_methods,
    with suffix = <clusterization>_<lb_calling_method>, this prints:

    * one ``mkdir`` for <parent_folder_destination>\\<suffix>
    * one ``pscp`` line for each of these files under
      <parent_folder>/<dataset_folder>/analysis/ :
      Dendrogram_zscore_<dataset_folder>_<suffix>.pdf,
      Dendrogram_transcriptome_<dataset_folder>_<clusterization>.pdf,
      Dendrogram_zscore_correl_<dataset_folder>_<suffix>.pdf
    * a blank separator.

    Remote paths are wrapped in single quotes, the destination in double
    quotes.
    """
    # The inputs are consumed exactly once, by the product below, so
    # one-shot iterables (e.g. generators) work as well as lists.
    combinations = itertools.product(
        dataset_folders, min_num_shared_clones_list,
        clusterizations, lb_calling_methods)
    # min_num_shared_clones is not part of any path; it only determines how
    # many times each command group is printed.
    for (dataset_folder, _min_num_shared_clones,
         clusterization, lb_calling_method) in combinations:
        suffix = "{}_{}".format(clusterization, lb_calling_method)
        parent_folder_dataset = "{}/{}".format(parent_folder, dataset_folder)
        parent_folder_results = "{}/analysis".format(parent_folder_dataset)
        dataset_name = "{}_{}".format(dataset_folder, suffix)

        remote_files = (
            "'{}/Dendrogram_zscore_{}.pdf'".format(
                parent_folder_results, dataset_name),
            # The transcriptome dendrogram depends on the clusterization
            # only, not on the lb calling method.
            "'{}/Dendrogram_transcriptome_{}_{}.pdf'".format(
                parent_folder_results, dataset_folder, clusterization),
            "'{}/Dendrogram_zscore_correl_{}.pdf'".format(
                parent_folder_results, dataset_name),
        )

        print('mkdir "{}\\{}"'.format(parent_folder_destination, suffix))
        for remote_file in remote_files:
            print('pscp -P 22 username@host:{} "{}\\{}"'.format(
                remote_file, parent_folder_destination, suffix))
        print("\n")
        
        
def print_copy_zscore_commands(dataset_folders, min_num_shared_clones_list
                      , suffixes, annot, parent_folder, parent_folder_destination):
    """
    Print mkdir/cp shell commands that collect the z-score results.

    For every element of the Cartesian product
    dataset_folders x min_num_shared_clones_list x suffixes this prints:

    * ``mkdir -p`` for <parent_folder_destination>,
      <parent_folder_destination>/<suffix> and
      <parent_folder_destination>/<suffix>/csv
    * one ``cp`` into .../<suffix>/csv for each of the three CSV tables
      (z-scores, number of shared clones, z-score correlations) found
      under <parent_folder>/<dataset_folder>/analysis/
    * one ``cp`` into .../<suffix> for each of the three PDFs of the same
      names carrying the ``annot`` tag
    * a blank separator.

    Source paths are wrapped in single quotes, destinations in double
    quotes.
    """
    csv_templates = (
        "'{}/zscore_min_{}_{}.csv'",
        "'{}/num_shared_clones_min_{}_{}.csv'",
        "'{}/zscore_correl_min_{}_{}.csv'",
    )
    pdf_templates = (
        "'{}/zscore_min_{}_{}_{}.pdf'",
        "'{}/num_shared_clones_min_{}_{}_{}.pdf'",
        "'{}/zscore_correl_min_{}_{}_{}.pdf'",
    )
    combinations = itertools.product(
        dataset_folders, min_num_shared_clones_list, suffixes)
    for dataset_folder, min_num_shared_clones, suffix in combinations:
        parent_folder_dataset = "{}/{}".format(parent_folder, dataset_folder)
        parent_folder_results = "{}/analysis".format(parent_folder_dataset)
        dataset_name = "{}_{}".format(dataset_folder, suffix)

        csv_files = [
            template.format(parent_folder_results, min_num_shared_clones,
                            dataset_name)
            for template in csv_templates
        ]
        pdf_files = [
            template.format(parent_folder_results, min_num_shared_clones,
                            dataset_name, annot)
            for template in pdf_templates
        ]

        # The same destination folders are requested again for every
        # combination, so "-p" is needed: a plain mkdir fails as soon as
        # the directory already exists.
        print('mkdir -p "{}"'.format(parent_folder_destination))
        print('mkdir -p "{}/{}"'.format(parent_folder_destination, suffix))
        print('mkdir -p "{}/{}/csv"'.format(parent_folder_destination, suffix))
        for csv_file in csv_files:
            print('cp {} "{}/{}/csv"'.format(
                csv_file, parent_folder_destination, suffix))
        for pdf_file in pdf_files:
            print('cp {} "{}/{}"'.format(
                pdf_file, parent_folder_destination, suffix))
        print("\n")

def print_copy_Dendrogram_zscore_commands(
        dataset_folders, min_num_shared_clones_list
        , clusterizations, lb_calling_methods
        , parent_folder, parent_folder_destination):
    """
    Print mkdir/cp shell commands that collect the dendrogram PDFs.

    For every element of the Cartesian product dataset_folders x
    min_num_shared_clones_list x clusterizations x lb_calling_methods,
    with suffix = <clusterization>_<lb_calling_method>, this prints:

    * ``mkdir -p`` for <parent_folder_destination> and
      <parent_folder_destination>/<suffix>
    * one ``cp`` into .../<suffix> for each of these files under
      <parent_folder>/<dataset_folder>/analysis/ :
      Dendrogram_zscore_<dataset_folder>_<suffix>.pdf,
      Dendrogram_transcriptome_<dataset_folder>_<clusterization>.pdf,
      Dendrogram_zscore_correl_<dataset_folder>_<suffix>.pdf
    * a blank separator.

    Source paths are wrapped in single quotes, destinations in double
    quotes.
    """
    # The inputs are consumed exactly once, by the product below, so
    # one-shot iterables (e.g. generators) work as well as lists.
    combinations = itertools.product(
        dataset_folders, min_num_shared_clones_list,
        clusterizations, lb_calling_methods)
    # min_num_shared_clones is not part of any path; it only determines how
    # many times each command group is printed.
    for (dataset_folder, _min_num_shared_clones,
         clusterization, lb_calling_method) in combinations:
        suffix = "{}_{}".format(clusterization, lb_calling_method)
        parent_folder_dataset = "{}/{}".format(parent_folder, dataset_folder)
        parent_folder_results = "{}/analysis".format(parent_folder_dataset)
        dataset_name = "{}_{}".format(dataset_folder, suffix)

        source_files = (
            "'{}/Dendrogram_zscore_{}.pdf'".format(
                parent_folder_results, dataset_name),
            # The transcriptome dendrogram depends on the clusterization
            # only, not on the lb calling method.
            "'{}/Dendrogram_transcriptome_{}_{}.pdf'".format(
                parent_folder_results, dataset_folder, clusterization),
            "'{}/Dendrogram_zscore_correl_{}.pdf'".format(
                parent_folder_results, dataset_name),
        )

        # The same destination folders are requested again for every
        # combination, so "-p" is needed: a plain mkdir fails as soon as
        # the directory already exists.
        print('mkdir -p "{}"'.format(parent_folder_destination))
        print('mkdir -p "{}/{}"'.format(parent_folder_destination, suffix))
        for source_file in source_files:
            print('cp {} "{}/{}"'.format(
                source_file, parent_folder_destination, suffix))
        print("\n")

In [None]:
# Print the commands to be used to run the zscore analysis on the desired files/datasets
# Note that the subfolders must exist before running the script
# e.g. if our parent folder is './files' and we want to run the analysis on the dataset 'clonal_dataset'
# then the subfolders that must exist are
# './files/clonal_dataset/lb_pool/' (input files for the script)
# and './files/clonal_dataset/analysis/' (for the results of the lineage coupling analysis)

dataset_folders = [
    "clonal_dataset",
    "clonal_dataset_e10",
    "clonal_dataset_e12",
    "clonal_dataset_e13",
    "clonal_dataset_e14",
]
min_num_shared_clones_list = [1]
clusterizations = ["refined_COUP_clust", "refined_COUP_class"]
lb_calling_methods = ["Ryan_toptier_9"]
# One suffix per (clusterization, lb calling method) pair
suffixes = list(itertools.starmap(
    "{}_{}".format,
    itertools.product(clusterizations, lb_calling_methods)))
# print(suffixes)
vmin, vmax = -5.0, 5.0
num_shufflings = 10000
parent_folder = "./files"
script_folder = "."
script_name = "lineage_coupling_analysis.py"

# First activate the conda environment
print("conda activate zscore")
print_zscore_commands(
    dataset_folders=dataset_folders,
    min_num_shared_clones_list=min_num_shared_clones_list,
    suffixes=suffixes,
    vmin=vmin,
    vmax=vmax,
    num_shufflings=num_shufflings,
    parent_folder=parent_folder,
    script_folder=script_folder,
    script_name=script_name,
    enum_bool=False)

In [None]:
# Plot and output the zscores, zscore correlations, and raw number of shared clones clustermaps

dataset_folders = [
    "clonal_dataset",
    "clonal_dataset_e10",
    "clonal_dataset_e12",
    "clonal_dataset_e13",
    "clonal_dataset_e14",
]
min_num_shared_clones_list = [1]
clusterizations = ["refined_COUP_clust", "refined_COUP_class"]
lb_calling_methods = ["Ryan_toptier_9"]
# One suffix per (clusterization, lb calling method) pair
suffixes = list(itertools.starmap(
    "{}_{}".format,
    itertools.product(clusterizations, lb_calling_methods)))
annot = "annot_num_shared_clones"
parent_folder = "./files"

for dataset_folder, min_num_shared_clones, suffix in itertools.product(
        dataset_folders, min_num_shared_clones_list, suffixes):
    parent_folder_dataset = "{}/{}".format(parent_folder, dataset_folder)
    parent_folder_results = "{}/analysis".format(parent_folder_dataset)
    dataset_name = "{}_{}".format(dataset_folder, suffix)
    metric_values, num_cells_shared_clones, num_shared_clones, z_scores = read_values(
        parent_folder=parent_folder_results,
        min_num_shared_clones=min_num_shared_clones,
        dataset_name=dataset_name)

    # Settings shared by the two z-score figures: cells are annotated with
    # the number of shared clones.
    clusters_names_array_order = None
    cluster_pairs_matrix_annot = num_shared_clones
    cluster_pairs_matrix_linkage_method = "average"
    vmin, vmax = -5.0, 5.0
    figsize = (13, 13)
    font_scale = 0.7
    annot_font_size = 5

    # 1) Clustermap of the z-scores
    lineage_coupling_scores_clustermap_output_file = (
        "{}/zscore_min_{}_{}_{}.pdf".format(
            parent_folder_results, min_num_shared_clones, dataset_name, annot))
    print("{}\n".format(lineage_coupling_scores_clustermap_output_file))
    ax_clustermap_zscores = plot_clustermap_zscores(
        title="Lineage Coupling Scores",
        lineage_coupling_scores_matrix_output_file=lineage_coupling_scores_clustermap_output_file,
        array_2D=z_scores,
        annot=cluster_pairs_matrix_annot,
        method=cluster_pairs_matrix_linkage_method,
        labels_array_order=clusters_names_array_order,
        vmin=vmin,
        vmax=vmax,
        figsize=figsize,
        font_scale=font_scale,
        annot_font_size=annot_font_size)

    # 2) Clustermap of the correlations between z-score columns
    lineage_coupling_scores_correlation_matrix_output_file = (
        "{}/zscore_correl_min_{}_{}_{}.pdf".format(
            parent_folder_results, min_num_shared_clones, dataset_name, annot))
    print("{}\n".format(lineage_coupling_scores_correlation_matrix_output_file))
    ax_correlation_clustermap_zscores = plot_correlation_clustermap(
        title="Lineage Coupling Correlation Scores",
        lineage_coupling_scores_correlation_matrix_output_file=lineage_coupling_scores_correlation_matrix_output_file,
        array_2D=z_scores,
        annot=cluster_pairs_matrix_annot,
        method=cluster_pairs_matrix_linkage_method,
        labels_array_order=clusters_names_array_order,
        figsize=figsize,
        font_scale=font_scale,
        annot_font_size=annot_font_size)

    # 3) Clustermap of the raw number of shared clones, annotated with its
    #    own values and drawn on a different colour scale.
    num_shared_clones_clustermap_output_file = (
        "{}/num_shared_clones_min_{}_{}_{}.pdf".format(
            parent_folder_results, min_num_shared_clones, dataset_name, annot))
    print("{}\n".format(num_shared_clones_clustermap_output_file))
    cluster_pairs_matrix_annot = True
    vmin, vmax = 0.0, 50.0
    cmap = "Purples"
    # plot_clustermap_custom_cmap is used instead of plot_heatmap because it
    # clusters the rows and columns
    plot_clustermap_custom_cmap(
        array_2D=num_shared_clones,
        lineage_coupling_scores_correlation_matrix_output_file=num_shared_clones_clustermap_output_file,
        annot=cluster_pairs_matrix_annot,
        method=cluster_pairs_matrix_linkage_method,
        labels_array_order=clusters_names_array_order,
        vmin=vmin,
        vmax=vmax,
        figsize=figsize,
        font_scale=font_scale,
        annot_font_size=annot_font_size,
        cmap=cmap)

In [None]:
# Print the commands to copy the results of the previous cell to a specific location

dataset_folders = [
    "clonal_dataset",
    "clonal_dataset_e10",
    "clonal_dataset_e12",
    "clonal_dataset_e13",
    "clonal_dataset_e14",
]
min_num_shared_clones_list = [1]
clusterizations = ["refined_COUP_clust", "refined_COUP_class"]
lb_calling_methods = ["Ryan_toptier_9"]
# One suffix per (clusterization, lb calling method) pair
suffixes = list(itertools.starmap(
    "{}_{}".format,
    itertools.product(clusterizations, lb_calling_methods)))
annot = "annot_num_shared_clones"
parent_folder = "./files"
parent_folder_destination = "./coupling"

print_copy_zscore_commands(
    dataset_folders=dataset_folders,
    min_num_shared_clones_list=min_num_shared_clones_list,
    suffixes=suffixes,
    annot=annot,
    parent_folder=parent_folder,
    parent_folder_destination=parent_folder_destination)

In [None]:
# Print the commands to copy the results of the dendrograms analysis to a specific location

dataset_folders = [
    "clonal_dataset",
    "clonal_dataset_e10",
    "clonal_dataset_e12",
    "clonal_dataset_e13",
    "clonal_dataset_e14",
]
min_num_shared_clones_list = [1]
clusterizations = ["refined_COUP_clust", "refined_COUP_class"]
lb_calling_methods = ["Ryan_toptier_9"]
parent_folder = "./files"
parent_folder_destination = "./dendrogram"

print_copy_Dendrogram_zscore_commands(
    dataset_folders=dataset_folders,
    min_num_shared_clones_list=min_num_shared_clones_list,
    clusterizations=clusterizations,
    lb_calling_methods=lb_calling_methods,
    parent_folder=parent_folder,
    parent_folder_destination=parent_folder_destination)

In [None]:
# Print the PowerShell (Windows) commands to copy the results of the previous cell to a specific location

dataset_folders = [
    "clonal_dataset",
    "clonal_dataset_e10",
    "clonal_dataset_e12",
    "clonal_dataset_e13",
    "clonal_dataset_e14",
]
min_num_shared_clones_list = [1]
clusterizations = ["refined_COUP_clust", "refined_COUP_class"]
lb_calling_methods = ["Ryan_toptier_9"]
# One suffix per (clusterization, lb calling method) pair
suffixes = list(itertools.starmap(
    "{}_{}".format,
    itertools.product(clusterizations, lb_calling_methods)))
annot = "annot_num_shared_clones"
parent_folder = "./files"
parent_folder_destination = "C:\\Users\\username\\lineage_coupling_scores\\"

print_copy_zscore_commands_to_Windows(
    dataset_folders=dataset_folders,
    min_num_shared_clones_list=min_num_shared_clones_list,
    suffixes=suffixes,
    annot=annot,
    parent_folder=parent_folder,
    parent_folder_destination=parent_folder_destination)

In [None]:
# Print the PowerShell (Windows) commands to copy the results of the dendrograms analysis to a specific location

dataset_folders = [
    "clonal_dataset",
    "clonal_dataset_e10",
    "clonal_dataset_e12",
    "clonal_dataset_e13",
    "clonal_dataset_e14",
]
min_num_shared_clones_list = [1]
clusterizations = ["refined_COUP_clust", "refined_COUP_class"]
lb_calling_methods = ["Ryan_toptier_9"]
parent_folder = "./files"
parent_folder_destination = "C:\\Users\\username\\dendrograms\\"

print_copy_Dendrogram_zscore_commands_to_Windows(
    dataset_folders=dataset_folders,
    min_num_shared_clones_list=min_num_shared_clones_list,
    clusterizations=clusterizations,
    lb_calling_methods=lb_calling_methods,
    parent_folder=parent_folder,
    parent_folder_destination=parent_folder_destination)

In [None]:
# Preview the first rows of metric_values as left by the last iteration of
# the plotting loop. `head` has to be called: without the parentheses the
# cell only shows the repr of the bound method.
metric_values.head()

In [None]:
# Preview the first rows of num_cells_shared_clones as left by the last
# iteration of the plotting loop.
num_cells_shared_clones.head()

In [None]:
# Preview the first rows of num_shared_clones as left by the last iteration
# of the plotting loop.
num_shared_clones.head()

In [None]:
# Preview the first rows of z_scores as left by the last iteration of the
# plotting loop.
z_scores.head()