In [None]:
# Reload imported modules before each cell executes, so edits to local modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
from environment import *
from setting import *

In [None]:
# Tab-separated table whose first column becomes the index; the index labels
# are the targets iterated over when the hill plots are drawn.
target_x_sample = pd.read_csv(
    PATH_DICT['target_x_sample_file_path'],
    sep='\t',
    index_col=0,
)

In [None]:
# Gene-set groups to highlight on the hill plots.
# Each group maps to its member gene sets, a marker size, and a marker color;
# the i-th group takes the i-th color of ccal's categorical palette.
groups = {
    group_name: {
        'gene_sets': gene_sets,
        'size': 12,
        'color': ccal.plot.plot.style.CATEGORICAL_COLORS[color_index],
    }
    for color_index, (group_name, gene_sets) in enumerate((
        (
            'Stemness and EMT Signatures',
            (
                'Cancer Stem Cell',
                'BENPORATH_ES_CORE_NINE',
                'BENPORATH_ES_WITH_H3K27ME3',
                'HOEBEKE_LYMPHOID_STEM_CELL_UP',
                'IVANOVA_HEMATOPOIESIS_STEM_CELL_AND_PROGENITOR',
                'MIKKELSEN_IPS_LCP_WITH_H3K4ME3',
                'MIKKELSEN_IPS_WITH_HCP_H3K27ME3',
                'GOTZMANN_EPITHELIAL_TO_MESENCHYMAL_TRANSITION_UP',
            ),
        ),
        (
            'Oncogenic Signatures',
            (
                'HALLMARK_NOTCH_SIGNALING',
                'HALLMARK_TGF_BETA_SIGNALING',
                'HALLMARK_E2F_TARGETS',
                'HALLMARK_WNT_BETA_CATENIN_SIGNALING',
                'BIOCARTA_WNT_PATHWAY',
                'PRC2_EZH2_UP.V1_UP',
                'E2F3_UP.V1_UP',
            ),
        ),
        (
            'NFkB Signatures',
            (
                'HINATA_NFKB_TARGETS_KERATINOCYTE_UP',
                'TIAN_TNF_SIGNALING_VIA_NFKB',
            ),
        ),
        (
            'Differentiation Signatures',
            (
                'RODRIGUES_THYROID_CARCINOMA_POORLY_DIFFERENTIATED_UP',
                'MA_MYELOID_DIFFERENTIATION_UP',
                'ADDYA_ERYTHROID_DIFFERENTIATION_BY_HEMIN',
            ),
        ),
    ))
}

In [None]:
def plot_hill(
    score_p_value,
    groups,
    name,
):
    """
    Plot gene-set 'Score' against 1 - 'P-Value' and highlight selected groups.

    The first trace is a line through every row of score_p_value, ordered by
    ascending 'Score'. Each entry of groups then adds one marker trace holding
    the rows whose index label is listed in that group's 'gene_sets'.

    NOTE: this function reads target_name (first line of the title) and
    output_directory_path (prefix of html_file_path) from the notebook's global
    scope; both must be assigned before it is called.

    Arguments:
        score_p_value (DataFrame): indexed by gene set name, with 'Score' and
            'P-Value' columns. It is not modified.
        groups (dict): {group name: {'gene_sets': list-like of gene set names,
            'size': marker size, 'color': marker color}}.
        name (str): name of the line trace, second line of the title, and last
            component of the html_file_path handed to ccal.plot_points.
    Returns:
        None
    """

    # Sort into a new frame instead of in place so the caller's DataFrame
    # (possibly a slice of a larger table) is left untouched.
    score_p_value = score_p_value.sort_values('Score')

    xs = [1 - score_p_value['P-Value']]

    ys = [score_p_value['Score']]

    names = [name]

    markers = [dict(
        color='#d0d0d0',
        size=3.2,
    )]

    texts = [score_p_value.index]

    for group_name, group in groups.items():

        # Rows belonging to this group, kept in the sorted order.
        score_p_value__peek = score_p_value.loc[
            score_p_value.index.isin(group['gene_sets'])
        ]

        xs.append(1 - score_p_value__peek['P-Value'])

        ys.append(score_p_value__peek['Score'])

        names.append(group_name)

        texts.append(score_p_value__peek.index)

        markers.append(dict(
            size=group['size'],
            color=group['color'],
        ))

    ccal.plot_points(
        xs,
        ys,
        names=names,
        # Exactly one mode per trace: the background line, then one marker
        # trace per group.
        modes=('lines', ) + ('markers', ) * len(groups),
        texts=texts,
        markers=markers,
        title='{}<br>{}'.format(
            target_name,
            name,
        ),
        xaxis_title='1 - P-Value',
        yaxis_title='Gene Set Score',
        html_file_path='{}/{}'.format(
            output_directory_path,
            name,
        ),
    )

In [None]:
# For every target: draw one hill plot over all scored gene sets, then one per
# gene-set collection file.
for target_name in target_x_sample.index:

    # plot_hill reads target_name and output_directory_path from the notebook's
    # global scope, so both must be assigned before it is called.
    output_directory_path = '../output/hill_plot/{}'.format(target_name)

    ccal.establish_path(
        output_directory_path,
        'directory',
    )

    score_moe_p_value_fdr = pd.read_table(
        '{}/{}/all.tsv'.format(
            PATH_DICT['differentially_expressed_gene_set_directory_path'],
            target_name,
        ),
        index_col=0,
    )

    plot_hill(
        score_moe_p_value_fdr,
        groups,
        'all',
    )

    for gene_set_file_path in PATH_DICT['gene_set_file_paths']:

        gene_set_file_name = gene_set_file_path.split('/')[-1]

        # Assumes ccal.read_gmt returns a table indexed by gene set name - TODO confirm.
        gmt = ccal.read_gmt(gene_set_file_path)

        # Keep only the rows whose gene set appears in this collection. A
        # boolean mask is used because .loc with a list of labels raises
        # KeyError in recent pandas (and inserted all-NaN rows in older
        # versions) when any label is missing from the scored table.
        score_moe_p_value_fdr__gmt = score_moe_p_value_fdr.loc[
            score_moe_p_value_fdr.index.isin(gmt.index)
        ]

        plot_hill(
            score_moe_p_value_fdr__gmt,
            groups,
            gene_set_file_name,
        )