In [2]:
# start coding here

import pandas as pd
import re
import altair as alt

import warnings
warnings.filterwarnings('ignore')

In [3]:
# Load the tab-delimited table passed as the second Snakemake input
# (presumably the per-sgRNA summary, going by the variable name).
sgrnas_summary = pd.read_csv(snakemake.input[1], delimiter="\t")

In [4]:
# Load the tab-delimited table passed as the first Snakemake input; the rest of
# the notebook reads its 'Gene', 'sgRNA', '<condition>|beta' and
# '<condition>|fdr' columns.
genes_summary = pd.read_csv(snakemake.input[0], delimiter="\t")

In [5]:
# Show every column label of the gene summary, one per line
# (iterating a DataFrame yields its column labels).
for col in genes_summary:
    print(col)

In [6]:
def get_conditions_name(genes_summary):
    """Return the condition names found in the gene summary columns.

    A condition name is whatever precedes ``|beta`` in a column label
    (e.g. ``"M07e-J25|beta"`` yields ``"M07e-J25"``).

    Parameters
    ----------
    genes_summary : pandas.DataFrame
        Table whose column labels are strings.

    Returns
    -------
    list of str
        Unique condition names, in order of first appearance in the columns.
    """
    # Raw string: "\|" in a normal string literal is an invalid escape sequence.
    beta_column = re.compile(r"(^.+)\|beta")

    samples = []
    for col in genes_summary.columns:
        m = beta_column.search(col)
        if m and m.group(1) not in samples:
            samples.append(m.group(1))
    return samples

# Condition names extracted from the '<condition>|beta' column labels.
conditions = get_conditions_name(genes_summary)
# print(conditions)

In [7]:
def get_condition_cols(genes_summary, condition):
    """Return the columns of one condition plus the 'Gene' and 'sgRNA' columns.

    Parameters
    ----------
    genes_summary : pandas.DataFrame
        Table with 'Gene' and 'sgRNA' columns and per-condition columns
        labelled ``"<condition>|<statistic>"``.
    condition : str
        Condition name, without the trailing ``|``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the condition's columns (in their original order)
        followed by 'Gene' and 'sgRNA'.
    """
    # Match on "<condition>|" rather than the bare name, so that a condition
    # whose name is a prefix of another one (e.g. "J1" vs "J11") does not pull
    # in the other condition's columns.
    prefix = condition + "|"
    cols = [col for col in genes_summary.columns if col.startswith(prefix)]
    cols.extend(['Gene', 'sgRNA'])
    # Explicit copy: callers add columns to the result, which must neither
    # touch `genes_summary` nor trigger chained-assignment warnings.
    return genes_summary[cols].copy()


def get_conditions_dictionnary(genes_summary, conditions):
    """Build a mapping from each condition name to its sub-table.

    Each value is the result of ``get_condition_cols(genes_summary, name)``
    for the corresponding key; keys keep the order of ``conditions``.
    """
    return {
        name: get_condition_cols(genes_summary, name)
        for name in conditions
    }


# Map every detected condition name to its own sub-table of the gene summary.
conditions_tables_dict = get_conditions_dictionnary(genes_summary, conditions)

# print(conditions_tables_dict)

In [8]:
def get_essentials_genes_in_condition(full_table, condition):
    """Select the rows of one condition with FDR below 0.05 and negative beta.

    The condition's columns (plus 'Gene' and 'sgRNA') are taken from
    ``full_table`` via ``get_condition_cols`` and then filtered on the
    ``"<condition>|fdr"`` and ``"<condition>|beta"`` columns.
    """
    condition_table = get_condition_cols(full_table, condition)
    below_fdr = condition_table[condition + "|fdr"] < 0.05
    negative_beta = condition_table[condition + "|beta"] < 0
    return condition_table.loc[below_fdr & negative_beta]

# res = get_essentials_genes_in_condition(genes_summary, 'M07e-J25')
# for value in res[['Gene']].values:
#     print(value[0])

In [9]:
def get_beta_plot(genes_summary, condition, fdr_cutoff):
    """Build an interactive rank plot of the beta scores of one condition.

    Parameters
    ----------
    genes_summary : pandas.DataFrame
        Gene summary with 'Gene', 'sgRNA', ``"<condition>|beta"`` and
        ``"<condition>|fdr"`` columns.
    condition : str
        Condition whose columns are plotted.
    fdr_cutoff : float
        Rows with an FDR strictly below this value are labelled significant.

    Returns
    -------
    altair layered chart
        Beta score (y) against beta rank (x), coloured by significance, with a
        horizontal rule at beta = 0. Clicking a legend entry fades the other
        category.
    """
    # Explicit copy: columns are added below, and this must not act on (or warn
    # about) a slice derived from the caller's frame.
    table = get_condition_cols(genes_summary, condition).copy()

    beta = '{condition}|beta'.format(condition=condition)
    fdr = '{condition}|fdr'.format(condition=condition)

    table['beta_rank'] = table[beta].rank(method='max')
    table['fdr_rank'] = table[fdr].rank(method='max')

    # Rows whose FDR is missing match neither mask and keep a missing label.
    table.loc[table[fdr] < fdr_cutoff, 'significant'] = 'True'
    table.loc[table[fdr] >= fdr_cutoff, 'significant'] = 'False'

    # NOTE(review): selection_multi / add_selection are the Altair 4 spelling;
    # newer Altair releases may flag them as deprecated - confirm against the
    # pinned Altair version before changing.
    selection = alt.selection_multi(fields=['significant'], bind='legend')

    beta_plot = alt.Chart(table).mark_circle(size=60).encode(
        y=beta,
        x='beta_rank',
        color='significant',
        tooltip=['Gene', 'sgRNA', beta, fdr, 'significant'],
        opacity=alt.condition(selection, alt.value(1), alt.value(0.1))
    ).properties(
        title=condition+" gene summary"
    ).interactive(
    ).add_selection(
        selection
    )

    # Horizontal reference line at beta = 0.
    line = alt.Chart(pd.DataFrame({beta: [0]})).mark_rule().encode(y=beta)

    return (beta_plot + line).properties(width=600)
    

In [10]:
def save_beta_plot(genes_summary, condition, fdr):
    """Render the beta plot of one condition and save it as an HTML file.

    The output path is assembled from the first two Snakemake wildcards
    (used as ``token`` and ``name``) and the condition name.
    """
    chart = get_beta_plot(genes_summary, condition, fdr)

    path_fields = {
        "token": snakemake.wildcards[0],
        "name": snakemake.wildcards[1],
        "condition": condition,
    }
    plot_name = "results/{token}/MAGeCK_MLE/MAGeCK_MLE_{name}_{condition}_genes_beta_plot.html".format(**path_fields)

    chart.save(plot_name)

In [11]:
# Save one beta plot per condition, skipping the baseline condition.
# `condition` stays bound to the last plotted condition after the loop.
baseline = 'M07e-J4'
for condition in (cond for cond in conditions_tables_dict if cond != baseline):
    print(condition)
    save_beta_plot(genes_summary, condition, 0.05)

In [12]:
# Write the marker file declared as the first Snakemake output.
with open(snakemake.output[0], "w") as file:
    file.write("Done.\n")

In [13]:
def get_gene_info(table, gene):
    """Return the rows of ``table`` whose 'Gene' column equals ``gene``."""
    is_requested_gene = table['Gene'] == gene
    return table[is_requested_gene]

def plot_gene_accros_conditions(genes_summary, condition, gene):
    """Plot the beta score of one gene in every condition.

    Parameters
    ----------
    genes_summary : pandas.DataFrame
        Gene summary with a 'Gene' column and ``"<condition>|beta"`` columns.
    condition : str
        Not used by the plot; kept so existing calls keep working.
    gene : str
        Value of the 'Gene' column to plot.

    Returns
    -------
    altair chart
        One point per beta column, with the beta value on the y axis.
    """
    info = get_gene_info(genes_summary, gene)

    # Long format: one row per beta column, with the column label in
    # `variable` and the score in `value`.
    info_plot = info.filter(regex='beta$|^Gene', axis=1).melt(id_vars=['Gene'])

    sort_cols = ['M07e-J4', 'M07e-J11', 'M07e-J18', 'M07e-J25']
    # `variable` holds "<condition>|beta" labels, so the sort order must carry
    # the same suffix; bare condition names match nothing on the axis.
    beta_order = [cond + '|beta' for cond in sort_cols]

    plot = alt.Chart(info_plot).mark_circle(size=60).encode(
        y='value',
        x=alt.X('variable:N', sort=beta_order),
    ).properties(
        title=gene+" summary"
    )

    return plot




In [14]:
# Display the beta scores of MYC across conditions (the condition argument is
# not used by the function body).
plot_gene_accros_conditions(genes_summary, 'M07e-J25', "MYC")

In [15]:
# Long-format table for TET2: every '<condition>|fdr' column and 'Gene' are
# kept as identifier columns, and the beta columns are melted into
# `variable` / `value`.
info = get_gene_info(genes_summary, 'TET2')
cols = [col for col in info.columns if col.endswith('|fdr')]
cols.append('Gene')
# Raw string: "\|" in a normal string literal is an invalid escape sequence.
info_plot = info.filter(regex=r'beta$|^Gene|\|fdr$', axis=1).melt(id_vars=cols)
info_plot

In [16]:
# NOTE(review): this cell repeats the preceding cell's statements and rebuilds
# the same `info`, `cols` and `info_plot`; it looks like a leftover duplicate -
# confirm it can be removed.
info = get_gene_info(genes_summary, 'TET2')
cols = [col for col in info.columns if col.endswith('|fdr')]
cols.append('Gene')
# Raw string: "\|" in a normal string literal is an invalid escape sequence.
info_plot = info.filter(regex=r'beta$|^Gene|\|fdr$', axis=1).melt(id_vars=cols)
info_plot

In [17]:
# Point plot of the beta scores held in the long-format `info_plot` table.
# This cell only reads `info_plot`; it does not depend on any loop variable
# left over from earlier cells.
sort_cols = ['M07e-J4', 'M07e-J11', 'M07e-J18', 'M07e-J25']

plot = alt.Chart(info_plot).mark_point(
    filled=True,
    size=100
).encode(
    y='value',
    # `variable` holds "<condition>|beta" labels, so the sort order must carry
    # the same suffix to have any effect on the axis.
    x=alt.X('variable:N', sort=[cond + '|beta' for cond in sort_cols]),
).properties(
    title="Gene summary"
)

plot

In [18]:
def get_gene_accross_conditions_plot(genes_summary, conditions, gene, fdr_cutoff, baseline):
    """Plot the beta score of one gene in each condition, coloured by significance.

    Parameters
    ----------
    genes_summary : pandas.DataFrame
        Gene summary with a 'Gene' column plus ``"<condition>|beta"`` and
        ``"<condition>|fdr"`` columns for every entry of ``conditions``.
    conditions : iterable of str
        Conditions to include; must not be empty (``pandas.concat`` raises on
        an empty list).
    gene : str
        Value of the 'Gene' column to plot.
    fdr_cutoff : float
        Rows with an FDR strictly below this value are labelled 'True',
        the others 'False'.
    baseline : str
        Condition whose rows are labelled 'Baseline' regardless of their FDR.

    Returns
    -------
    altair chart
        One filled point per (gene row, condition), beta on the y axis.
    """
    # The gene's rows do not depend on the condition: look them up once.
    gene_rows = get_gene_info(genes_summary, gene)

    rows = []
    for condition in conditions:
        beta = '{condition}|beta'.format(condition=condition)
        fdr = '{condition}|fdr'.format(condition=condition)
        # Copy before adding a column so the assignment never targets a slice
        # of the caller's frame.
        info = gene_rows[["Gene", fdr, beta]].copy()
        info['condition'] = condition
        info.columns = ['Gene', 'fdr', 'beta', 'condition']
        rows.append(info)

    result = pd.concat(rows)
    # Rows whose FDR is missing match neither mask and keep a missing label
    # (unless they belong to the baseline condition).
    result.loc[result['fdr'] < fdr_cutoff, 'significant'] = 'True'
    result.loc[result['fdr'] >= fdr_cutoff, 'significant'] = 'False'
    result.loc[result['condition'] == baseline, 'significant'] = 'Baseline'

    # Preferred left-to-right order of the x axis; conditions not listed here
    # are left to the chart's default ordering.
    sort_cols = ['M07e-J4', 'M07e-J11', 'M07e-J18', 'M07e-J25']

    plot = alt.Chart(result).mark_point(
        filled=True,
        size=100
    ).encode(
        y='beta',
        x=alt.X('condition:N', sort=sort_cols),
        color='significant',
        tooltip=['Gene', 'beta', 'fdr', 'significant'],
    ).properties(
        title=gene + " beta versus baseline"
    )

    return plot

In [21]:
# Display TET2 across all detected conditions with a 0.05 FDR cutoff.
# `baseline` is the module-level name assigned in the cell that saves the
# per-condition beta plots, so that cell must have run first.
gene = 'TET2'
get_gene_accross_conditions_plot(genes_summary, conditions, gene, 0.05, baseline)