In [1]:
import re
import pprint as pp
import pandas as pd
import time
import altair as alt
from functools import reduce
import sys

from IPython.display import display, HTML
import os

from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

# Make the project's helper scripts importable. Two candidate locations are
# registered, both resolved against the current working directory.
modules_path = ['workflow/scripts', "../../../workflow/scripts"]
for module in modules_path:
    module_path = os.path.abspath(module)
    if module_path in sys.path:
        # Already registered: do not add a duplicate entry.
        continue
    sys.path.append(module_path)
    
from functions import *

# Select Altair's 'default' renderer for chart output.
alt.renderers.enable('default')
# Remove the row limit when DataFrames are displayed (None = show all rows).
pd.set_option('display.max_rows', None)

# For Snakemake
switch = True
while os.path.basename(os.getcwd()) != "PitViper":
    if switch:
        switch = False
        %cd ../../
    else:
        %cd ../

/media/paularthur/data/Projets/Screens/PitViper/PitViper


In [2]:
# Parameters
# Result directories of the two MAGeCK analyses; they are passed to
# mageck_mle_viz() and mageck_rra_viz() further down.
# NOTE(review): relative paths - presumably resolved against the PitViper
# directory reached by the setup cell; confirm.
mageck_mle_outputs = "results/CRISPRi_countFrom_bam/MAGeCK_MLE/"
mageck_rra_outputs = "results/CRISPRi_countFrom_bam/MAGeCK_RRA/"

In [3]:
"./PitViper/results/"

'./PitViper/results/'

## MAGeCK-MLE

The `mageck_mle_viz` function allows you to visualize the MAGeCK MLE output.

Graphical results can be downloaded in SVG or PNG format using the button at the top-right corner of the chart.

The maximum number of rows to display can be changed with the `pd.set_option('display.max_rows', n_rows)` function.

In [4]:
# Limit DataFrame displays to 10 rows from this point on.
pd.set_option('display.max_rows', 10)
# Show the interactive MAGeCK MLE view for the directory set in the
# parameters cell (mageck_mle_viz is presumably provided by `functions`).
mageck_mle_viz(mageck_mle_outputs)

Chart parameters :


interactive(children=(Text(value='SE_42', description='feature'), FloatSlider(value=0.05, description='fdr', m…

## MAGeCK-RRA

In [5]:
# Show the interactive MAGeCK RRA view for the directory set in the
# parameters cell (mageck_rra_viz is presumably provided by `functions`).
mageck_rra_viz(mageck_rra_outputs)

Chart parameters :


interactive(children=(Text(value='SE_4', description='feature'), FloatSlider(value=0.05, description='fdr', ma…

## BAGEL

In [7]:
# Directory scanned by bagel_inputs(): each entry inside it is treated as one
# BAGEL comparison.
# NOTE(review): hard-coded to a single experiment, unlike the MAGeCK paths
# that are defined in the parameters cell.
bagel_outputs = "results/shRNA_full_essentials_genes_BAGEL/BAGEL/"

In [14]:
def bagel_inputs(bagel_outputs):
    """Load every BAGEL result table found under ``bagel_outputs``.

    Each sub-directory ``<name>`` of ``bagel_outputs`` is expected to contain
    a tab-separated file called ``<name>_BAGEL_output.bf``.

    Parameters
    ----------
    bagel_outputs : str
        Directory holding one sub-directory per comparison. A trailing path
        separator is optional.

    Returns
    -------
    dict
        ``{name: {'file': path_to_bf_file, 'table': DataFrame}}`` with one
        entry per sub-directory, inserted in sorted name order.

    Raises
    ------
    FileNotFoundError
        If ``bagel_outputs`` does not exist, or a sub-directory does not
        contain its ``_BAGEL_output.bf`` file.
    """
    bagel = {}
    # os.listdir() returns entries in arbitrary order; sort them so that the
    # result (and anything derived from its key order) is reproducible.
    for directory in sorted(os.listdir(bagel_outputs)):
        # Skip stray files (logs, hidden files, ...) next to the result
        # directories instead of failing on a path that cannot exist.
        if not os.path.isdir(os.path.join(bagel_outputs, directory)):
            continue
        table_file = os.path.join(bagel_outputs, directory, directory + '_BAGEL_output.bf')
        bagel[directory] = {'file': table_file, 'table': pd.read_csv(table_file, sep="\t")}
    return bagel

# Load all BAGEL tables, then preview the table of one comparison.
# NOTE(review): the key below is hard-coded and must match the name of an
# entry found under `bagel_outputs`.
bagel = bagel_inputs(bagel_outputs)
bagel['M07e-J25_vs_M07e-J4']['table']

Unnamed: 0,GENE,BF,STD,NumObs
0,A1CF,-47.865,7.037,367
1,ACTB,50.616,17.854,378
2,ACTL6A,137.561,53.181,391
3,ACTL6B,-30.412,5.788,386
4,ACTR3B,-49.154,7.452,366
...,...,...,...,...
951,ZNF711,-47.270,6.752,353
952,ZNHIT1,-51.867,7.802,334
953,ZRANB3,-37.541,6.245,393
954,ZSCAN21,-41.101,6.060,385


In [30]:
# Control condition: bagel_results() keeps only the comparisons named
# "<treatment>_vs_<control>" whose control part equals this value.
control = 'M07e-J4'

def bagel_results(bagel, control):
    """Merge the BAGEL tables of every comparison made against ``control``.

    Parameters
    ----------
    bagel : dict
        Mapping ``"<treatment>_vs_<control>"`` -> ``{'table': DataFrame, ...}``.
        Every table must contain a ``GENE`` column. The tables are read from
        this argument and are left unmodified. Keys that do not contain
        exactly one ``"_vs_"`` separator are ignored.
    control : str
        Name of the control condition used to select comparisons.

    Returns
    -------
    tuple
        ``(big_table, conditions_plot)`` where ``big_table`` is the outer
        merge on ``GENE`` of the selected tables, every other column being
        prefixed with ``"<treatment>_"``, and ``conditions_plot`` is the list
        of selected treatment names in the iteration order of ``bagel``.

    Raises
    ------
    ValueError
        If no comparison in ``bagel`` uses ``control`` as its control.
    """
    tables = []
    conditions_plot = []
    for condition, entry in bagel.items():
        parts = condition.split("_vs_")
        if len(parts) != 2:
            # Not a "<treatment>_vs_<control>" key: nothing to compare.
            continue
        trt, con = parts
        if con != control:
            continue
        conditions_plot.append(trt)
        source = entry['table']
        # Prefix every column except the merge key with the treatment name.
        new_names = {col: trt + "_" + col for col in source.columns if col != 'GENE'}
        # rename() without inplace returns a new frame, so the caller's
        # tables stay untouched and this function can be re-run safely.
        tables.append(source.rename(columns=new_names))

    if not tables:
        # reduce() on an empty list would fail with an unrelated TypeError.
        raise ValueError(
            "No BAGEL comparison found with control '{}'.".format(control)
        )

    big_table = reduce(
        lambda left, right: pd.merge(left, right, on=['GENE'], how='outer'),
        tables,
    )

    return (big_table, conditions_plot)

# Merge every comparison made against `control` into one wide table and keep
# the list of treatment names for the plot below.
table, conditions = bagel_results(bagel, control)
table

Unnamed: 0,GENE,M07e-J18_BF,M07e-J18_STD,M07e-J18_NumObs,M07e-J11_BF,M07e-J11_STD,M07e-J11_NumObs,M07e-J25_BF,M07e-J25_STD,M07e-J25_NumObs
0,A1CF,-46.880,24.347,367,-23.540,4.749,385,-47.865,7.037,367
1,ACTB,15.772,39.971,374,24.194,5.611,365,50.616,17.854,378
2,ACTL6A,80.346,82.747,361,71.147,10.828,359,137.561,53.181,391
3,ACTL6B,-37.416,24.268,367,-22.325,4.761,367,-30.412,5.788,386
4,ACTR3B,-44.485,23.645,348,-32.073,5.588,365,-49.154,7.452,366
...,...,...,...,...,...,...,...,...,...,...
951,ZNF711,-34.809,27.054,375,-24.808,5.405,392,-47.270,6.752,353
952,ZNHIT1,-37.779,23.609,370,-34.192,6.228,364,-51.867,7.802,334
953,ZRANB3,-39.759,28.001,370,-31.960,5.972,373,-37.541,6.245,393
954,ZSCAN21,-42.761,26.780,355,-29.694,5.157,376,-41.101,6.060,385


In [56]:
# Plot the BAGEL Bayes factor (BF) of one gene in every condition, with the
# baseline drawn as a reference point at BF = 0.
gene = 'MYC'
baseline = 'M07e-J4'
genes_summary = table.loc[table['GENE'] == gene]

# Build one long-format frame per condition: pick that condition's columns
# from the wide table and give them condition-independent names.
rows = []
for condition in conditions:
    if condition == baseline:
        continue
    bf = '{trt}_BF'.format(trt=condition)
    std = '{trt}_STD'.format(trt=condition)
    nobs = '{trt}_NumObs'.format(trt=condition)
    # .copy() gives an independent frame, so the assignments below do not
    # act on a slice of `table` (avoids pandas' SettingWithCopyWarning).
    info = genes_summary[["GENE", bf, std, nobs]].copy()
    info.columns = ['Gene', 'BF', 'Std', 'NumObs']
    info['condition'] = condition
    rows.append(info)

# Reference row for the baseline itself.
new_row = {'Gene':gene, 'BF':0, 'Std':0, 'NumObs':0,'condition':baseline}
# DataFrame.append() was removed in pandas 2.0. A single concat replaces it
# and also works when `rows` is empty (only the baseline row remains).
result = pd.concat(rows + [pd.DataFrame([new_row])], ignore_index=True)

# Label each point: positive BF -> 'True', otherwise 'False'; the baseline
# row gets its own label so it can be coloured separately.
result.loc[result['BF'] > 0, 'essential'] = 'True'
result.loc[result['BF'] <= 0, 'essential'] = 'False'
result.loc[result['condition'] == baseline, 'essential'] = 'Baseline'

# x-axis order: all conditions plus the baseline, ordered by natural_sort
# (helper presumably provided by `functions`).
cond = list(conditions)
cond.append(baseline)
sort_cols = natural_sort(cond)

# Colour mapping for the 'essential' labels.
domain = ['True', 'False', 'Baseline']
range_ = ['red', 'grey', 'black']

chart = alt.Chart(result)

# The former mark_circle() call was dropped: mark_point() replaced it anyway.
plot = chart.mark_point(
    filled=True,
    size=100,
    ).encode(
            y='BF',
            x=alt.X('condition:N', sort=sort_cols),
            color=alt.Color('essential', scale=alt.Scale(domain=domain, range=range_), legend=alt.Legend(title="Essentiality:")),
            tooltip=['Gene', 'BF', 'Std', 'essential', 'condition'],
    ).properties(
            # The values plotted are BAGEL Bayes factors, not MAGeCK betas.
            title=gene + " Bayes factor versus baseline (BAGEL)",
            width=100
    )

plot