In [2]:
# `snakemake` is not defined anywhere in this notebook; it is presumably injected by
# Snakemake when it executes the notebook — confirm.
# `token` is the value handed to the report helpers called in the cells below.
# NOTE(review): this binds the whole `snakemake.params` object rather than one of its
# entries — confirm that this is what the helpers expect.
token = snakemake.params

# PitViper Notebook Report

This notebook was generated automatically by PitViper.

It can be used in two ways:

1. By using the functions already created and present in the following cells.

2. By creating new cells and writing Python 3 code in them.

The graphs are generated using the python library [Altair](https://altair-viz.github.io/index.html). It is possible to download each graph in SVG format from the drop-down menu at the top right of each graph.

The next cell imports the functions already created for visualizing the results, so that they can be called in the rest of the notebook.

In [3]:
import sys
import os
import pandas as pd
import IPython
from functools import reduce

# Register the candidate locations of the helper modules on the import path.
# Both candidates are relative to the current working directory; each one is made
# absolute first and is appended only if it is not already listed.
modules_path = ['workflow/notebooks/', "../../../workflow/notebooks/"]
for module_path in map(os.path.abspath, modules_path):
    if module_path in sys.path:
        continue
    sys.path.append(module_path)


# NOTE(review): this star import must run after the sys.path setup above. None of the
# helpers called in later cells is defined in this notebook, so they presumably all come
# from `functions`; `alt` (used below) is not imported explicitly either and presumably
# arrives the same way — confirm.
from functions import * 

import time

# Enable the 'html' renderer on `alt` (presumably Altair — confirm).
alt.renderers.enable('html')

# For Snakemake
switch = True
while os.path.basename(os.getcwd()) != "PitViper":
    if switch:
        switch = False
        %cd ../../
    else:
        %cd ../

print('Working directory: ', os.getcwd())

# Write a one-line marker into the first output path declared on `snakemake`.
# NOTE(review): the message contains a grammar slip ("runned"); it is kept verbatim
# because it is file content.
with open(snakemake.output[0], "w") as out:
    out.write("Notebook was runned." + "\n")

## Process data

The next function scans the `results/` directory to retrieve all results.

`tools_available` is a Python dictionary in which all data are stored in a comprehensive manner.

In [4]:
# Gather the results for this run. Both returned objects are reused by the per-tool
# result cells further down; the final cell indexes `tools_available` as
# tools_available[tool][comparison][file name].
results_directory, tools_available = setup_step_1(token)

## Mapping Quality Control

If available, mapping quality control metrics will be shown by the next function (`show_mapping_qc`).

In [5]:
# Display the mapping quality-control metrics for this run, when they are available.
show_mapping_qc(token)

## Read count distribution

The normalized read count distribution for all replicates will be shown by calling the `show_read_count_distribution` function.

In [6]:
# Lift the row limit of the `alt` data transformers before plotting (presumably needed
# because the count tables exceed Altair's default limit — confirm).
alt.data_transformers.disable_max_rows()

# Plot the normalized read count distribution of all replicates.
show_read_count_distribution(token)

## Principal component analysis

In [8]:
# Principal component analysis for this run; the helper is not defined in this notebook
# (presumably it works on the read counts, as its name suggests — confirm).
pca_counts(token)

## Tools global results

In [9]:
# Global overview of the tools' results, built from the objects returned by `setup_step_1`.
# The helper is not defined in this notebook; its exact output is not visible here.
snake_plot(results_directory, tools_available)

## sgRNA read counts by element

In [7]:
# sgRNA read counts by element for this run (helper not defined in this notebook).
show_sgRNA_counts(token) 

In [10]:
# Presumably displays the CRISPhieRmix results found by `setup_step_1` — confirm in `functions`.
CRISPhieRmix_results(results_directory, tools_available)

In [11]:
# Presumably displays the GSEA-like results found by `setup_step_1` — confirm in `functions`.
GSEA_like_results(results_directory, tools_available)

In [12]:
# Presumably displays the in-house method results found by `setup_step_1` — confirm in `functions`.
in_house_method_results(results_directory, tools_available)

In [13]:
# Presumably displays the MAGeCK RRA results found by `setup_step_1` — confirm in `functions`.
MAGeCK_RRA_results(results_directory, tools_available)

In [14]:
# Presumably displays the MAGeCK MLE results found by `setup_step_1` — confirm in `functions`.
MAGeCK_MLE_results(results_directory, tools_available)

In [15]:
# Presumably displays the BAGEL results found by `setup_step_1` — confirm in `functions`.
BAGEL_results(results_directory, tools_available)

In [16]:
# Presumably draws enrichment (Enrichr) plots from the collected results — confirm in
# `functions`. Unlike the per-tool cells, this helper only receives `tools_available`.
enrichr_plots(tools_available)

### Under development

In [17]:
# Embed a GeneMANIA search for a fixed list of elements as an iframe.
elements = ["BRCA2", "MYC", "MYB", "TP53"]

# The search URL is the base address followed by the elements, separated by "/".
base_url = "http://genemania.org/search/homo-sapiens/"
link = base_url + "/".join(elements)

iframe_template = '<iframe src="%s" width="1000" height="800"></iframe>'
html = iframe_template % (link,)

# Echo the raw markup first; the rendered iframe is the cell's displayed value.
print(html)

IPython.display.HTML(html)

In [18]:
# Experimental cell: combine the per-tool rankings of one comparison into a single
# consensus ranking and print it.
treatment = "J25"
comparison = "J25_vs_J4"  # single place to change the comparison used by every lookup below
n = 200  # keep only elements whose rank is below n for every tool


def _dense_rank_table(table, id_col, score_col, rank_col, ascending=True):
    """Return a new frame with columns `id` and `rank_col` (dense rank of `score_col`).

    A fresh DataFrame is built instead of adding a helper column to `table`,
    so the frames held in `tools_available` are left untouched by this cell.

    Parameters:
        table: result table of one tool.
        id_col: column of `table` holding the element identifier.
        score_col: column of `table` to rank.
        rank_col: name given to the rank column in the returned frame.
        ascending: passed to `Series.rank`; True ranks the smallest score first.
    """
    return pd.DataFrame({
        "id": table[id_col],
        rank_col: table[score_col].rank(method="dense", ascending=ascending),
    })


mle = _dense_rank_table(
    tools_available["MAGeCK_MLE"][comparison][comparison + ".gene_summary.txt"],
    id_col="Gene", score_col=treatment + '|beta', rank_col="mle_rank",
)

# MAGeCK RRA already provides a rank column, so it only needs renaming.
rra = tools_available["MAGeCK_RRA"][comparison][comparison + ".gene_summary.txt"]
rra = rra[["id", "neg|rank"]].rename(columns={"neg|rank": "rra_rank"})

# BAGEL is the only tool ranked in descending order (largest BF gets rank 1).
bagel = _dense_rank_table(
    tools_available["BAGEL"][comparison][comparison + "_BAGEL_output.bf"],
    id_col="GENE", score_col="BF", rank_col="bagel_rank", ascending=False,
)

in_house = _dense_rank_table(
    tools_available["in_house_method"][comparison][comparison + "_all-elements_in-house.txt"],
    id_col="Gene", score_col="score", rank_col="in_house_rank",
)

gsea = _dense_rank_table(
    tools_available["GSEA-like"][comparison][comparison + "_all-elements_GSEA-like.txt"],
    id_col="pathway", score_col="NES", rank_col="gsea_rank",
)

pdList = [mle, rra, bagel, in_house, gsea]

# Outer-join all rank tables on the element identifier.
df_merged = reduce(
    lambda left, right: pd.merge(left, right, on=['id'], how='outer'),
    pdList,
)

# Keep the rows ranked below n by every tool. A missing rank (NaN, produced by the
# outer join) compares as False and therefore drops the row.
rank_columns = ["mle_rank", "rra_rank", "bagel_rank", "in_house_rank", "gsea_rank"]
df_merged_reduced = df_merged[(df_merged[rank_columns] < n).all(axis=1)]

aggregated_ranks = mc4_aggregator(df_merged_reduced, header_row = 0, index_col = 0, iterations = 200)

# Group the keys by aggregated rank once, instead of rescanning every key for every rank.
keys_by_rank = {}
for key in aggregated_ranks.keys():
    keys_by_rank.setdefault(aggregated_ranks[key], []).append(key)

# Print "rank<TAB>id" for ranks 1..N in increasing order; keys that share a rank keep
# their original relative order.
for rank in range(1, len(aggregated_ranks.keys()) + 1):
    for key in keys_by_rank.get(rank, []):
        print(aggregated_ranks[key], df_merged_reduced.loc[[key]].id.values[0], sep="\t")
    
