In [2]:
# `snakemake` is not defined anywhere in this notebook; presumably it is injected
# by Snakemake when the notebook is executed as part of the workflow — TODO confirm.
# NOTE(review): this binds the whole `snakemake.params` object rather than a single
# entry, while the rpy2 note further down describes `token` as a string — confirm
# that the helper functions expect the params object itself.
token = snakemake.params

# PitViper Notebook Report

This notebook was generated automatically by PitViper.

It can be used in two ways:

1. By using the functions already created and present in the following cells.

2. By creating new cells and writing python3 code in them.

The graphs are generated using the Python library [Altair](https://altair-viz.github.io/index.html). Each graph can be downloaded in SVG format from the drop-down menu at its top right corner.

The next cell makes the pre-built visualization functions available: it adds the notebook helper directories to the Python path and imports everything from the `functions` module.

In [3]:
import sys
import os

# Candidate locations of the notebook helper modules, relative to the current
# working directory. Each one is made absolute and appended to `sys.path`
# unless that absolute path is already listed.
modules_path = ['workflow/notebooks/', "../../../workflow/notebooks/"]
for candidate in map(os.path.abspath, modules_path):
    if candidate in sys.path:
        continue
    sys.path.append(candidate)

from functions import * 

# Hand the first declared Snakemake output path to `working_directory_update`,
# which is not defined in this notebook (presumably provided by the star import above).
# NOTE(review): judging by its name, this presumably changes the working directory
# so that the relative paths used in later cells resolve — confirm in functions.py.
working_directory_update(snakemake.output[0])

### rpy2

The next cell loads the rpy2 IPython extension, which allows a cell to run R code when `%%R` is placed on its first line.

Note: Python objects (such as a list or a pandas DataFrame) can be forwarded to an R cell with the `-i` option followed by the object's name (for example, `%%R -i token` makes the `token` string available as an R object).

In [4]:
%load_ext rpy2.ipython

In [5]:
%%R

# Evaluate the project's functions_R.R script in the R session.
# The path is relative, so this cell depends on the current working directory.
source("workflow/notebooks/functions_R.R")

## Process data

The next function scans the `results/` directory to retrieve all results.

`tools_available` is a Python dictionary in which all data are stored in a comprehensive manner.

In [6]:
# `results_directory` and `tools_available` are reused by most of the cells below,
# so this cell must be run before any of them.
results_directory, tools_available = setup_step_1(token)

## Mapping Quality Control

If available, mapping quality control metrics will be shown by the next function (`show_mapping_qc`).

In [7]:
show_mapping_qc(token)

## Read count distribution

The normalized read count distribution for all replicates is shown by calling the `show_read_count_distribution` function.

In [8]:
# Lift Altair's default cap on the number of rows embedded in a chart so that
# larger tables can be plotted.
# `alt` is not imported explicitly in this notebook; presumably it comes from
# `from functions import *` — TODO confirm.
alt.data_transformers.disable_max_rows()

show_read_count_distribution(token)

## Principal component analysis

In [9]:
pca_counts(token)

## Tools global results

In [10]:
snake_plot(results_directory, tools_available)

## sgRNA read counts by element

In [11]:
show_sgRNA_counts(token) 

## Results by tool and by element

In [12]:
CRISPhieRmix_results(results_directory, tools_available)

In [13]:
GSEA_like_results(results_directory, tools_available)

In [14]:
in_house_method_results(results_directory, tools_available)

In [15]:
MAGeCK_RRA_results(results_directory, tools_available)

In [16]:
MAGeCK_MLE_results(results_directory, tools_available)

In [17]:
BAGEL_results(results_directory, tools_available)

## EnrichR

In [18]:
enrichr_plots(tools_available)

## GeneMania

In [19]:
genemania_link_results(tools_available)

## Data exploration charts

In [20]:
intersection(tools_available, token)

In [21]:
# NOTE(review): scratch cell — a list-slicing check unrelated to the report
# (displays the elements from index 2 onward). Candidate for removal.
l = [0,1,2,3,4,5]
l[2:]

In [22]:
call_form(tools_available)

## depmap integration with "Essentiality prioritisation" results

In [None]:
# dplyr = importr("dplyr")
# tibble = importr("tibble")
# stringr = importr("stringr")
# depmap = importr("depmap")
# experimentHub = importr("ExperimentHub")

# from rpy2.robjects.lib.dplyr import DataFrame
# from rpy2.robjects import rl

# import rpy2.ipython.html
# rpy2.ipython.html.init_printing()

In [None]:
# eh = experimentHub.ExperimentHub()
# rnai = depmap.depmap_rnai()

# rnai

In [None]:
# dataf = (
#     DataFrame(rnai)
#     .filter(rl('grepl("HAEMATOPOIETIC_AND_LYMPHOID_TISSUE", cell_line)'))
# )

# dataf.head()

In [None]:
# with localconverter(ro.default_converter + pandas2ri.converter):
#     py_rnai = ro.conversion.rpy2py(dataf)

# py_rnai

In [None]:
# genes = ['ADNP', 'SMARCA5', 'FTSJ3', 'TRRAP', 'ASH2L', 'NPM1', 'TRIM28',
#        'SUPT16H', 'USP7', 'PRPF31', 'CHAF1A', 'KANSL1', 'EP400', 'TAF6',
#        'RPA2', 'RRM1', 'NIPBL', 'RUNX1', 'SMARCB1', 'DDX21', 'PAF1', 'PSMB1',
#        'DDB1', 'NOC2L', 'MCM6', 'GLIS2', 'BRCA1', 'RUVBL1', 'POLA1', 'RPA1',
#        'NAA10', 'PHF5A', 'MYC', 'SMNDC1', 'FBL', 'CHD2', 'CHD1', 'KDM1A',
#        'DNMT1', 'TAF1', 'CHD4', 'WDR82', 'TAF10', 'UBA1', 'ACTL6A', 'ARID1A',
#        'HCFC1', 'SUPT6H', 'SSRP1', 'BPTF', 'TAF2', 'CTCF', 'HUWE1', 'WDR61',
#        'SMN1', 'PHIP', 'BRD4', 'DMAP1', 'DNTTIP2', 'NAT10', 'RAD51', 'LMO2',
#        'SAFB', 'ATR', 'BOP1', 'MLL2', 'MIS18A', 'YY1', 'WDR5', 'NCL', 'POLR2A',
#        'SAP130', 'POLR2B', 'SFPQ', 'SF3B1', 'BRCA2', 'AURKB', 'BCL2']

# essential_genes = py_rnai.loc[py_rnai.gene_name.isin(genes)][["gene_name", "cell_line", "dependency"]].pivot(index='gene_name', columns='cell_line', values='dependency').dropna()
# net.load_df(essential_genes)
# net.cluster()
# net.widget()

In [None]:
# for gene in [ 'ACTL6A', 'ASH2L', 'ATR', 'AURKB', 'BOP1', 'BPTF', 'BRD4', 'CHAF1A', 'CHD4', 'CTCF', 'DDB1', 'DDX21', 'DMAP1', 'DNMT1', 'EP400', 'HCFC1', 'HUWE1', 'KDM1A', 'MCM6', 'MYC', 'NAA10', 'NAT10', 'NCL', 'NPM1', 'PAF1', 'PHF5A', 'POLA1', 'POLR2A', 'POLR2B', 'PRPF31', 'RAD51', 'RPA1', 'RPA2', 'RRM1', 'RUVBL1', 'SAFB', 'SF3B1', 'SFPQ', 'SMARCA5', 'SMARCB1', 'SMNDC1', 'SSRP1', 'SUPT16H', 'SUPT6H', 'TAF1', 'TRIM28', 'TRRAP', 'UBA1', 'YY1']:
#     print(gene)

In [None]:
# for gene in [ 'ADNP', 'ARID1A', 'AURKB', 'BCL2', 'BPTF', 'BRCA1', 'BRCA2', 'CHD1', 'CHD2', 'CTCF', 'DMAP1', 'DNMT1', 'GLIS2', 'HUWE1', 'KDM1A', 'LMO2', 'MCM6', 'NAT10', 'NCL', 'NOC2L', 'NPM1', 'PHIP', 'RUNX1', 'SAFB', 'SAP130', 'SMARCB1', 'TAF2', 'TAF6', 'TRIM28', 'USP7', 'YY1']:
#     print(gene)

In [None]:
# %%R

# library(depmap)
# library("ExperimentHub")
# library(ggplot2)

# ## create ExperimentHub query object
# eh <- ExperimentHub()

# # rnai <- eh[["EH2260"]]

# # rnai

In [None]:
# %%R

# eh

In [None]:
# %%R -i

# query.gene <- "MYC"
# query.cell_line <- "KASUMI2_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE"
# query.cell_line_name <- "KASUMI2"

# essential.genes <- res %>%
#     filter(Score < 0.05) %>%
#     pull(Name)


# rnai.filtered <- rnai %>%
#     filter(cell_line == query.cell_line) %>%
#     mutate(essential = ifelse(gene_name %in% essential.genes, "essential", "non-essential")) %>%
#     filter(!is.na(dependency))


# gene.dependency <- rnai.filtered %>%
#     filter(gene_name == query.gene) %>%
#     pull(dependency)


# rnai.filtered %>%
#     ggplot(aes(x=dependency, fill=essential)) + 
#     geom_density(alpha=0.4) + 
#     theme_classic() +
#     scale_fill_manual(values=c("#E69F00", "#999999")) +
#     geom_vline(xintercept = 0) +
#     geom_rug(data = subset(rnai.filtered, essential == "essential"), col="#E69F00", alpha=0.5, sides = "t") +
#     geom_rug(data = subset(rnai.filtered, essential == "non-essential"), col="#999999", alpha=0.5, outside = TRUE, sides = "t") +
#     coord_cartesian(clip = "off") +
#     geom_vline(xintercept = gene.dependency, linetype="dashed", color = "red", size=1) +
#     ggtitle(paste0(query.cell_line_name, "dependencies (", query.gene," highlighted)")) +
#     theme(plot.title = element_text(vjust = 4),
#           plot.margin = margin(10, 10, 10, 10))


In [None]:
# %%R

# library(ggridges)
# library(tidyr)
# library(forcats)

# rnai.filtered <- rnai %>%
#     filter(grepl("HAEMATOPOIETIC_AND_LYMPHOID_TISSUE", cell_line)) %>%
#     mutate(essential = ifelse(gene_name %in% essential.genes, "essential", "non-essential")) %>%
#     filter(!is.na(dependency))


# rnai.filtered %>%
#     mutate(cell_line = fct_reorder(.f = cell_line, .x = -dependency, .fun = median)) %>%
#     filter(essential == "essential") %>%
#     ggplot(aes(x = `dependency`, y = cell_line, fill = stat(x))) +
#         geom_density_ridges_gradient(scale = 3, rel_min_height = 0.01, quantile_lines = TRUE, quantiles = 2) +
#         scale_fill_viridis_c() +
#         geom_vline(xintercept=-0.5, color = "red", size=1) +
#         geom_vline(xintercept=0, color = "black", size=1, linetype="dashed") +
#         theme_classic()


In [None]:
# import pandas as pd
# import rpy2.robjects as robjects
# from rpy2.robjects import pandas2ri# Defining the R script and loading the instance in Python
# r = robjects.r
# r['source']('preprocess.R')# Loading the function we have defined in R.
# filter_country_function_r = robjects.globalenv['filter_country']# Reading and processing data
# df = pd.read_csv("Country-Sales.csv")#converting it into r object for passing into r function
# df_r = pandas2ri.ri2py(df)
# #Invoking the R function and getting the result
# df_result_r = filter_country_function_r(df_r, 'USA')
# #Converting it back to a pandas dataframe.
# df_result = pandas2ri.py2ri(df_result_r)

In [None]:
# import numpy as np
# import pandas as pd
# from clustergrammer2 import net, Network, CGM2
# import warnings
# # warnings.filterwarnings('ignore')

In [None]:
# df = pd.DataFrame(np.random.randint(0,10,size=(10, 4)), columns=['BRCA2','TP53','SPNS2','MYC'])

# print(df)

# # load matrix file
# net.load_df(df)

# # cluster using default parameters
# # net.cluster(enrichrgram=True)

# # make interactive widget
# net.widget()