In [1]:
import os

import altair as alt
import glob
import ipywidgets as widgets
import numpy as np
import pandas as pd
import re

from ipywidgets import HBox, VBox, Layout

# Disable Altair's maximum-row check on chart data; the result tables charted
# in this notebook have far more rows than a small example dataset.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
# Name of the run directory; Tool builds its relative result paths from it.
token = 'all_tools'

# Those paths are relative, so show which working directory they resolve against.
print(os.getcwd())

/media/paularthur/data/Projets/Screens/PitViper/PitViper/results


In [3]:
class Tool:
    """Result tables of one analysis tool, one table per comparison.

    For every sub-directory (comparison) of `{token}/{tool_name}/`, exactly one
    result file matching `file_pattern` is read, and three of its columns are
    extracted and renamed to `Name`, `Uncertainty` and `Score`.

    Attributes:
        token, tool_name: as given to the constructor.
        display_name: label of the tool; falls back to `tool_name`.
        comparisons: numpy array of the comparison directory names.
        files: for each comparison, the list of files matched by `file_pattern`.
        full_tables: dict comparison -> complete table as read from disk.
        tables: dict comparison -> independent 3-column table
            (`Name`, `Uncertainty`, `Score`).
        genes: unique values found in the `Name` column over all comparisons.
    """

    def __init__(
        self,
        token,
        tool_name,
        display_name = None,
        file_pattern = '',
        sep = '\t',
        uncertainty_pattern = '',
        score_pattern = '',
        name_pattern = '',
    ):
        """Read and normalise every comparison table of `tool_name`.

        Args:
            token: name of the run directory.
            tool_name: name of the tool sub-directory.
            display_name: optional label; defaults to `tool_name`.
            file_pattern: glob pattern of the result file inside each
                comparison directory.
            sep: column separator passed to `pandas.read_csv`.
            uncertainty_pattern, score_pattern, name_pattern: regular
                expressions applied with `re.match` (anchored at the start of
                the column name); each must select exactly one column.

        Raises:
            Exception: when no comparison is found, when a comparison does not
                have exactly one matching file, or when a pattern does not
                select exactly one column.
        """
        self.token = token
        self.tool_name = tool_name
        self.display_name = display_name if display_name else tool_name

        # NOTE(review): comparisons are listed under '{token}/{tool_name}/' but
        # files are globbed under 'results/{token}/{tool_name}/'. Both relative
        # paths are kept exactly as they were; confirm which root is intended.
        self.comparisons = np.array(os.listdir(f'{self.token}/{self.tool_name}/'))
        if len(self.comparisons) == 0:
            raise Exception(f'Error: No comparison found in {self.token}/{self.tool_name}/')

        # One list of matching paths per comparison, in the same order.
        self.files = [
            glob.glob(f'results/{self.token}/{self.tool_name}/{comparison}/{file_pattern}')
            for comparison in self.comparisons
        ]

        # Each comparison must provide exactly one result file.
        for comparison, matches in zip(self.comparisons, self.files):
            if len(matches) != 1:
                raise Exception(f'Error: There is not exactly one file found for comparison {comparison}')

        # Complete tables, keyed by comparison.
        self.full_tables = {
            comparison: pd.read_csv(matches[0], sep=sep)
            for comparison, matches in zip(self.comparisons, self.files)
        }
        self.tables = dict.fromkeys(self.comparisons, None)

        names = []
        for comparison in self.comparisons:
            full_table = self.full_tables[comparison]
            columns = full_table.columns
            uncertainty_col = self._single_column(columns, uncertainty_pattern, 'uncertainty')
            score_col = self._single_column(columns, score_pattern, 'score')
            name_col = self._single_column(columns, name_pattern, 'name')

            # Explicit copy: the normalised table must be independent of the
            # full table so that later column additions neither touch it nor
            # trigger pandas' SettingWithCopyWarning.
            table = full_table[[name_col, uncertainty_col, score_col]].copy()
            table.columns = ['Name', 'Uncertainty', 'Score']
            self.tables[comparison] = table
            names.extend(table.Name)
        self.genes = list(set(names))

    @staticmethod
    def _single_column(columns, pattern, role):
        """Return the only column name matched by `pattern`, or raise."""
        matches = list(filter(re.compile(pattern).match, columns))
        if len(matches) != 1:
            raise Exception(f'Error: There is not exactly one column extracted for {role}: {matches}')
        return matches[0]

    def get_results(self, comparison, min_uncertainty = -np.inf, max_uncertainty = np.inf, min_score = -np.inf, max_score = np.inf):
        """Return the rows of `comparison` lying strictly inside the given bounds.

        All four bounds are exclusive; the defaults (+/- infinity) keep every
        row with non-missing values. `np.inf` is used because the `np.Inf`
        alias no longer exists in NumPy 2.

        Raises:
            KeyError: when `comparison` is not a known comparison.
        """
        table = self.tables[comparison]
        uncertainty_ok = (table['Uncertainty'] > min_uncertainty) & (table['Uncertainty'] < max_uncertainty)
        score_ok = (table['Score'] > min_score) & (table['Score'] < max_score)
        return table[uncertainty_ok & score_ok]
    

            

In [4]:
# MAGeCK MLE gene summary: uncertainty and score are taken from the columns
# matched by '.*\|fdr' and '.*\|beta'. The patterns are raw strings because
# '\|' is not a valid escape sequence in a normal string literal.
mle = Tool(
    token,
    'MAGeCK_MLE',
    file_pattern="*.gene_summary.txt",
    sep='\t',
    uncertainty_pattern=r'.*\|fdr',
    score_pattern=r'.*\|beta',
    name_pattern='Gene',
)

# Rows with Uncertainty < 0.05 and Score < 0 (both bounds are exclusive).
mle_df = mle.get_results(comparison='D28_DRUG_vs_D28_DMSO', max_uncertainty=0.05, max_score=0.0)
mle_df.sort_values(['Uncertainty', 'Score'])

Unnamed: 0,Name,Uncertainty,Score
10665,UBXN1,0.0,-1.7322
4231,PTEN,0.0,-1.5732
2809,PLAA,0.0,-1.3989
13737,DPM3,0.0,-1.3757
11473,RAD23B,0.0,-1.2553
12773,ELOC,0.0,-1.1301
11080,USP38,0.0,-1.1226
14069,MOGS,0.0,-1.1164
173,DDI2,0.0,-1.1127
17565,PTMA,0.0,-1.0962


In [5]:
# MAGeCK RRA, negative-selection columns ('neg|fdr', 'neg|lfc'). The patterns
# are raw strings because '\|' is not a valid escape sequence in a normal
# string literal.
rra = Tool(
    token,
    'MAGeCK_RRA',
    display_name="MAGeCK_RRA_negative",
    file_pattern="*.gene_summary.txt",
    sep='\t',
    uncertainty_pattern=r'neg\|fdr',
    score_pattern=r'neg\|lfc',
    name_pattern='id',
)

# Rows with Uncertainty < 0.05 and Score < 0 (both bounds are exclusive).
rra_df = rra.get_results(comparison='D28_DRUG_vs_D28_DMSO', max_uncertainty=0.05, max_score=0.0)
rra_df

Unnamed: 0,Name,Uncertainty,Score
0,DPM3,0.00165,-2.2145
1,PDAP1,0.00165,-1.028
2,MOGS,0.00165,-1.8448
3,RAD23B,0.003713,-1.9562
4,UGGT1,0.016832,-1.5792
5,PTEN,0.018977,-2.7744
6,IPO5,0.041254,-1.1043
7,FAF1,0.041254,-1.5441
8,ALG3,0.041254,-1.4422


In [6]:
# MAGeCK RRA, positive-selection columns ('pos|fdr', 'pos|lfc'). The patterns
# are raw strings because '\|' is not a valid escape sequence in a normal
# string literal.
rra_pos = Tool(
    token,
    'MAGeCK_RRA',
    display_name="MAGeCK_RRA_positive",
    file_pattern="*.gene_summary.txt",
    sep='\t',
    uncertainty_pattern=r'pos\|fdr',
    score_pattern=r'pos\|lfc',
    name_pattern='id',
)

# Rows with Uncertainty < 0.05 and Score > 0 (both bounds are exclusive).
rra_pos_df = rra_pos.get_results(comparison='D28_DRUG_vs_D28_DMSO', max_uncertainty=0.05, min_score=0.0)
rra_pos_df

Unnamed: 0,Name,Uncertainty,Score
13760,RPS3A,0.04644,2.9583
18939,KIDINS220,0.047568,0.98832
18940,METTL23,0.047568,1.1594
18941,TRA2A,0.04644,1.3888
18942,SCAF4,0.039343,1.4705
18943,GMEB2,0.025028,1.1412
18944,SLTM,0.02184,1.0823
18945,SAMHD1,0.020111,0.93414
18946,KLF16,0.017492,1.2928
18947,LARP4B,0.015205,1.1412


In [7]:
# BAGEL: the same 'BF' pattern is used for both the uncertainty and the score
# column, so both normalised columns hold the same values.
bagel = Tool(
    token,
    'BAGEL',
    file_pattern="*_output.bf",
    sep='\t',
    uncertainty_pattern='BF',
    score_pattern='BF',
    name_pattern='GENE',
)

# Rows with Score > 0, highest score first.
bagel_df = bagel.get_results(comparison='D28_DRUG_vs_D28_DMSO', min_score=0.0)
bagel_df.sort_values('Score', ascending=False)

Unnamed: 0,Name,Uncertainty,Score
17543,UBXN1,18.020,18.020
13245,RAD23B,17.531,17.531
4662,DPM3,16.875,16.875
12988,PTEN,16.680,16.680
12196,PLAA,16.064,16.064
...,...,...,...
11481,OSER1,0.009,0.009
712,ANKRD36,0.009,0.009
493,AKAP12,0.007,0.007
4411,DHRS2,0.007,0.007


In [8]:
# CRISPhieRmix: comma-separated result files; uncertainty from 'FDR', score
# from 'top_3_mean_log2FoldChange'.
crisphiermix = Tool(
    token,
    'CRISPhieRmix',
    file_pattern="*.txt",
    sep=',',
    uncertainty_pattern='FDR',
    score_pattern='top_3_mean_log2FoldChange',
    name_pattern='gene',
)

# Rows with Uncertainty < 0.05 and Score < 0, most certain first.
crisphiermix_df = crisphiermix.get_results(comparison='D28_DRUG_vs_D28_DMSO', max_uncertainty=0.05, max_score=0.0)
crisphiermix_df.sort_values('Uncertainty')

Unnamed: 0,Name,Uncertainty,Score
13943,RPS29,0.000000e+00,-3.381752
12989,PTEN,0.000000e+00,-2.317175
17548,UBXN1,0.000000e+00,-2.503328
6922,H2BC6,7.454355e-16,-1.573015
12197,PLAA,1.731060e-12,-1.977613
...,...,...,...
16930,TNS4,4.807307e-02,-0.828786
14312,SEC14L2,4.833849e-02,-0.352581
3645,CSH1,4.886981e-02,-0.782588
14000,RSC1A1,4.900387e-02,-0.826719


In [9]:
# SSREA: uncertainty from 'padj', score from 'NES', names from 'pathway'.
gsea = Tool(
    token,
    'SSREA',
    file_pattern="*_all-elements_SSREA.txt",
    sep='\t',
    uncertainty_pattern='padj',
    score_pattern='NES',
    name_pattern='pathway',
)

# Rows with Uncertainty < 1.0 and Score < 0, most certain first.
gsea_df = gsea.get_results(comparison='D28_DRUG_vs_D28_DMSO', max_uncertainty=1.0, max_score=0.0)
gsea_df.sort_values('Uncertainty')

Unnamed: 0,Name,Uncertainty,Score
5559,FAM72B,0.215165,-1.489972
12197,PLAA,0.329144,-1.487099
5381,FAF1,0.339520,-1.486387
10658,NOD2,0.339520,-1.486325
3020,CHRAC1,0.473207,-1.484130
...,...,...,...
5571,FAM83E,0.999515,-0.417368
13488,REG4,0.999515,-0.409775
1908,C21orf91,0.999515,-0.416939
16007,TAF1D,0.999515,-0.428786


In [10]:
# Directional scoring method: the same 'score$' pattern is used for both the
# uncertainty and the score column, so both normalised columns hold the same
# values.
ih = Tool(
    token,
    'directional_scoring_method',
    file_pattern="*_all-elements_directional_scoring_method.txt",
    sep='\t',
    uncertainty_pattern='score$',
    score_pattern='score$',
    name_pattern='Gene',
)

# Rows with Score < 0, lowest value first.
ih_df = ih.get_results(comparison='D28_DRUG_vs_D28_DMSO', max_score=0.0)
ih_df.sort_values('Uncertainty')

Unnamed: 0,Name,Uncertainty,Score
0,UBXN1,-16.327667,-16.327667
1,PTEN,-14.031637,-14.031637
2,RAD23B,-9.296982,-9.296982
3,DPM3,-9.219244,-9.219244
4,USP38,-6.361112,-6.361112
5,ALG3,-6.258883,-6.258883
6,PLAA,-4.943547,-4.943547
7,MOGS,-4.923976,-4.923976
8,FAF1,-4.265793,-4.265793
9,UBE4B,-3.731869,-3.731869


In [11]:
class Results:
    """Interactive exploration (ipywidgets + Altair) of several `Tool` objects.

    Attributes:
        tools: dict display_name -> Tool, in the order the tools were given.
        comparisons: ordered list of every comparison found in any tool.

    The `plot_*` and `integration` methods only build and display widgets; the
    actual work happens in the button callbacks. They rely on `display`, which
    IPython makes available inside notebooks. None of them modifies the tables
    stored in the tools: every callback works on a copy.
    """

    # Comparison symbol -> symbol of its logical complement.
    _OPPOSITES = {'<': '>=', '>': '<=', '>=': '<', '<=': '>'}

    # Comparison symbol -> vectorised test of a pandas Series against a number.
    # Replaces the previous `eval` of an f-string built from widget values.
    _COMPARATORS = {
        '<': lambda values, threshold: values < threshold,
        '>': lambda values, threshold: values > threshold,
        '<=': lambda values, threshold: values <= threshold,
        '>=': lambda values, threshold: values >= threshold,
    }

    def __init__(self, results):
        """Register `results`, an iterable of `Tool` instances.

        Raises:
            Exception: when an item is not a `Tool`.
        """
        self.tools = {}
        self.comparisons = []
        for tool in results:
            if not isinstance(tool, Tool):
                raise Exception(f'Type of {tool} is not Tool')
            # NOTE: two tools sharing a display name overwrite each other.
            self.tools[tool.display_name] = tool
            for comparison in tool.comparisons:
                comparison = str(comparison)
                if comparison not in self.comparisons:
                    self.comparisons.append(comparison)

    def _orientations(self, orientation):
        """Return the complement of a comparison symbol ('<' -> '>=', ...).

        Raises:
            Exception: when `orientation` is not one of '<', '>', '<=', '>='.
        """
        try:
            return Results._OPPOSITES[orientation]
        except (KeyError, TypeError):
            raise Exception(f"`{orientation}` is not a valid orientation.") from None

    @staticmethod
    def _split_comparison(comparison):
        """Split 'TREATMENT_vs_CONTROL' into (treatment, control).

        The control part is empty when the name contains no '_vs_'.
        """
        treatment, _, control = str(comparison).partition('_vs_')
        return treatment, control

    def _threshold_labels(self, thresholds):
        """Return the (significant, non-significant) legend labels.

        `thresholds` is (uncertainty_orientation, uncertainty,
        score_orientation, score); the labels only mention the uncertainty.
        """
        uncertainty_orientation, uncertainty, _, _ = thresholds
        opposite = self._orientations(uncertainty_orientation)
        return (
            f"Uncertainty {uncertainty_orientation} {uncertainty}",
            f"Uncertainty {opposite} {uncertainty}",
        )

    def _significant_mask(self, table, thresholds):
        """Boolean Series: rows passing both the uncertainty and score tests."""
        uncertainty_orientation, uncertainty, score_orientation, score = thresholds
        # Validate both symbols before indexing the comparator table.
        self._orientations(uncertainty_orientation)
        self._orientations(score_orientation)
        compare = Results._COMPARATORS
        uncertainty_ok = compare[uncertainty_orientation](table['Uncertainty'], uncertainty)
        score_ok = compare[score_orientation](table['Score'], score)
        return uncertainty_ok & score_ok

    def _label_rows(self, table, thresholds):
        """Return a copy of `table` with a `Label` column.

        Rows passing both tests get the significant label, every other row
        (including rows with missing values) the non-significant one.
        """
        significant_label, non_significant_label = self._threshold_labels(thresholds)
        labelled = table.copy()
        labelled["Label"] = np.where(
            self._significant_mask(labelled, thresholds),
            significant_label,
            non_significant_label,
        )
        return labelled

    @staticmethod
    def _ranked_copy(table):
        """Return a copy of `table` sorted and dense-ranked by (Uncertainty, Score).

        Rows sharing both values share a rank; ranks start at 1.
        NOTE(review): the previous code assigned the two-column result of
        `rank()` to the single `Rank` column, which pandas rejects. A
        lexicographic ranking is presumably what was intended; confirm.
        """
        # np.lexsort sorts by its last key first: Uncertainty, then Score.
        order = np.lexsort((table['Score'].to_numpy(), table['Uncertainty'].to_numpy()))
        ranked = table.iloc[order].copy()
        keys = ranked[['Uncertainty', 'Score']]
        # A new rank starts whenever a row differs from the one before it.
        ranked['Rank'] = keys.ne(keys.shift()).any(axis=1).cumsum().to_numpy()
        return ranked

    def _threshold_form(self, tool_name):
        """Build the per-tool form: title, uncertainty row and score row."""
        def threshold_row(description, default):
            return HBox([
                widgets.Select(options=[">", "<", "<=", ">="], description=description, value="<", rows=1,
                               style=dict(description_width='250px'), layout=Layout(width='auto', height='auto')),
                widgets.FloatText(value=default, description='', layout=Layout(width='auto', height='auto')),
            ])

        return VBox([
            widgets.HTML(value=f"<b>{tool_name}</b>:"),
            threshold_row("Uncertainty", 0.05),
            threshold_row("Score", 0.0),
        ])

    @staticmethod
    def _read_threshold_form(form):
        """Read a form built by `_threshold_form`.

        Returns (uncertainty_orientation, uncertainty, score_orientation, score).
        """
        uncertainty_row, score_row = form.children[1], form.children[2]
        return (
            uncertainty_row.children[0].value,
            float(uncertainty_row.children[1].value),
            score_row.children[0].value,
            float(score_row.children[1].value),
        )

    def plot_results(self):
        """Display widgets plotting, per selected tool, Score against Rank.

        Points are coloured by significance under the chosen thresholds, and
        the comma-separated elements typed by the user are highlighted and
        annotated with their name.
        """
        tool_names = list(self.tools)
        # Element suggestions: every name known to any tool (strings only).
        known_elements = sorted({
            name for tool in self.tools.values() for name in tool.genes if isinstance(name, str)
        })

        tools_widget = widgets.SelectMultiple(options=tool_names, description="Tool:", value=tuple(tool_names),)
        comparisons_widget = widgets.Dropdown(options=list(self.comparisons), description="Comparison:")
        element_widget = widgets.Combobox(placeholder='Comma-separated list:', options=known_elements, description="Element(s):", ensure_option=False,)
        uncertainty_widget = widgets.FloatText(value=0.05, description='', layout=Layout(width='auto', height='auto'),)
        uncertainty_orientation_widget = widgets.Select(options=[">", "<", "<=", ">="], description="Uncertainty", value="<", rows=1, layout=Layout(width='auto', height='auto'))
        score_widget = widgets.FloatText(value=0.0, description='', layout=Layout(width='auto', height='auto'),)
        score_orientation_widget = widgets.Select(options=[">", "<", "<=", ">="], description="Score", value="<", rows=1, layout=Layout(width='auto', height='auto'))
        color_sig_widget = widgets.ColorPicker(concise=False, description="Significant color:", value="red")
        color_non_widget = widgets.ColorPicker(concise=False, description="Non-significant color:", value="gray")
        button = widgets.Button(description="Show plot")

        def _plot(event):
            comparison = comparisons_widget.value
            # Tolerate spaces around the commas and ignore empty entries.
            elements = [name.strip() for name in element_widget.value.split(',') if name.strip()]
            uncertainty = float(uncertainty_widget.value)
            uncertainty_orientation = uncertainty_orientation_widget.value
            score = float(score_widget.value)
            score_orientation = score_orientation_widget.value
            color_sig = color_sig_widget.value
            color_non_sig = color_non_widget.value

            thresholds = (uncertainty_orientation, uncertainty, score_orientation, score)
            significant_label, non_significant_label = self._threshold_labels(thresholds)
            highlight_label = "Gene(s) of interest"
            domains = [highlight_label, significant_label, non_significant_label,]
            colors = ["blue", color_sig, color_non_sig]

            for tool in tools_widget.value:
                print(f'Tool: {tool}\nComparison: {comparison}\nElement: {elements}\nUncertainty: {uncertainty_orientation} {uncertainty}\nScore: {score_orientation} {score}\nColors: [{color_sig}, {color_non_sig}]')

                if comparison not in self.tools[tool].tables:
                    print(f'Tool {tool} has no results for comparison {comparison}: skipped.')
                    continue

                # _label_rows returns a copy: the tool's own table is untouched.
                source = self._label_rows(self.tools[tool].tables[comparison], thresholds)
                source["Rank"] = source["Score"].rank(ascending=True, method="first")
                source.loc[source.Name.isin(elements), "Label"] = highlight_label

                chart = alt.Chart(source, title=f'{tool}: {comparison}').mark_circle(size=60).encode(
                                x=alt.X('Rank:Q', axis=alt.Axis(title='Rank')),
                                y=alt.Y('Score:Q', axis=alt.Axis(title='Score')),
                                tooltip=["Name", "Score", "Uncertainty", "Label", "Rank",],
                                color=alt.Color( "Label", scale=alt.Scale(domain=domains, range=colors),legend=alt.Legend(title="Significativity:")),
                                order=alt.Order("Label:N", sort='descending'),
                            ).interactive().properties(width=800, height=400)

                line = alt.Chart(pd.DataFrame({"y": [0]})).mark_rule().encode(y="y")
                text = ( alt.Chart(source[source["Label"] == highlight_label])
                    .mark_text(dy=10, dx=20, color="blue")
                    .encode(
                        x=alt.X("Rank:Q"),
                        y=alt.Y("Score:Q"),
                        text=alt.Text("Name")))

                display(chart + line + text)

        display(tools_widget)
        display(comparisons_widget)
        display(element_widget)
        display(HBox([uncertainty_orientation_widget, uncertainty_widget]))
        display(HBox([score_orientation_widget, score_widget]))
        display(color_sig_widget)
        display(color_non_widget)
        display(button)
        button.on_click(_plot)

    def plot_gene(self):
        """Display widgets plotting one element across the treatments of a control.

        One chart is drawn per tool: the element's Score in every comparison
        whose control is the selected one, plus a baseline point (Score 0) for
        the control itself.
        """
        # `Text` has no `ensure_option` trait, so that argument is not passed.
        element_widget = widgets.Text(placeholder='Name of element:', description="Element(s):")

        # Ordered, de-duplicated control conditions over every comparison.
        controls = []
        for comparison in self.comparisons:
            _, control = self._split_comparison(comparison)
            if control and control not in controls:
                controls.append(control)
        control_widget = widgets.Dropdown(options=controls, description='Control condition:', style=dict(description_width='250px'), layout=dict(width='350px'))

        # Forms are keyed by tool name, so the name never has to be parsed
        # back out of the HTML title widget.
        forms = {tool_name: self._threshold_form(tool_name) for tool_name in self.tools}
        button = widgets.Button(description="Show plot")

        def clicked(event):
            element = element_widget.value.strip()
            control_condition = control_widget.value

            if element == "":
                raise Exception("Element is an empty string. Please enter an element.")

            # Exact match on the control part (a suffix match would also catch
            # controls whose name merely ends with the selected one).
            comparisons = [
                comparison for comparison in self.comparisons
                if self._split_comparison(comparison)[1] == control_condition
            ]

            for tool_name, form in forms.items():
                thresholds = self._read_threshold_form(form)
                significant_label, non_significant_label = self._threshold_labels(thresholds)

                rows = []
                for comparison in comparisons:
                    table = self.tools[tool_name].tables.get(comparison)
                    if table is None:
                        continue  # this tool was not run on that comparison
                    selected = table.loc[table.Name == element]
                    if selected.empty:
                        continue  # element absent from this table
                    # TODO: should be the opposite for BAGEL!
                    labelled = self._label_rows(selected, thresholds)
                    labelled['Condition'] = self._split_comparison(comparison)[0]
                    rows.append(labelled)

                # Baseline point for the control condition itself.
                baseline = pd.DataFrame([{"Name": element, "Label": "Baseline", "Uncertainty": 1, "Score": 0, 'Condition': control_condition}])
                # pd.concat replaces DataFrame.append, which pandas 2 removed.
                source = pd.concat(rows + [baseline], ignore_index=True)

                domains = [significant_label, non_significant_label, "Baseline"]
                colors = ["red", "grey", "black"]
                chart = alt.Chart(source).mark_point(filled=True, size=100).encode(
                    y=alt.Y('Score', axis=alt.Axis(title='Score')),
                    x=alt.X('Condition', axis=alt.Axis(title='Condition')),
                    color=alt.Color('Label', scale=alt.Scale(domain=domains, range=colors), legend=alt.Legend(title='Uncertainty')),
                    tooltip=['Name', 'Uncertainty', 'Score']
                ).properties(title=f'{element} - {tool_name}')
                display(chart)

        display(element_widget)
        display(control_widget)
        display(VBox(list(forms.values())))
        display(button)
        button.on_click(clicked)

    def integration(self):
        """Display widgets ranking the elements of each tool for one comparison.

        Work in progress: the selection mode and the thresholds are collected
        but only the per-tool ranking is shown.
        """
        # Comparison to use.
        comparison_widget = widgets.Dropdown(options=list(self.comparisons),
                                              description="Comparison:")

        # Whether the intersection or the union of the results should be used.
        selection_widgets = widgets.ToggleButtons(options=["Intersection", "Union"],
                                                  description="Selection mode:",
                                                  tooltips=["Use elements at intersection of all selected methods",
                                                            "Use union of elements of all selected methods",],)

        # One threshold form per tool, keyed by tool name.
        tool_forms = {tool_name: self._threshold_form(tool_name) for tool_name in self.tools}

        button = widgets.Button(description="Show plot")

        def ranking(params):
            """Show the table of one tool ranked by (Uncertainty, Score)."""
            tool_name, comparison, selection, uncertainty, uncertainty_orientation, score, score_orientation = params
            table = self.tools[tool_name].tables.get(comparison)
            if table is None:
                print(f'Tool {tool_name} has no results for comparison {comparison}: skipped.')
                return None
            ranked = self._ranked_copy(table)
            display(ranked)
            return ranked

        # TODO: Venn diagram of the elements at the intersection/union of the
        # selected tools. The earlier draft depended on helpers
        # (`show_parameters`, `plot_venn`) that are not defined in this notebook.

        def clicked(event):
            # General values
            comparison = comparison_widget.value
            selection = selection_widgets.value

            # Tool-specific values
            for tool_name, form in tool_forms.items():
                uncertainty_orientation, uncertainty, score_orientation, score = self._read_threshold_form(form)
                params = (tool_name, comparison, selection, uncertainty, uncertainty_orientation, score, score_orientation)
                ranking(params)

        display(comparison_widget)
        display(selection_widgets)
        display(VBox(list(tool_forms.values())))
        display(button)
        button.on_click(clicked)

In [12]:
# Every Tool built above; Results keys them by display name, which is why the
# two MAGeCK_RRA tools were given distinct display names.
tools = [mle, rra, bagel, crisphiermix, gsea, ih, rra_pos]

results = Results(results=tools)

In [13]:
# Show the integration widgets: comparison, selection mode and one threshold form per tool.
results.integration()

Dropdown(description='Comparison:', options=('D28_DRUG_vs_d0', 'D28_DMSO_vs_d0', 'D28_DRUG_vs_D28_DMSO'), valu…

ToggleButtons(description='Selection mode:', options=('Intersection', 'Union'), tooltips=('Use elements at int…

VBox(children=(VBox(children=(HTML(value='<b>MAGeCK_MLE</b>:'), HBox(children=(Select(description='Uncertainty…

Button(description='Show plot', style=ButtonStyle())

In [14]:
# Show the widgets that plot Score against Rank for the selected tools and comparison.
results.plot_results()

SelectMultiple(description='Tool:', index=(4, 2, 5, 0, 1, 3, 6), options=('CRISPhieRmix', 'SSREA', 'MAGeCK_RRA…

Dropdown(description='Comparison:', options=('D28_DRUG_vs_d0', 'D28_DRUG_vs_D28_DMSO', 'D28_DMSO_vs_d0'), valu…

Combobox(value='', description='Element(s):', options=('PLGLB2', 'LYSMD1', 'MYO5B', 'SMCO4', 'ZNF730', 'KRT1',…

HBox(children=(Select(description='Uncertainty', index=1, layout=Layout(height='auto', width='auto'), options=…

HBox(children=(Select(description='Score', index=1, layout=Layout(height='auto', width='auto'), options=('>', …

ColorPicker(value='red', description='Significant color:')

ColorPicker(value='gray', description='Non-significant color:')

Button(description='Show plot', style=ButtonStyle())

In [15]:
# Show the widgets that plot one element across the treatments sharing a control condition.
results.plot_gene()

Text(value='', description='Element(s):', placeholder='Name of element:')

Dropdown(description='Control condition:', layout=Layout(width='350px'), options=('d0', 'D28_DMSO'), style=Des…

VBox(children=(VBox(children=(HBox(children=(Select(description='MAGeCK_MLE Uncertainty', index=1, layout=Layo…

Button(description='Show plot', style=ButtonStyle())