# Install required packages

In [1]:
# !pip install revealer

# Import functions to prepare input files and run REVEALER

In [2]:
from revealer.MutMaker import produce_mutation_file
from revealer.REVEALER_Cython import runREVEALER
from revealer.REVEALER_test import verify_test

In [3]:
# Set if your notebook is not at jupyter root
import os
os.chdir('/home/jmj7858/software/REVEALER/example_notebook')

# Downloading CCLE mutation information file.
# https://depmap.org/portal/download/all/?releasename=DepMap+Public+23Q2&filename=OmicsSomaticMutations.csv

In [28]:
import requests

article_id = "25880521"
version = "1"

article = requests.get(f"https://api.figshare.com/v2/articles/{article_id}/versions/{version}").json()

# Find link for OmicsSomaticMutations.csv and download
for file in article["files"]:
    if file["name"] == "OmicsSomaticMutations.csv":
        download_url = file["download_url"]
        output_filename = "sample_input/OmicsSomaticMutations.csv"  
        response = requests.get(download_url)
        # Check if the request was successful
        if response.status_code == 200:
            # Save the file locally with the specified output filename
            with open(output_filename, "wb") as file:
                file.write(response.content)
            print(f"File downloaded successfully: {output_filename}")
        else:
            print("Failed to download the file.")


File downloaded successfully: sample_input/OmicsSomaticMutations.csv


# Run input preparation from maf file (simplified)

In [4]:
produce_mutation_file(maf_input_file='sample_input/OmicsSomaticMutations.csv', # Input maf file
                      gct_output_file_prefix='CCLE', # Prefix for output file
                      out_folder='sample_input',
                      file_separator=',',
                      protein_change_identifier='ProteinChange',
                      col_genename = 'HugoSymbol',
                      col_class = 'VariantType',
                      col_sample = 'ModelID',
                      mode = 'mutall')

Reading input file...
Start making gct by class.
Start getting sample information.
Start collecting allele information for each feature.
Start creating mutation dataframe.
Start removing feature with more than total_ratio.
Start generating gmt file.
Writing Mut All result to gct.


# Convert annotation between DepMap and CCLE to match with our signature file

In [5]:
# !python DepMapToCCLE.py sample_input/NameConvert.csv sample_input/CCLE_class.gct sample_input/CCLE_class_rename.gct
from DepMapToCCLE import run

run('sample_input/NameConvert.csv', 'sample_input/CCLE_Mut_All.gct', 'sample_input/CCLE_Mut_All_rename.gct')

# Run main part of REVEALER

In [7]:
# Input too large, please contact jim095@ucsd.edu for original file
runREVEALER(target_file='sample_input/CCLE_complete_sigs.gct',
            feature_files=['sample_input/CCLE_Mut_All_rename.gct'],
            gmt_file='sample_input/CCLE_Mut_All.gmt',
            seed_name=['NFE2L2_Mut_All'],
            out_folder='sample_output/NRF2',
            prefix='CCLE_NRF2',
            target_name='NFE2L2.V2',
            if_pval=False, # Calculating p-value takes long time
            if_bootstrap=False, # Same, takes long time
            if_intermediate=True,
            gene_locus='sample_input/allgeneLocus.txt',
            tissue_file = 'sample_input/TissueType_CCLE.gct',
            max_iteration = 10)

CCLE_NRF2 start!
Start reading input files...
Done reading input files
Time used to read input: 32 second(s)
Number of features that pass the threshold is: 17108
Number of samples is: 1102

grid size: 34
bandwidth: 0.4123969868477072
Iteration1:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.3241140807918661
Best feature choosen in this round is: KEAP1_Mut_All
IC of new seed is: 0.34142948958067726
Iteration2:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.17245103990565655
Best feature choosen in this round is: ZC3H13_Mut_All
IC of new seed is: 0.32050656850295406
Iteration3:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.1770599023671661
Best feature choosen in this round is: GRAMD1B_Mut_All
IC of new seed is: 0.3361685393875556
Iteration4:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.175360383915207
Best feature choosen in this round is: NFASC_Mut_All
IC of new seed is: 0.32

(           HUH1_LIVER  SNU878_LIVER  NCIH2122_LUNG  JHH5_LIVER  \
 Name                                                             
 NFE2L2.V2    5.401344      5.318041       4.132284    3.915154   
 
            TFK1_BILIARY_TRACT  LUDLU1_LUNG  NCIH2023_LUNG  \
 Name                                                        
 NFE2L2.V2            3.881305     3.771016       3.698887   
 
            KON_UPPER_AERODIGESTIVE_TRACT  BEN_LUNG  BC3C_URINARY_TRACT  ...  \
 Name                                                                    ...   
 NFE2L2.V2                       3.685204  3.684497            3.586105  ...   
 
            DMS114_LUNG  NCIH1092_LUNG  NCIH1184_LUNG  NCIH1105_LUNG  \
 Name                                                                  
 NFE2L2.V2    -1.697747       -1.70457      -1.766949      -1.790118   
 
            KARPAS422_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE  IALM_LUNG  \
 Name                                                                 
 NFE2L2

In [8]:

# Input too large, please contact jim095@ucsd.edu for original file
runREVEALER(target_file='sample_input/CCLE_complete_sigs.gct',
            feature_files=['sample_input/CCLE_Mut_All_rename.gct'],
            gmt_file='sample_input/CCLE_Mut_All.gmt',
            out_folder='sample_output/MEK',
            prefix='CCLE_MEK',
            target_name='Pratilas_et_al_MEK_depen',
            if_pval=False, # Calculating p-value takes long time
            if_bootstrap=False, # Same, takes long time
            if_intermediate=True,
            gene_locus='sample_input/allgeneLocus.txt',
            tissue_file = 'sample_input/TissueType_CCLE.gct',
            max_iteration = 10)

CCLE_MEK start!
Start reading input files...
Done reading input files
Time used to read input: 31 second(s)
Number of features that pass the threshold is: 17109
Number of samples is: 1102

grid size: 34
bandwidth: 0.34466146083406246
seed Search...
Picked seed is: SCN11A_Mut_All
Iteration1:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.16635361679628052
Best feature choosen in this round is: TNN_Mut_All
IC of new seed is: 0.1931787421352406
Iteration2:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.1574154002578458
Best feature choosen in this round is: TBX21_Mut_All
IC of new seed is: 0.21392052281135995
Iteration3:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.15715300643700472
Best feature choosen in this round is: RNF19A_Mut_All
IC of new seed is: 0.21913925500822196
Iteration4:
Time used to run one loop: 0 second(s)
CIC calculated in this round is: 0.1583724521973753
Best feature choosen in this rou

(                          WM115_SKIN  SNU1196_BILIARY_TRACT  C32_SKIN  \
 Name                                                                    
 Pratilas_et_al_MEK_depen    1.651292               1.640382  1.616802   
 
                           MELJUSO_SKIN  WM793_SKIN  COLO829_SKIN  \
 Name                                                               
 Pratilas_et_al_MEK_depen      1.615247    1.607975       1.58044   
 
                           A375_SKIN_CJ3_RESISTANT  HCC44_LUNG  RVH421_SKIN  \
 Name                                                                         
 Pratilas_et_al_MEK_depen                 1.570003    1.561813     1.548327   
 
                           SEKI_SKIN  ...  NCIH2227_LUNG  NCIH660_PROSTATE  \
 Name                                 ...                                    
 Pratilas_et_al_MEK_depen   1.546633  ...      -3.017758         -3.106132   
 
                           HSC5_SKIN  MTA_HAEMATOPOIETIC_AND_LYMPHOID_TISSUE  \
 Name       

In [5]:
verify_test('sample_output/NRF2/CCLE_NRF2_Result.txt', 'sample_output/CCLE_NRF2_groudtruth.txt')

The test finished running and it is successful!
