In [1]:
# load candidate enhancers determined from B16CD8TIL-multiome_SCENICplus_scATACpp_cistopic
import os
import pickle
# Project root on the shared filesystem and scratch space for temporary files.
work_dir = '/Scottbrowne/members/smd/Projects/SD031/scenicplus/B16_CART'
tmp_dir = '/scratch2/devoes/tmp'


def _load_pickle(path):
    """Deserialize the pickle file at `path` and return the stored object.

    The file is opened in a `with` block so the handle is closed as soon as
    loading finishes (or fails) instead of being left to the garbage collector.
    """
    # NOTE(review): pickle executes arbitrary code on load -- only point this at
    # files produced by this project's own pipeline.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


# All three candidate-enhancer pickles live in the same directory.
candidate_enhancer_dir = os.path.join(work_dir, 'ATAC/candidate_enhancers')

region_bin_topics_otsu = _load_pickle(os.path.join(candidate_enhancer_dir, 'region_bin_topics_otsu.pkl'))
region_bin_topics_top3k = _load_pickle(os.path.join(candidate_enhancer_dir, 'region_bin_topics_top3k.pkl'))
markers_dict = _load_pickle(os.path.join(candidate_enhancer_dir, 'markers_dict.pkl'))

In [2]:
# format as dictionary of PyRanges objects
import pyranges as pr
from pycistarget.utils import region_names_to_coordinates
def _chr_region_sets(region_tables):
    """Convert a mapping of region tables into a mapping of PyRanges objects.

    Parameters
    ----------
    region_tables : dict
        Maps a set name (topic or DAR label) to an object whose `.index` holds
        region-name strings.

    Returns
    -------
    dict
        Maps each set name to `pr.PyRanges(region_names_to_coordinates(...))`
        built from the index entries that start with 'chr'. Sets with no such
        entries are omitted.
    """
    converted = {}
    for set_name, table in region_tables.items():
        # only keep regions on known chromosomes
        chr_regions = table.index[table.index.str.startswith('chr')]
        # Skip empty sets. The original notebook only guarded the DARs
        # (no DARs for 10, 14, 2, 3, 8, 9); the same guard is applied to every
        # collection here -- presumably the coordinate conversion cannot handle
        # zero regions, TODO confirm.
        if len(chr_regions) > 0:
            converted[set_name] = pr.PyRanges(region_names_to_coordinates(chr_regions))
    return converted


# Keys of `region_sets` are kept exactly as before; downstream result names
# (e.g. 'DEM_topics_otsu_All') are looked up by these labels later on.
region_sets = {
    'topics_otsu': _chr_region_sets(region_bin_topics_otsu),
    'topics_top_3': _chr_region_sets(region_bin_topics_top3k),
    'DARs': _chr_region_sets(markers_dict),
}

In [3]:
# Show which topic / DAR labels ended up in each region-set collection.
for collection_name, named_sets in region_sets.items():
    print(f'{collection_name}: {named_sets.keys()}')

topics_otsu: dict_keys(['Topic1', 'Topic2', 'Topic3', 'Topic4', 'Topic5', 'Topic6', 'Topic7', 'Topic8', 'Topic9', 'Topic10', 'Topic11', 'Topic12', 'Topic13', 'Topic14', 'Topic15', 'Topic16', 'Topic17', 'Topic18', 'Topic19', 'Topic20', 'Topic21', 'Topic22', 'Topic23', 'Topic24', 'Topic25', 'Topic26', 'Topic27', 'Topic28', 'Topic29', 'Topic30', 'Topic31', 'Topic32', 'Topic33', 'Topic34', 'Topic35', 'Topic36', 'Topic37', 'Topic38', 'Topic39', 'Topic40', 'Topic41', 'Topic42', 'Topic43', 'Topic44', 'Topic45', 'Topic46', 'Topic47', 'Topic48'])
topics_top_3: dict_keys(['Topic1', 'Topic2', 'Topic3', 'Topic4', 'Topic5', 'Topic6', 'Topic7', 'Topic8', 'Topic9', 'Topic10', 'Topic11', 'Topic12', 'Topic13', 'Topic14', 'Topic15', 'Topic16', 'Topic17', 'Topic18', 'Topic19', 'Topic20', 'Topic21', 'Topic22', 'Topic23', 'Topic24', 'Topic25', 'Topic26', 'Topic27', 'Topic28', 'Topic29', 'Topic30', 'Topic31', 'Topic32', 'Topic33', 'Topic34', 'Topic35', 'Topic36', 'Topic37', 'Topic38', 'Topic39', 'Topic40', 

In [9]:
#set paths for pycistarget databases
# All cisTarget inputs live under <work_dir>/cistarget_database; derive every
# path from work_dir so the three stay in sync if the project root moves.
cistarget_db_dir = os.path.join(work_dir, 'cistarget_database')
rankings_db = os.path.join(cistarget_db_dir, 'SD031_B16_CART.regions_vs_motifs.rankings.feather')
scores_db = os.path.join(cistarget_db_dir, 'SD031_B16_CART.regions_vs_motifs.scores.feather')
# Previously a hard-coded absolute path; this resolves to the same location.
motif_annotation = os.path.join(cistarget_db_dir, 'HOCOMOCOv11_motifs.tbl')

In [10]:
# Output folder for the motif-enrichment results of this run.
motif_out_dir = os.path.join(work_dir, 'motifs_no-promoter')
# exist_ok=True creates the folder if needed without a separate existence
# check, so there is no window between "check" and "create".
os.makedirs(motif_out_dir, exist_ok=True)

# run pycistarget wrapper
# score and rank each region based on a motif collection and the region's score for each motif
# "The input regions are intersected with regions in the database (with at least 40% overlap). 
# cisTarget uses a recovery curve approach (for each motif), 
# in which a step is taken in the y-axis when as region in the motif ranking (x-axis) is found in the region set.
# The Area Under the Curve for each motif is normalized based on the average AUC for all motifs and their standard deviation, 
# resulting in a Normalized Enrichment Score (NES) that is used to quantify the enrichment of a motif in a set of regions. 
# By default, motif that obtain a NES above 3.0 are kept."  https://pycistarget.readthedocs.io/en/latest/tools.html#cistarget 
from scenicplus.wrappers.run_pycistarget import run_pycistarget
run_pycistarget(
    region_sets = region_sets,
    species = 'mus_musculus',
    biomart_host = 'http://nov2020.archive.ensembl.org/',
    save_path = motif_out_dir,
    ctx_db_path = rankings_db,       # rankings database
    dem_db_path = scores_db,         # scores database
    path_to_motif_annotations = motif_annotation,
    run_without_promoters = True, 
    n_cpu = 8,
    _temp_dir = os.path.join(tmp_dir, 'ray_spill'),
    annotation_version = 'HOCOMOCOv11',
    )

2024-06-04 17:57:34,217 pycisTarget_wrapper INFO     /Scottbrowne/members/smd/Projects/SD031/scenicplus/B16_CART/motifs_no-promoter folder already exists.
2024-06-04 17:57:34,744 pycisTarget_wrapper INFO     Loading cisTarget database for topics_otsu
2024-06-04 17:57:34,746 cisTarget    INFO     Reading cisTarget database
2024-06-04 17:59:06,635 pycisTarget_wrapper INFO     Running cisTarget for topics_otsu
[2m[36m(ctx_internal_ray pid=16118)[0m 2024-06-04 17:59:13,116 cisTarget    INFO     Running cisTarget for Topic1 which has 9306 regions
[2m[36m(ctx_internal_ray pid=16118)[0m 2024-06-04 17:59:17,571 cisTarget    INFO     No enriched motifs found for Topic1
[2m[36m(ctx_internal_ray pid=16118)[0m 2024-06-04 17:59:17,857 cisTarget    INFO     No enriched motifs found for Topic9
[2m[36m(ctx_internal_ray pid=16121)[0m 2024-06-04 17:59:18,361 cisTarget    INFO     Annotating motifs for Topic2
[2m[36m(ctx_internal_ray pid=16118)[0m 2024-06-04 17:59:18,004 cisTarget    INFO 

In [11]:
import dill
# Load the pickled results from the motifs_no-promoter output folder.
# The `with` block closes the file handle once loading finishes or fails.
with open(os.path.join(work_dir, 'motifs_no-promoter/menr.pkl'), 'rb') as menr_file:
    menr = dill.load(menr_file)

In [12]:
# Results stored under 'DEM_topics_otsu_All', shown for Topic2.
# NOTE(review): the original trailing note "2,4" presumably marks Topic2 and
# Topic4 as the topics of interest -- confirm.
dem_topics_otsu = menr['DEM_topics_otsu_All']
dem_topics_otsu.DEM_results('Topic2')

Unnamed: 0,Logo,Contrast,Direct_annot,Orthology_annot,Log2FC,Adjusted_pval,Mean_fg,Mean_bg,Motif_hit_thr,Motif_hits
FOS_MOUSE.H11MO.0.A,,Topic2,Fos,,2.526217,0.0,3.554405,0.617021,3.0,1502.0
FOSL1_MOUSE.H11MO.0.A,,Topic2,Fosl1,,2.425178,0.0,4.076396,0.758971,3.0,1782.0
JUND_MOUSE.H11MO.0.A,,Topic2,Jund,,1.973197,0.0,4.202226,1.070257,3.0,1820.0
FOSL2_MOUSE.H11MO.0.A,,Topic2,Fosl2,,1.967752,0.0,4.273167,1.09244,3.0,1835.0
JUN_MOUSE.H11MO.0.A,,Topic2,Jun,,1.811483,0.0,4.529927,1.290567,3.0,1943.0
JUNB_MOUSE.H11MO.0.A,,Topic2,Junb,,1.74972,0.0,4.651789,1.383253,3.0,2082.0
NFE2_MOUSE.H11MO.0.A,,Topic2,Nfe2,,1.747571,0.0,1.928845,0.574416,3.0,729.0
BATF_MOUSE.H11MO.1.A,,Topic2,Batf,,1.638091,0.0,3.40724,1.094682,3.0,1564.0
FOSB_MOUSE.H11MO.0.A,,Topic2,Fosb,,1.55158,0.0,4.800166,1.63751,3.0,2156.0
ATF3_MOUSE.H11MO.0.A,,Topic2,Atf3,,1.489029,0.0,4.842672,1.725213,3.0,2082.0


In [13]:

# Same lookup as for Topic2, now showing the results for Topic4.
dem_topics_otsu = menr['DEM_topics_otsu_All']
dem_topics_otsu.DEM_results('Topic4')

Unnamed: 0,Logo,Contrast,Direct_annot,Orthology_annot,Log2FC,Adjusted_pval,Mean_fg,Mean_bg,Motif_hit_thr,Motif_hits
TF7L2_MOUSE.H11MO.0.A,,Topic4,Tcf7l2,,1.067834,0.0,2.481409,1.183718,3.0,1008.0
FOS_MOUSE.H11MO.0.A,,Topic4,Fos,,1.063158,0.0,1.333673,0.638274,3.0,520.0
FOSL1_MOUSE.H11MO.0.A,,Topic4,Fosl1,,1.062401,0.0,1.54959,0.741997,3.0,636.0
TF7L1_MOUSE.H11MO.0.A,,Topic4,Tcf7l1,,0.962981,0.0,2.637887,1.353225,3.0,1083.0
JUND_MOUSE.H11MO.0.A,,Topic4,Jund,,0.779909,0.0,1.839299,1.071214,3.0,676.0
FOSL2_MOUSE.H11MO.0.A,,Topic4,Fosl2,,0.765584,0.0,1.773321,1.043094,3.0,671.0
BATF_MOUSE.H11MO.1.A,,Topic4,Batf,,0.721925,0.0,1.64484,0.997246,3.0,616.0
TFE3_MOUSE.H11MO.0.A,,Topic4,Tfe3,,0.704043,0.000333,0.751139,0.461086,3.0,203.0
ATF4_MOUSE.H11MO.0.A,,Topic4,Atf4,,0.702804,0.038198,0.28744,0.176597,3.0,79.0
JUNB_MOUSE.H11MO.0.A,,Topic4,Junb,,0.698533,0.0,2.095639,1.29133,3.0,802.0
