### Find TF co-occurrences for the cell clusters/cell types of WP3 (TOBIAS)

Constants and imports

In [1]:
from tfcomb import CombObj
import os
import pathlib

# Constants for this script.

# Input files: masked hg19 genome (FASTA) and JASPAR 2020 core vertebrate motifs (MEME).
genome_path = "../testdaten/hg19_masked.fa"
main_jaspar_file = "../testdaten/JASPAR2020_CORE_vertebrates.meme"

# Directory the market basket results of the clusters are written to.
result_path = "./results/wp3/"

# Folder of WP3 that contains the cluster folders.
path_to_clusters = "/mnt/workspace_stud/stud8/testordner/"
# Substring a folder name has to contain to be treated as a cluster folder.
cluster_folder_identifier = "cluster"

# Create the result folder. mkdir(parents=True, exist_ok=True) is already a
# no-op for an existing directory, so no separate os.path.exists() check is
# needed; without that check the call also fails right away when result_path
# exists but is not a directory, instead of failing later while saving.
pathlib.Path(result_path).mkdir(parents=True, exist_ok=True)


Function definitions for running the market basket analyses and saving the results

In [3]:
def do_market_basket_analyses_for_cell_cluster(cell_cluster_name: str, bindetect_path: str, condition: str,
                                               threads: int = 10) -> None:
    '''
        Does market basket analyses with bindetect output of Tobias (WP3)
        and saves the resulting CombObj as pickle file.

        Parameters:
            cell_cluster_name: name of the cell cluster/type; used in the log
                               messages and as file name of the pickle.
            bindetect_path:    path to the BINDetect output folder of the cluster.
            condition:         condition passed on to CombObj.TFBS_from_TOBIAS.
            threads:           number of threads passed on to CombObj.market_basket
                               (default 10, the value that was hard-coded before).

        Returns:
            None. If rules were found, the CombObj is written to
            <result_path>/<cell_cluster_name>.pkl; if no rules were found,
            a message is printed and nothing is saved.
    '''
    comb = CombObj()
    # TODO: the condition is defined by WP3 and still has to be agreed on with
    # them (open question: how often do they run TOBIAS?).
    comb.TFBS_from_TOBIAS(bindetect_path=bindetect_path, condition=condition, overwrite=False)

    print(f'Start market basket analyses for cell-cluster/type: {cell_cluster_name}')
    comb.market_basket(threads=threads)
    if len(comb.rules) <= 0:
        print(f'Could not find TF-cooccurences for cell-cluster/type: {cell_cluster_name}')
        return
    print(f'Finished market basket analyses for cell-cluster/type: {cell_cluster_name}')
    print(f'Found rules: {len(comb.rules)}')

    # Join the path components instead of concatenating the strings, so the file
    # ends up inside result_path even if result_path has no trailing separator.
    pickle_path = os.path.join(result_path, f'{cell_cluster_name}.pkl')
    comb.to_pickle(pickle_path)
    print(f'Saved: {pickle_path}')
   
    

In [4]:
def read_in_folder_names_for_cluster():
    '''
        Collects the names of the cluster folders located directly in path_to_clusters.

        An entry is returned if it is a directory and its name contains
        cluster_folder_identifier. Only the folder names are returned (not the
        full paths), in the order in which os.listdir yields them.
    '''
    return [
        entry
        for entry in os.listdir(path_to_clusters)
        if os.path.isdir(os.path.join(path_to_clusters, entry))
        and cluster_folder_identifier in entry
    ]




In [6]:
# Identify the clusters to read from,
# e.g. ["cluster10", "cluster20", ...]
cluster_names = read_in_folder_names_for_cluster()
print(f"Clusters:{cluster_names}")

# Derive the BINDetect folder of every cluster,
# e.g. ["/mnt/workspace_stud/stud8/testordner/cluster10/BINDetect/", ...]
cluster_bindetect_paths = [
    f"{path_to_clusters}{cluster}/BINDetect/" for cluster in cluster_names
]

print(f"Paths to bindetect folders of the clusters: {cluster_bindetect_paths}")


Clusters:['cluster10', 'cluster9', 'cluster3', 'cluster5', 'cluster7']
Paths to bindetect folders of the clusters: ['/mnt/workspace_stud/stud8/testordner/cluster10/BINDetect/', '/mnt/workspace_stud/stud8/testordner/cluster9/BINDetect/', '/mnt/workspace_stud/stud8/testordner/cluster3/BINDetect/', '/mnt/workspace_stud/stud8/testordner/cluster5/BINDetect/', '/mnt/workspace_stud/stud8/testordner/cluster7/BINDetect/']


In [9]:
# Run the market basket analysis for every cluster that has a BINDetect folder;
# clusters without one are reported and skipped.
for cluster, bindetect_dir in zip(cluster_names, cluster_bindetect_paths):
    if os.path.exists(bindetect_dir):
        do_market_basket_analyses_for_cell_cluster(
            cell_cluster_name=cluster,
            bindetect_path=bindetect_dir,
            condition=f"{cluster}Scores",
        )
        print(f"MB Done for cluster: {cluster}")
    else:
        print(f"No Bindetect folder for cluster: {cluster}. Continue with next.")
print("End market basket analyses")

INFO: Read 1008547 sites (746 unique names) from condition 'cluster10Scores'
Start market basket analyses for cell-cluster/type: cluster10
INFO: Setting up binding sites for counting
INFO: Counting co-occurrences within sites
INFO: Counting co-occurrence within background
INFO: Progress: 10%
INFO: Progress: 20%
INFO: Progress: 30%
INFO: Progress: 40%
INFO: Progress: 50%
INFO: Progress: 60%
INFO: Progress: 70%
INFO: Progress: 80%
INFO: Progress: 90%
INFO: Finished!
INFO: Done finding co-occurrences! Run .market_basket() to estimate significant pairs
INFO: Market basket analysis is done! Results are found in <CombObj>.rules
Finished market basket analyses for cell-cluster/type: cluster10
Found rules: 555800
Saved: ./results/wp3/cluster10.pkl
MB Done for cluster: cluster10
No Bindetect folder for cluster: cluster9. Continue with next.
No Bindetect folder for cluster: cluster3. Continue with next.
No Bindetect folder for cluster: cluster5. Continue with next.
No Bindetect folder for cluste