## TF co-occurrences with newly found TF motifs

#### constant variables and imports

In [1]:
from os import listdir, makedirs
from os.path import isfile, join

from tfcomb import CombObj

# Genome FASTA handed to the TFBS scan, and the folder that receives the pickled results.
genome_path = "../testdaten/hg19_masked.fa"
result_path = "./results/wp5/"

# Folder with the newly found motif files, plus the region file (BED) of the cell line to analyse.
motif_path = "../testdaten/wp5/"
cell_line_path = "../testdaten/enhancers/"
cell_line_file_name = "CD14+_enhancers.bed"

#### function definition for the market basket analyses

In [2]:
def do_market_basket_analyses(genome_path:str,
                              cell_line_name: str,
                              rel_path_cell_line: str,
                              motif_name:str,
                              rel_path_motif:str,
                              output_dir: str = None,
                              tfbs_threads: int = 4,
                              market_basket_threads: int = 10):
    '''
        Runs the TFBS detection and the market basket analysis for one cell line and one motif file.

        The resulting CombObj is pickled to
        <output_dir>/<cell_line_name>_<motif_name>_complete.pkl.
        Nothing is saved if the analysis yields no rules.

        :param genome_path: path to the genome fasta, e.g. "../testdaten/hg19_masked.fa"
        :param cell_line_name: label of the cell line, used in log messages and the output file name
        :param rel_path_cell_line: path to the region file, e.g. "../testdaten/enhancers/A375_enhancers.bed"
        :param motif_name: label of the motif, used in log messages and the output file name
        :param rel_path_motif: path to the motif file
        :param output_dir: folder for the pickle; defaults to the notebook-level `result_path`
        :param tfbs_threads: number of threads passed to CombObj.TFBS_from_motifs
        :param market_basket_threads: number of threads passed to CombObj.market_basket
        :return: None
    '''
    if output_dir is None:
        # Keep the previous behaviour: fall back to the constant defined in the config cell.
        output_dir = result_path
    if output_dir:
        # Create the folder up front so a missing directory fails before the expensive scan, not after it.
        makedirs(output_dir, exist_ok=True)
    output_file = join(output_dir, f'{cell_line_name}_{motif_name}_complete.pkl')

    print(f'Starting with tfbs-detection and market basket analyses for cell_line: {cell_line_name} and new motif {motif_name}, data path:{rel_path_cell_line}.')
    comb = CombObj()
    comb.TFBS_from_motifs(regions=rel_path_cell_line,
                          motifs=rel_path_motif,
                          genome=genome_path,
                          threads=tfbs_threads)
    print(f'TFBS detection is done for cell_line: {cell_line_name} and new motif {motif_name}')
    print(f'Start market basket analyses for cell line: {cell_line_name} and new motif {motif_name}')
    comb.market_basket(threads=market_basket_threads)

    # No rules means no co-occurrences were found; do not write an empty result.
    if len(comb.rules) == 0:
        print(f'Could not find TF-cooccurences for cell line: {cell_line_name} and new motif {motif_name}')
        return

    print(f'Finished market basket analyses for cell line: {cell_line_name} and new motif {motif_name}')
    print(f'Found rules: {len(comb.rules)}')
    comb.to_pickle(output_file)
    print(f'Saved complete rules to {output_file}')



#### function definition to list the file names in a folder

In [3]:
def read_in_file_names_of_folder(rel_path:str) -> list:
    '''
        Returns the names (not the paths) of all regular files directly inside :rel_path.
        Sub-folders are skipped.

        The names are sorted because listdir returns them in arbitrary order;
        sorting makes loops and positional access ([0]) reproducible between runs.
    '''
    return sorted(f for f in listdir(rel_path) if isfile(join(rel_path, f)))


### Market basket analyses with newly found motifs
Idea: WP5 finds motifs for a specific cell line/cluster/tissue, e.g. liver cells. WP6 takes the new motifs and tests them for TF co-occurrences in that specific cell line/cluster.

In [4]:
new_motif_file_names = read_in_file_names_of_folder(rel_path=motif_path)

# The cell line is the same for every motif, so its name and path are derived once.
# rsplit('.', 1) removes only the last extension: names that contain further dots
# (e.g. "motif.v1.meme" / "motif.v2.meme") stay distinct and do not overwrite each
# other's result file.
cell_line_name = cell_line_file_name.rsplit('.', 1)[0]
rel_path_cell_line = join(cell_line_path, cell_line_file_name)

for new_motif_file_name in new_motif_file_names:
    motif_name = new_motif_file_name.rsplit('.', 1)[0]

    print(new_motif_file_name)
    print(motif_name)
    print(cell_line_name)

    do_market_basket_analyses(genome_path=genome_path,
                              cell_line_name=cell_line_name,
                              rel_path_cell_line=rel_path_cell_line,
                              motif_name=motif_name,
                              rel_path_motif=join(motif_path, new_motif_file_name)
                             )

motif_10_motif_10.meme
motif_10_motif_10
CD14+_enhancers
Starting with tfbs-detection and market basket analyses for cell_line: CD14+_enhancers and new motif motif_10_motif_10, data path:../testdaten/enhancers/CD14+_enhancers.bed.
INFO: Scanning for TFBS with 4 thread(s)...
INFO: Progress: 51%
INFO: Finished!
INFO: Processing scanned TFBS
INFO: Identified 107 TFBS (1 unique names) within given regions
TFBS detection is done for cell_line: CD14+_enhancers and new motif motif_10_motif_10
Start market basket analyses for cell line: CD14+_enhancers and new motif motif_10_motif_10
INFO: Setting up binding sites for counting
INFO: Counting co-occurrences within sites
INFO: Counting co-occurrence within background
INFO: Finished!
INFO: Done finding co-occurrences! Run .market_basket() to estimate significant pairs
INFO: Market basket analysis is done! Results are found in <CombObj>.rules
Could not find TF-cooccurences for cell line: CD14+_enhancers and new motif motif_10_motif_10
motif_29_mot

### Analyse the co-occurrences


In [6]:
# Only the "*.pkl" files are CombObj results; anything else in the folder is ignored.
# Sorting makes the choice of the first result independent of the file-system order.
results_files_names_market_basket_analyses = sorted(
    file_name
    for file_name in read_in_file_names_of_folder(rel_path=result_path)
    if file_name.endswith(".pkl")
)

# Fail with a clear message instead of an IndexError when no analysis produced a result.
if not results_files_names_market_basket_analyses:
    raise FileNotFoundError(f"No market basket result (*.pkl) found in {result_path}")

# NOTE: unpickling executes code from the file; only load results this notebook wrote itself.
A = CombObj().from_pickle(join(result_path, results_files_names_market_basket_analyses[0]))
# The analysed cell line is CD14+ (see cell_line_file_name); the former "CD4+" label was a typo.
A.prefix = "CD14+"
print(A.rules)


    

                        TF1       TF2  TF1_TF2_count  TF1_count  TF2_count  \
motif_29-motif_29  motif_29  motif_29              7        544        544   

                     cosine  zscore  
motif_29-motif_29  0.012868     0.0  
