# Checking motif overlap
We want to check whether motifs that appear enriched in CND are also present in ATL. HOMER's `annotatePeaks.pl` did the heavy lifting; here we just need to read in its output files and summarize them.

In [1]:
from os import listdir
from os.path import splitext, isfile
import pandas as pd
from tqdm import tqdm

## Reading in the data
The data are stored as tab-delimited files. There are too many files to hold in memory at once, so we won't read them all in together. Instead, we'll read in just the first one to examine it and write the code, and then use a generator to process all of them on the fly.

In [6]:
data_dir = '../data/outputs/annotated_motifs/CND_searching_ATL_timey_background_ShD_16Apr2025'

# Maps each file's base name (extension stripped) to its table as a DataFrame.
motif_matches = {}
# NOTE: listdir returns entries in arbitrary order, so which file counts as
# "the first one" is not guaranteed to be the same on every machine.
for f in tqdm(listdir(data_dir)):
    file_path = f'{data_dir}/{f}'
    # listdir also returns subdirectories; skip anything that is not a
    # regular file and keep looking.
    if not isfile(file_path):
        continue
    motif_matches[splitext(f)[0]] = pd.read_csv(file_path, sep='\t', header=0)
    # Only one table is needed for inspection, so stop after the first file
    # that was actually read. Stopping unconditionally after the first
    # directory entry would leave motif_matches empty whenever that entry
    # is not a file.
    break

  0%|                                                                                          | 0/1333 [00:00<?, ?it/s]


In [7]:
# Show the one table loaded so far. The key is taken from the dict instead of
# being hard-coded: listdir order is arbitrary, so the file that gets read
# first (and therefore the key present here) can differ between machines.
example_key = next(iter(motif_matches))
motif_matches[example_key]

Unnamed: 0,"PeakID (cmd=annotatePeaks.pl tss ATL_v3_hc -size -1500,100 -m /home/farre/Serena/rhythmic-potato/data/outputs/ATL_CND_ShD_Leaf_timey_backgrounds/ATL_ShD_Leaf_vs_CND_ShD_Leaf/module_0/CND/homerResults/motif10.motif)",Chr,Start,End,Strand,Not Used,Focus Ratio/Region Size,Annotation,Detailed Annotation,Distance to TSS,...,Nearest Unigene,Nearest Refseq,Nearest Ensembl,Gene Name,Gene Alias,Gene Description,Gene Type,CpG%,GC%,"2-ATCTAAGGAT,BestGuess:PB0040.1_Lef1_1/Jaspar(0.702) Distance From Peak(sequence,strand,conservation)"
0,Soltu.Atl_v3.11_3G015560.16,chr11_3,32961076,32962676,+,.,,promoter-TSS (Soltu.Atl_v3.11_3G015560.1),,0,...,,,,,,,,0.009375,0.266708,
1,Soltu.Atl_v3.03_2G004220.1,chr03_2,4497273,4498873,-,.,,promoter-TSS (Soltu.Atl_v3.03_2G004220.1),,0,...,,,,,,,,0.022500,0.366646,
2,Soltu.Atl_v3.03_1G027600.4,chr03_1,43642070,43643670,+,.,,promoter-TSS (Soltu.Atl_v3.03_1G027600.1),,0,...,,,,,,,,0.012500,0.277951,
3,Soltu.Atl_v3.06_3G014950.1,chr06_3,33844436,33846036,+,.,,promoter-TSS (Soltu.Atl_v3.06_3G014950.1),,0,...,,,,,,,,0.015000,0.299813,
4,Soltu.Atl_v3.02_3G001650.1,chr02_3,8216362,8217962,+,.,,promoter-TSS (Soltu.Atl_v3.02_3G001650.1),,0,...,,,,,,,,0.016875,0.280450,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267390,Soltu.Atl_v3.05_3G002340.1,chr05_3,1980486,1982086,-,.,,promoter-TSS (Soltu.Atl_v3.05_3G002340.1),,0,...,,,,,,,,0.009740,0.236821,
267391,Soltu.Atl_v3.10_3G001600.9,chr10_3,1339424,1341024,-,.,,promoter-TSS (Soltu.Atl_v3.10_3G001600.1),,0,...,,,,,,,,0.026250,0.331043,
267392,Soltu.Atl_v3.10_0G018110.4,chr10_0,27225889,27227489,+,.,,promoter-TSS (Soltu.Atl_v3.10_0G018110.4),,0,...,,,,,,,,0.012500,0.377264,
267393,Soltu.Atl_v3.08_2G019280.2,chr08_2,47630744,47632344,-,.,,promoter-TSS (Soltu.Atl_v3.08_2G019280.1),,0,...,,,,,,,,0.005625,0.334166,
