# Motif Detection of Every Gene Detected in Experimental Data

Teague McCracken 
03/30/2025

In [1]:
# Initialize the notebook, set environment to transcriptomics
import pandas as pd
import subprocess
import os
import multiprocessing
from pathlib import Path
import numpy as np

data_path = '/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity'
checkpoint_path = '/home/temccrac/Programs/cellular_clarity_project/checkpoints' # on Teague's server 

## Create the New Reference Genome, trying Araport11

In [2]:
# File paths for step 1, had 5324 missing genes
gtf = "/home/temccrac/Programs/data/genomes/Arabidopsis_thaliana.TAIR10.54.gtf"
#gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/genes_of_interest.txt" # our results
gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/paperresults/genes_of_interest.txt"
genome_fa = "/home/temccrac/Programs/data/genomes/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa"
genome_sizes = "/home/temccrac/Programs/data/genomes/Arabidopsis.genome"

In [2]:
# File paths for step 1 using Araport 11 annotations, 2016, had ~2778 missing genes, only 35 are DEGs
gtf = "/home/temccrac/Programs/data/genomes/Araport11/Araport11_renamed.gff"
#gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/genes_of_interest.txt" # our results
gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/paperresults/genes_of_interest.txt"
genome_fa = "/home/temccrac/Programs/data/genomes/Araport11/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa"
genome_sizes = "/home/temccrac/Programs/data/genomes/Araport11/TAIR10.chrom.sizes"

In [4]:
# File paths for step 1 using Araport 11 Mar9 2021 annotations, missing 2900 genes
gtf = "/home/temccrac/Programs/data/genomes/Araport11_2021/Araport11_renamed.gff"
#gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/genes_of_interest.txt" # our results
gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/paperresults/genes_of_interest.txt"
genome_fa = "/home/temccrac/Programs/data/genomes/Araport11_2021/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa"
genome_sizes = "/home/temccrac/Programs/data/genomes/Araport11_2021/TAIR10.chrom.sizes"

In [5]:
# File paths for step 1 using Araport 11 Feb 2022 annotations
gtf = "/home/temccrac/Programs/data/genomes/Araport11_2021/Araport11_renamed.gff"
#gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/genes_of_interest.txt" # our results
gene_list = "/home/temccrac/Programs/git_clones/ECE759_Project/cellular_clarity/paperresults/genes_of_interest.txt"
genome_fa = "/home/temccrac/Programs/data/genomes/Araport11_2021/Arabidopsis_thaliana.TAIR10.dna.toplevel.fa"
genome_sizes = "/home/temccrac/Programs/data/genomes/Araport11_2021/TAIR10.chrom.sizes"

## Step 1 - Extract promoter sequences for each gene

Inputs needed: <br>
1. gtf_file (reference genome information) <br>
2. gene_list_file (detected genes from experiment) <br>
3. genome.fa <br>
4. genomes size file Arabidopsis.genome (contains genome size information) <br>

Outputs: all outputs go to project checkpoint folder <br>
1. filtered_gtf (reduced version of Arabidopsis gtf file) <br>
2. tss.bed (bed format with transcription start sites) <br>
3. promoters.bed (bed format with 1000 bp upstream regions (promoters) of genes) <br>
4. promoters.fasta (the final output, sequences of promoter regions of each gene of interest) <br>

In [10]:
# Core functions for promoter extraction
def filter_gtf_by_genes(gtf_file, gene_list_file, output_gtf):
    with open(gene_list_file, 'r') as f:
        genes = set(f.read().splitlines())

    with open(gtf_file, 'r') as gtf_in, open(output_gtf, 'w') as gtf_out:
        for line in gtf_in:
            if line.startswith('#'):
                continue
            if any(gene in line for gene in genes):
                gtf_out.write(line)

def extract_tss_bed_original(filtered_gtf, output_bed): #with tair .gtf annotations
    awk_cmd = (
        '''awk '$3 == "transcript" {
            match($0, /gene_id "([^"]+)"/, m);
            gene = m[1];
            if ($7 == "+") {
                print $1 "\\t" $4-1 "\\t" $4 "\\t" gene "\\t.\\t" $7;
            } else {
                print $1 "\\t" $5-1 "\\t" $5 "\\t" gene "\\t.\\t" $7;
            }
        }' '''
    )
    full_cmd = f"{awk_cmd} {filtered_gtf} > {output_bed}"
    subprocess.run(full_cmd, shell=True, check=True)

def extract_tss_bed(filtered_gtf, output_bed): # command for using araport11 .gff
    awk_cmd = (
    '''awk '$3 == "mRNA" {
        # Extract the gene ID from "Parent=..." in the 9th field
        match($9, /Parent=([^;]+)/, m);
        gene = m[1];

        # For plus-strand, TSS is the start ($4). For minus-strand, TSS is the end ($5).
        if ($7 == "+") {
            print $1 "\\t" $4-1 "\\t" $4 "\\t" gene "\\t.\\t" $7;
        } else {
            print $1 "\\t" $5-1 "\\t" $5 "\\t" gene "\\t.\\t" $7;
        }
    }' '''
    )
    full_cmd = f"{awk_cmd} {filtered_gtf} > {output_bed}"
    subprocess.run(full_cmd, shell=True, check=True)

def create_promoter_bed(tss_bed, genome_sizes, output_bed, length=1000):
    with open(output_bed, 'w') as out_f:
        subprocess.run([
            "bedtools", "flank",
            "-i", tss_bed,
            "-g", genome_sizes,
            "-l", str(length),
            "-r", "0",
            "-s"
        ], stdout=out_f, check=True)

def extract_promoter_fasta(genome_fa, promoter_bed, output_fasta):
    subprocess.run([
        "bedtools", "getfasta",
        "-fi", genome_fa,
        "-bed", promoter_bed,
        "-fo", output_fasta,
        "-s",
        "-name"  # preserves gene IDs in FASTA headers
    ], check=True)

# Functions to execute the above commands in parallel
def run_promoter_pipeline(chunk_id, gene_list_chunk, gtf, genome_fa, genome_sizes, outdir):
    filtered_gtf = f"{outdir}/filtered_{chunk_id}.gtf"
    tss_bed = f"{outdir}/tss_{chunk_id}.bed"
    promoter_bed = f"{outdir}/promoters_{chunk_id}.bed"
    fasta_out = f"{outdir}/promoters_{chunk_id}.fa"

    # Write chunked gene list
    gene_list_file = f"{outdir}/genes_{chunk_id}.txt"
    with open(gene_list_file, 'w') as f:
        f.write('\n'.join(gene_list_chunk))

    # Run sub-steps
    filter_gtf_by_genes(gtf, gene_list_file, filtered_gtf)
    extract_tss_bed(filtered_gtf, tss_bed)
    create_promoter_bed(tss_bed, genome_sizes, promoter_bed)
    extract_promoter_fasta(genome_fa, promoter_bed, fasta_out)

def parallel_promoter_extraction(genes_file, gtf, genome_fa, genome_sizes, outdir, num_cpus=10):
    with open(genes_file) as f:
        genes = f.read().splitlines()

    # Split gene list into chunks
    chunk_size = len(genes) // num_cpus
    chunks = [genes[i:i + chunk_size] for i in range(0, len(genes), chunk_size)]

    os.makedirs(outdir, exist_ok=True)

    args = [(i, chunks[i], gtf, genome_fa, genome_sizes, outdir) for i in range(len(chunks))]
    with multiprocessing.Pool(num_cpus) as pool:
        pool.starmap(run_promoter_pipeline, args)

    # Merge all .fa files
    merged_fasta = os.path.join(outdir, "promoters_merged.fa")
    with open(merged_fasta, 'w') as outfile:
        for i in range(len(chunks)):
            chunk_fasta = os.path.join(outdir, f"promoters_{i}.fa")
            with open(chunk_fasta, 'r') as infile:
                outfile.write(infile.read())

    print("✅ All promoters extracted and merged.")

In [11]:
# Run steps for Step 1
parallel_promoter_extraction(
    genes_file=gene_list,
    gtf=gtf,
    genome_fa=genome_fa,
    genome_sizes=genome_sizes,
    outdir=checkpoint_path,
    num_cpus=10
)

print("✅ Promoter FASTA extraction complete.")

✅ All promoters extracted and merged.
✅ Promoter FASTA extraction complete.


After running, I deleted all the unneccessary intermediate files and kept only the promoter_merged.fa file

Now, I check that all the genes of interests have promoters that were saved in the merged promoter file

In [12]:
def check_promoter_coverage(gene_list_file, promoters_fasta):
    # Read expected gene list
    with open(gene_list_file) as f:
        expected_genes = set(f.read().splitlines())

    # Read FASTA headers and extract gene IDs from lines like "AT5G23180::5:..."
    with open(promoters_fasta) as f:
        found_genes = set(
            line[1:].strip().split("::")[0]
            for line in f if line.startswith(">")
        )

    missing = expected_genes - found_genes
    extra = found_genes - expected_genes

    print(f"✅ Found promoters for {len(found_genes)} / {len(expected_genes)} genes.")
    if missing:
        print(f"❌ Missing {len(missing)} genes:")
        for gene in list(missing)[:10]:
            print("  ", gene)
    else:
        print("🎉 No missing genes.")

    if extra:
        print(f"⚠️ {len(extra)} unexpected gene(s) in output (not in your original list):")
        for gene in list(extra)[:5]:
            print("  ", gene)
    return missing


# Example usage:
missing = check_promoter_coverage(
    gene_list_file=gene_list,
    promoters_fasta="/home/temccrac/Programs/cellular_clarity_project/checkpoints/promoters_merged.fa"
)

✅ Found promoters for 30824 / 33602 genes.
❌ Missing 2778 genes:
   AT1G52350
   AT1G20040
   AT1G80250
   AT4G21605
   AT3G31908
   AT2G33950
   AT3G28321
   AT4G03405
   AT1G69110
   AT4G34071


Looking at which ones are missing from the reference annotation and then I will save this for our records

In [13]:
DEGs_pr = pd.read_csv(os.path.join(data_path, 'paperresults/DEGs_clusters_epidermis_paperresults.csv'), index_col=None) # We are working with the Epidermis data
notmissing = set(DEGs_pr['AGI']) - set(missing) 
len(notmissing)

2704

In [19]:
missing_DEGs = set(missing).intersection(set(DEGs_pr['AGI']))
missing_DEGs = pd.Series(list(missing_DEGs))
missing_DEGs.to_csv(os.path.join(data_path, 'Araport11_2016_missingDEGs.txt'), index=False, header=False)

## Step 2 - Clustering / DEG Detection

I will use the papers DEG and cluster results

In [15]:
DEGs_included = DEGs_pr[~DEGs_pr['AGI'].isin(missing_DEGs)]
cluster_sizes = DEGs_included['cluster_id'].value_counts()
print('smallest cluster size is', cluster_sizes.min(), 'genes')
DEGs_included

smallest cluster size is 3 genes


Unnamed: 0,AGI,Alias,cluster_id,DE_6,DE_12,DE_18,DE_24,DE_30,DE_36
0,AT1G01140,"CIPK9, PKS6, SnRK3.12",1,1,0,0,0,0,0
1,AT1G02400,"ATGA2OX4, ATGA2OX6, DTA1, GA2OX6",1,0,1,0,0,0,1
2,AT1G04700,,1,0,0,0,1,0,0
3,AT1G09080,BIP3,1,1,0,0,0,0,0
4,AT1G12040,LRX1,1,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
2734,AT5G23405,,49,0,0,0,1,0,0
2735,AT5G62190,PRH75,49,0,0,0,1,0,0
2736,AT4G03470,,50,1,0,0,0,0,0
2737,AT4G24770,"ATRBP31, ATRBP33, CP31, RBP31",50,1,1,0,0,0,0


## Step 3 - Motif Detection Using AME

combined_motifs.meme is the DAP-Seq database

In [20]:
import os
import argparse
import subprocess
import multiprocessing
from Bio import SeqIO
import random
random.seed(33)

In [None]:
motif_detection_path = '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
promoters_fasta = '/home/temccrac/Programs/cellular_clarity_project/checkpoints/promoters_Araport11_2016.fa'
out_dir = "/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment"
ame_executable = "/home/temccrac/meme/bin/ame"                        # AME executable (or provide full path)
additional_params = ""                        # Additional parameters for AME (if any)
cpus = 10                                      # Number of parallel processes


foreground_groups = DEGs_included.groupby("cluster_id")["AGI"].apply(list).to_dict()
nonDEGs_included = [id for id in list(notmissing) if id not in DEGs_included]
# For each cluster, randomly sample a number of genes equal to the size of that cluster
background_groups = {}
for cluster_id, deg_genes in foreground_groups.items():
    n = len(deg_genes)
    # Use random.sample to get n genes from notmissing (sampling without replacement)
    sampled_genes = random.sample(nonDEGs_included, n)
    background_groups[cluster_id] = sampled_genes

In [22]:
def run_ame(fg_fasta, bg_fasta, ame_output_dir, motif_detection_path, ame_executable="ame", additional_params=""):
    """
    Runs the AME command using the provided motif file (first positional argument),
    the foreground FASTA (second positional argument), and the background FASTA via --control.
    """
    cmd = [
        ame_executable,
        "--oc", ame_output_dir,
        "--control", bg_fasta,
        fg_fasta,               # First positional argument: sequence file (foreground)
        motif_detection_path    # Next positional argument: motif file in MEME format
    ]
    if additional_params:
        cmd.extend(additional_params.split())
    print("Running AME command:", " ".join(cmd))
    subprocess.run(cmd, check=True)

# Function to process one group: create FASTA files and run AME
def process_group(group_name, fg_gene_ids, bg_gene_ids, promoters_dict, out_dir, motif_detection_path, ame_executable, additional_params):
    # Define output FASTA file paths for foreground and background
    fg_fasta = os.path.join(out_dir, f"{group_name}_foreground.fa")
    bg_fasta = os.path.join(out_dir, f"{group_name}_background.fa")
    
    # Write the foreground FASTA (genes in this group)
    with open(fg_fasta, "w") as fg_handle:
        for gene in fg_gene_ids:
            if gene in promoters_dict:
                SeqIO.write(promoters_dict[gene], fg_handle, "fasta")
            else:
                print(f"Warning: Foreground gene {gene} not found in promoters dictionary.")
    
    # Write the background FASTA (all other genes for this group)
    with open(bg_fasta, "w") as bg_handle:
        for gene in bg_gene_ids:
            if gene in promoters_dict:
                SeqIO.write(promoters_dict[gene], bg_handle, "fasta")
            else:
                print(f"Warning: Background gene {gene} not found in promoters dictionary.")
    
    # Create an output directory for AME results for this group
    ame_out = os.path.join(out_dir, f"{group_name}_ame")
    os.makedirs(ame_out, exist_ok=True)
    
    # Run AME
    run_ame(fg_fasta, bg_fasta, ame_out, motif_detection_path, ame_executable, additional_params)

# Function to load all promoters from a FASTA file into a dictionary keyed by gene ID
def load_promoters(promoters_fasta):
    """
    Loads the large FASTA file of promoters into a dictionary keyed by gene ID.
    Assumes that each record's id has the format 'AGI::...'
    and uses the part before the first "::" as the key.
    """
    promoters = {}
    for record in SeqIO.parse(promoters_fasta, "fasta"):
        # Extract the AGI by splitting on "::"
        gene_id = record.id.split("::")[0]
        promoters[gene_id] = record
    return promoters

In [25]:
# Load the promoter sequences into a dictionary
promoters_dict = load_promoters(promoters_fasta)

# Prepare arguments for each group
args_list = []
for group_name, fg_gene_ids in foreground_groups.items():
    bg_gene_ids = background_groups.get(group_name, [])
    args_list.append((group_name, fg_gene_ids, bg_gene_ids, promoters_dict, out_dir, motif_detection_path, ame_executable, additional_params))

# Run AME for each group in parallel using multiprocessing
pool = multiprocessing.Pool(processes=cpus)
pool.starmap(process_group, args_list)
pool.close()
pool.join()

print("All AME analyses are complete.")

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/1_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/1_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/1_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command:

Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/1_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (60).
MOTIF: 1 SEQ: 120/120
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data

 /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/3_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/3_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/3_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 28 SEQ: 120/120

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/5_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/5_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/5_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/5_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (93).
MOTIF: 1 SEQ: 186/1860
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 82 SEQ: 120/120

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/7_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/7_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/7_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/7_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (119).
MOTIF: 1 SEQ: 238/23860
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 125 SEQ: 120/120

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/9_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/9_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/9_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (103).
MOTIF: 1 SEQ: 206/20620
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/9_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 29 SEQ: 206/2020

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/11_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/11_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/11_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (64).
MOTIF: 1 SEQ: 128/12860
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/11_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 52 SEQ: 206/2060

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/13_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/13_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/13_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (27).
MOTIF: 1 SEQ: 54/542380
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/13_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 86 SEQ: 206/2060

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/15_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/15_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/15_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/15_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (69).
MOTIF: 1 SEQ: 138/13820
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 9 SEQ: 138/13886

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/17_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/17_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/17_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/17_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (72).
MOTIF: 1 SEQ: 144/14420
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 8 SEQ: 144/14428

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/19_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/19_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/19_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (56).
MOTIF: 1 SEQ: 112/11248
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/19_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Load

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/12_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/12_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/12_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 54/5428
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/12_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (85).
MOTIF: 1 SEQ: 170/17012
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/2_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/2_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/2_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 120/120
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/2_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (63).
MOTIF: 1 SEQ: 126/12628
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 8

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/10_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/10_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/10_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 128/128
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/10_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (49).
MOTIF: 1 SEQ: 98/98/112
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/18_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/18_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/18_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 112/112
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/18_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (52).
MOTIF: 630 SEQ: 152/152

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/14_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/14_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/14_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 1 SEQ: 104/10470
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 872 SEQ: 138/138
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/14_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 85 SEQ: 104/1046

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/4_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/4_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/4_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (62).
MOTIF: 872 SEQ: 186/186
MOTIF: 1 SEQ: 124/1244
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/4_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 48

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/16_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/16_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/16_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 144/144
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (56).
MOTIF: 1 SEQ: 112/11204
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/16_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/20_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/20_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/20_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/8_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/8_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/8_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 152/152
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/20_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 206/206
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoi

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/21_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/21_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/21_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/6_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/6_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/6_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/21_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 391 SEQ: 112/112
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/6_a

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/23_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/23_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/23_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (47).
MOTIF: 1 SEQ: 94/944824
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (37).
MOTIF: 1 SEQ: 74/746804
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/25_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/25_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/25_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/25_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 48/4812
MOTIF: 218 SEQ: 74/7412

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/27_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/27_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/27_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command:

Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (56).
MOTIF: 1 SEQ: 112/11212
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/27_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 104/104
MOTIF: 

 /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/29_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/29_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/29_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (39).
MOTIF: 1 SEQ: 78/781122
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/29_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 422 SEQ: 74/7412

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/31_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/31_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/31_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (59).
MOTIF: 1 SEQ: 118/11824
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/31_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 68/6812
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/33_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/33_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/33_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (104).
MOTIF: 1 SEQ: 208/20824
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/33_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 112/112
Loadin

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/35_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/35_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/35_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command:

MOTIF: 14 SEQ: 198/1984
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/35_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 390 SEQ: 112/112

 /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/37_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/37_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/37_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (32).
MOTIF: 1 SEQ: 64/647418
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/37_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 74/7418
Loading

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/39_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/39_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/39_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/39_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (85).
MOTIF: 132 SEQ: 66/6698
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 346 SEQ: 118/118

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/22_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/22_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/22_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 94/9412
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/22_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 391 SEQ: 118/118

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/24_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/24_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/24_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 90/9018
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/24_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (81).
MOTIF: 1 SEQ: 162/16212
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/28_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/28_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/28_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (64).
MOTIF: 1 SEQ: 128/12818
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 872 SEQ: 78/7898
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/28_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/36_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/36_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/36_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 64/6430
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/36_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 112/112
MOTIF: 118 SEQ: 130/130

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/26_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/26_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/26_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/26_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (60).
MOTIF: 1 SEQ: 120/12030
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 823 SEQ: 66/6628

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/38_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/38_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/38_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (59).
MOTIF: 1 SEQ: 118/11898
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 872 SEQ: 66/6630
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/38_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/30_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/30_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/30_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 118/118
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/30_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (49).
MOTIF: 1 SEQ: 98/98/170
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/41_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/41_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/41_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command:

MOTIF: 872 SEQ: 86/8618
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/41_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 758 SEQ: 120/120

 /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/43_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/43_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/43_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command:

Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (33).
MOTIF: 1 SEQ: 66/66/118
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/43_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
MOTIF: 872 SEQ: 130/130
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 

 /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/34_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/34_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/34_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/45_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/45_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/45_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/32_ame --control /home/t

MOTIF: 872 SEQ: 198/198
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (66).
MOTIF: 1 SEQ: 132/13270
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/34_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Added /

Running AME command:

Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (63).
MOTIF: 1 SEQ: 126/1262
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (52).
MOTIF: 1 SEQ: 104/1042
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 872 SEQ: 98/982
MOTIF: 58 SEQ: 70/7066

 /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/47_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/47_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/47_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme
Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/49_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/49_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/49_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/47_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (45).
MOTIF: 1 SEQ: 90/907062
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
Added /home/temccrac/Programs/d

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/42_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/42_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/42_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 66/6626
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/42_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (22).
MOTIF: 1 SEQ: 44/44/104
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/50_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/50_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/50_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 44/4404
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/50_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 872 SEQ: 70/7026
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (3).
MOTIF: 1 SEQ: 6/6
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score t

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/46_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/46_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/46_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 96/9626
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/46_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 791 SEQ: 44/4404

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/48_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/48_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/48_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 90/9032
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/48_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (13).
MOTIF: 1 SEQ: 26/26/132
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing over sequence PWM score threshold.
MOTIF: 

Running AME command: /home/temccrac/meme/bin/ame --oc /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/44_ame --control /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/44_background.fa /home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/44_foreground.fa /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme


MOTIF: 872 SEQ: 132/132
Added /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme to motif_sources which now has 1 file names.
Motif file name is /home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme.
The output directory '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/44_ame' already exists.
Its contents will be overwritten.
E-value threshold for reporting results: 10
Checking alphabets in 1 motif files.
Loading motifs from file '/home/temccrac/Programs/data/cellular_clarity/dap_seq_data/dap_data_v4/ArabidopsisDAPv1.meme'
MOTIF: 193 SEQ: 80/8026
MOTIF: 872 SEQ: 26/26
Loading primary sequences.
Loading control sequences.
Not in partition maximization mode. Fixing partition at the number of primary sequences (24).
MOTIF: 1 SEQ: 48/48
Sorting sequences by sequence PWM score to get PWM ranks; breaking ties to put negatives first.
Leaving sequences sorted by PWM score.
Optimizing 

All AME analyses are complete.


## Step 4 - Processing Motif Information Output

In [12]:
import re
motif_enrichment_path = os.path.join(checkpoint_path, 'motif_enrichment')

In [13]:
# Create all the file paths
motif_results_paths = []
for root, dir, files in os.walk(motif_enrichment_path):
    for file in files: 
        if 'sequences.tsv' in file: 
            motif_results_paths.append(os.path.join(root, file))
motif_results_paths

['/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/38_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/1_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/36_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/37_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/12_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/28_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/11_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/43_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/3_ame/sequences.tsv',
 '/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrich

In [None]:
# Takes one cluster's motif output and converts it to a dataframe with all the motifs mapped to genes they were identified in
def process_cluster_moitfs(path):
    df = pd.read_csv(path, delimiter = '\t')
    df_subset = df[['motif_ID', 'seq_ID']]
    pattern = r'^(.+?)::'
    df_subset_processed = df_subset.copy()
    df_subset_processed['seq_ID'] = df_subset['seq_ID'].str.extract(pattern)
    df_subset_processed.columns = [df_subset.columns[0], 'gene']
    return df_subset_processed

Unnamed: 0,motif_ID,gene
0,NAC_tnt.VND6_col_v31_m1,AT2G31410
1,NAC_tnt.VND6_col_v31_m1,AT4G30290
2,NAC_tnt.VND6_col_v31_m1,AT4G25070
3,NAC_tnt.VND6_col_v31_m1,AT2G34070
4,NAC_tnt.VND6_col_v31_m1,AT1G55330
5,NAC_tnt.VND6_col_v31_m1,AT3G01420
6,NAC_tnt.VND6_col_v31_m1,AT4G36380
7,NAC_tnt.VND6_col_v31_m1,AT4G26320
8,NAC_tnt.VND6_col_v31_m1,AT5G55550
9,NAC_tnt.VND6_col_v31_m1,AT1G09080


In [17]:
clust_id_pattern = r'(?<=/)(\d{1,2})(?=_ame)'
motif_enrichment_dict = {}
for n, path in enumerate(motif_results_paths):
    clust_id = re.search(clust_id_pattern, path)
    print(path)
    motif_enrichment_dict[clust_id] = process_cluster_moitfs(path)

motif_enrichment_dict

/home/temccrac/Programs/cellular_clarity_project/checkpoints/motif_enrichment/38_ame/sequences.tsv


EmptyDataError: No columns to parse from file