# MEME Motif Analysis

This notebook extracts the sequences flanking each peak summit and identifies enriched motifs in them.

In [5]:
import os
import re
import subprocess
import sys

import pandas as pd
from Bio import SeqIO

# Print the version string of the Python interpreter running this notebook.
print(sys.version)

3.11.3 (main, May 15 2023, 10:43:03) [Clang 14.0.6 ]


## Generate BED files containing the coordinates of a 500 bp window centered on each peak summit (250 bp on each side)

In [32]:
# Build one window of `seq_length` bp centered on every peak summit and write
# three headerless, tab-separated BED files (chrom, start, end): all peaks,
# intergenic peaks only, and coding peaks only.
seq_length = 500  # total window width; half of it is taken on each side of the summit
INFILE = "/Users/yunfei/2023_ChipSeq/peak_stat/BfmR-ChIP-49_seed1.nearest_orf.tsv"
OUTDIR = "/Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500"

half_window = seq_length // 2

# Take an explicit copy so the column assignments below act on an independent
# frame rather than on a slice of the parsed table (a slice may trigger
# pandas' SettingWithCopyWarning).
df_peak = pd.read_csv(INFILE, sep='\t')[['chrom', 'summit_pos', 'match_type']].copy()

# A BED start coordinate cannot be negative: clamp the window for summits that
# lie closer than half_window to the beginning of the sequence.
df_peak['seq_start'] = (df_peak['summit_pos'] - half_window).clip(lower=0)
# NOTE(review): seq_end is not clamped because no sequence lengths are
# available in this cell -- confirm how the downstream tool treats windows
# that run past the end of a record.
# NOTE(review): BED is 0-based, half-open; whether summit_pos is 0- or
# 1-based is not visible here -- confirm against the peak caller.
df_peak['seq_end'] = df_peak['summit_pos'] + half_window

bed_columns = ['chrom', 'seq_start', 'seq_end']
df_peak_all = df_peak[bed_columns]
df_peak_intergenic = df_peak.loc[df_peak['match_type'] == 'intergenic', bed_columns]
df_peak_coding = df_peak.loc[df_peak['match_type'] == 'coding', bed_columns]

os.makedirs(OUTDIR, exist_ok=True)

bed_outputs = {
    "BfmR-ChIP-49_seed1_all_peak.bed": df_peak_all,
    "BfmR-ChIP-49_seed1_intergenic_peak.bed": df_peak_intergenic,
    "BfmR-ChIP-49_seed1_coding_peak.bed": df_peak_coding,
}
for bed_name, bed_frame in bed_outputs.items():
    bed_frame.to_csv(os.path.join(OUTDIR, bed_name), sep='\t', header=False, index=False, mode='w')

## Generate FASTA files from the BED files

In [45]:
# Run `bedtools getfasta` on every *.bed file in INDIR and write the extracted
# sequences to a *.fa file with the same base name in the same directory.
INDIR = "/Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500"
FASTA = "/Users/yunfei/2023_ChipSeq/references/Ab_all.fasta"

# sorted() gives a reproducible processing order; os.listdir order is arbitrary.
for file in sorted(os.listdir(INDIR)):
    if not file.endswith('.bed'):
        continue
    infile = os.path.join(INDIR, file)
    # splitext strips only the final extension, so base names that contain
    # additional dots are preserved instead of being truncated at the first dot.
    outfile = os.path.join(INDIR, os.path.splitext(file)[0] + '.fa')
    print("FASTA output: " + outfile)
    # Pass the arguments as a list (no shell), so paths containing spaces or
    # shell metacharacters are handled correctly; stdout is redirected to the
    # FASTA file, replacing the shell's `>`.
    with open(outfile, 'w') as fasta_out:
        # check=True raises CalledProcessError on a non-zero exit status rather
        # than silently leaving an empty or partial FASTA file behind.
        subprocess.run(
            ['bedtools', 'getfasta', '-fi', FASTA, '-bed', infile],
            stdout=fasta_out,
            check=True,
        )

FASTA output: /Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500/BfmR-ChIP-49_seed1_coding_peak.fa
FASTA output: /Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500/BfmR-ChIP-49_seed1_intergenic_peak.fa
FASTA output: /Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500/BfmR-ChIP-49_seed1_all_peak.fa


## Run MEME-ChIP

In [51]:
# Run `meme-chip` on every *.fa file in INDIR. Each input gets its own output
# directory under OUTDIR (named after the input's base name); stdout and
# stderr of every run are appended to a single shared log file.
INDIR = "/Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500"
OUTDIR = "/Users/yunfei/2023_ChipSeq/MEME-Chip_output"
DB = "/Users/yunfei/meme/motif_databases/PROKARYOTE/collectf.meme"
LOG = os.path.join(OUTDIR, 'meme-chip_log.txt')

os.makedirs(os.path.join(OUTDIR, 'fimo_output'), exist_ok=True)
os.makedirs(os.path.join(OUTDIR, 'html_output'), exist_ok=True)

# Append mode mirrors the shell's `>>`: earlier log content is kept.
with open(LOG, 'a') as log_handle:
    # sorted() gives a reproducible processing order; os.listdir order is arbitrary.
    for file in sorted(os.listdir(INDIR)):
        if not file.endswith('.fa'):
            continue
        infile = os.path.join(INDIR, file)
        # splitext strips only the final extension, so base names containing
        # additional dots are not truncated at the first dot.
        prefix = os.path.join(OUTDIR, os.path.splitext(file)[0])
        # NOTE(review): meme-chip's -o presumably refuses an already existing
        # output directory (-oc would overwrite) -- confirm before re-running.
        meme_args = ['meme-chip', '-meme-p', '4', '-o', prefix, '-db', DB, infile]
        # Printed for the record only, in the equivalent shell form; the
        # process itself is started without a shell.
        cmd = ' '.join(meme_args + [">>", LOG, "2>&1"])
        print(cmd)
        result = subprocess.run(meme_args, stdout=log_handle, stderr=subprocess.STDOUT)
        if result.returncode != 0:
            # Report the failure but keep going, so one failing input does not
            # prevent the remaining inputs from being analysed.
            print(
                f"WARNING: meme-chip exited with status {result.returncode} "
                f"for {infile}; see {LOG}",
                file=sys.stderr,
            )

meme-chip -meme-p 4 -o /Users/yunfei/2023_ChipSeq/MEME-Chip_output/BfmR-ChIP-49_seed1_coding_peak -db /Users/yunfei/meme/motif_databases/PROKARYOTE/collectf.meme /Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500/BfmR-ChIP-49_seed1_coding_peak.fa >> /Users/yunfei/2023_ChipSeq/MEME-Chip_output/meme-chip_log.txt 2>&1
meme-chip -meme-p 4 -o /Users/yunfei/2023_ChipSeq/MEME-Chip_output/BfmR-ChIP-49_seed1_intergenic_peak -db /Users/yunfei/meme/motif_databases/PROKARYOTE/collectf.meme /Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500/BfmR-ChIP-49_seed1_intergenic_peak.fa >> /Users/yunfei/2023_ChipSeq/MEME-Chip_output/meme-chip_log.txt 2>&1
meme-chip -meme-p 4 -o /Users/yunfei/2023_ChipSeq/MEME-Chip_output/BfmR-ChIP-49_seed1_all_peak -db /Users/yunfei/meme/motif_databases/PROKARYOTE/collectf.meme /Users/yunfei/2023_ChipSeq/bed_file_for_MEME_500/BfmR-ChIP-49_seed1_all_peak.fa >> /Users/yunfei/2023_ChipSeq/MEME-Chip_output/meme-chip_log.txt 2>&1
