In [None]:
%run ../config/init.py
import itertools

### Creating results folders

In [None]:
# Everything belonging to this dataset lives under RESULTS/DATASET
dataset_results = os.path.join(RESULTS, DATASET)

# Input: output folder of the peak-calling step
data_dir = os.path.join(dataset_results, 'peak_calling')

# Output: folder for the motif analysis. working_dir() is not defined in this
# notebook; presumably it creates the folder and switches into it - TODO confirm.
result_dir = working_dir(os.path.join(dataset_results, 'motif'))

# Sample sheet; keep_default_na=False keeps empty cells as '' instead of NaN
sample_table_file = os.path.join(DATA, DATASET, 'sample_table.csv')
sample_table = pandas.read_csv(
    sample_table_file,
    keep_default_na=False,
)
sample_table.head()

### DNA binding motif finding
To use MEME, download the motif databases from http://meme-suite.org/doc/download.html

The Motif database (file `motif_databases.X.X.tgz`) should be uncompressed into the `data/meme` folder, resulting 
in a structure like this:

```
    data
    ├── meme
    │   └── motif_databases
    │       ├── ARABD
    │       ├── CIS-BP
    │       ├── CISBP-RNA
    │       ├── ECOLI
    │       ├── EUKARYOTE
    │       ├── FLY
    │       ├── HUMAN
    │       ├── JASPAR
    │       ├── MALARIA
    │       ├── MIRBASE
    │       ├── MOUSE
    │       ├── PROKARYOTE
    │       ├── PROTEIN
    │       ├── RNA
    │       ├── TFBSshape
    │       ├── WORM
    │       └── YEAST
    └── PRJNA238004
```

In this example we will be using the *E. coli* databases: 
    * SwissRegulon_e_coli.meme
    * dpinteract.meme

Therefore, the variable `MEME_DB_CATEGORY` is set to `'ECOLI'`; edit it (and the database file names used below) if you work with a different category. 

In [None]:
MEME_DB_CATEGORY = 'ECOLI'

MEME_DB_MOTIF_FILE = 'motif_databases.12.19.tgz'
MEME_DB_MOTIF_LINK = 'http://meme-suite.org/meme-software/Databases/motifs/' + MEME_DB_MOTIF_FILE

if not os.path.exists(os.path.join(DATA,'meme')):
    working_dir(os.path.join(DATA,'meme'))
    !curl -o {MEME_DB_MOTIF_FILE} {MEME_DB_MOTIF_LINK}
    !tar xzf {MEME_DB_MOTIF_FILE}
    !rm -v {MEME_DB_MOTIF_FILE}
    

In [None]:
# Log of the background workflow run; read again by the status-check cell
log_file = 'meme.log'
meme_db_path = os.path.join(DATA, 'meme', 'motif_databases', MEME_DB_CATEGORY)

# Job description passed to the meme-motif.cwl workflow
meme_yml = {
    'genome': {'class': 'File', 'path': GENOME_FASTA},
    'nmotifs': 10,
    'memedb': [
        {'class': 'File', 'path': os.path.join(meme_db_path, db_name)}
        for db_name in ('SwissRegulon_e_coli.meme', 'dpinteract.meme')
    ],
    'bed': []
}

# One `<condition>.border_pair_annot.bed` file per condition is looked up in
# data_dir. Only files that actually exist are passed on: a single missing file
# would otherwise make the whole background run fail, visible only in the log.
missing_bed_files = []
for condition in sample_table['condition'].unique():
    bed_file = os.path.join(data_dir, condition + '.border_pair_annot.bed')
    if os.path.exists(bed_file):
        meme_yml['bed'].append({'class': 'File', 'path': bed_file})
    else:
        missing_bed_files.append(bed_file)

if missing_bed_files:
    print('Skipping missing peak files:')
    for bed_file in missing_bed_files:
        print('  ' + bed_file)

if meme_yml['bed']:
    write_to_yaml(meme_yml, 'meme.yml')
    # Trailing '&': the workflow runs in the background; stdout and stderr go to log_file
    cmd = '{} {}/ChIP-Seq/meme-motif.cwl meme.yml > {} 2>&1 &'.format(CWLRUNNER, CWLWORKFLOWS, log_file)
    run_command(cmd)
else:
    print('No peak files found in {}; the MEME workflow was not started'.format(data_dir))

### Checking command output
Re-run the next cell until it prints: **Run completed**

In [None]:
# Report the status of the MEME workflow that was launched in the background,
# using the log file it writes to (`log_file`). The helper is not defined in this
# notebook - presumably it comes from ../config/init.py; TODO confirm.
check_cwl_command_log(log_file)