In [1]:
import os

In [2]:
os.chdir('../../../Rat-Pilot/data/tag_directories/')

Create a bigwig hub

In [3]:
%%time
%%bash
makeMultiWigHub.pl rat-pilot rn6 -url http://homer.ucsd.edu/sjroth/Rat-Pilot -webdir /homer_data/www/html/sjroth/Rat-Pilot -d *

Colors that will be used (change with -color or -gradient options):
	Index	Color	Neg. color	Tag Directory
	1	255,150,150	(255,180,180)	H3K27ac1-IL
	2	150,150,255	(180,180,255)	H3K27ac2-PL
	3	150,255,150	(180,255,180)	H3K27ac4-ST
	4	255,200,150	(255,210,180)	H3K27ac5-AC
	5	150,255,220	(180,255,240)	H3K27ac6-AS
	6	200,150,255	(210,180,255)	H3K27ac7-VTA
	7	200,200,150	(210,210,170)	Input1-IL
	8	150,200,200	(170,210,210)	Input2-PL
	9	200,150,200	(210,170,210)	Input4-ST
	10	59,5,171	(244,134,234)	Input5-AC
	11	228,251,92	(82,113,54)	Input6-AS
	12	215,9,1	(206,47,115)	Input7-VTA
	13	217,241,114	(216,190,192)	test.txt


	Once finished, you will want to upload the following hub URL:
		http://homer.ucsd.edu/sjroth/Rat-Pilot/rat-pilot/hub.txt

	If loading to the Wash U Epigenome Browser, use:
		http://homer.ucsd.edu/sjroth/Rat-Pilot/rat-pilot/washU.hub.txt


	Visualization fragment length = 177
	Output file: H3K27ac1-IL/H3K27ac1-IL.ucsc.bigWig
	No need to remove tags to get desired file size
	Ge

CPU times: user 47.2 ms, sys: 20.6 ms, total: 67.8 ms
Wall time: 19min 14s


Vary peak-finding in order to discover optimal parameters. Using striatum as sample.

In [4]:
import sarge
from multiprocessing import Pool

In [5]:
'''
Define a function that can call peaks by varying parameters.
'''
def call_peaks(args):
    
    tag_dir,input_dir,fold_enrichment,poisson,out_name = args
    
    cmd = f'findPeaks {tag_dir} -style histone -i {input_dir} -F {fold_enrichment} -poisson {poisson} -o {out_name}'
    sarge.run(cmd)

In [6]:
tag_dir = 'H3K27ac4-ST/'
input_dir = 'Input4-ST/'

Vary fold enrichment over background

In [7]:
#Set other parameters.
poisson = '1e-3'

In [8]:
cmds = []
for i in range(5,9):
    cmds.append([tag_dir,input_dir,str(i),poisson,f'peaks-FoldEnrich-{i}.txt'])
cmds

[['H3K27ac4-ST/', 'Input4-ST/', '5', '1e-3', 'peaks-FoldEnrich-5.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '6', '1e-3', 'peaks-FoldEnrich-6.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '7', '1e-3', 'peaks-FoldEnrich-7.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '8', '1e-3', 'peaks-FoldEnrich-8.txt']]

Vary poisson p-value cutoff

In [9]:
#Set other parameters.
fold_enrichment = '4'

In [10]:
for i in range(-8,-4):
    cmds.append([tag_dir,input_dir,fold_enrichment,f'1e{i}',f'peaks-Poisson-1e{i}.txt'])
cmds

[['H3K27ac4-ST/', 'Input4-ST/', '5', '1e-3', 'peaks-FoldEnrich-5.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '6', '1e-3', 'peaks-FoldEnrich-6.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '7', '1e-3', 'peaks-FoldEnrich-7.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '8', '1e-3', 'peaks-FoldEnrich-8.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '4', '1e-8', 'peaks-Poisson-1e-8.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '4', '1e-7', 'peaks-Poisson-1e-7.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '4', '1e-6', 'peaks-Poisson-1e-6.txt'],
 ['H3K27ac4-ST/', 'Input4-ST/', '4', '1e-5', 'peaks-Poisson-1e-5.txt']]

Run peak finding

In [11]:
pool = Pool(processes=len(cmds))
pool.map(call_peaks,cmds)
pool.close()

Get distal peaks only

In [12]:
'''
Define a function that can isolate the distal peaks.
'''
def get_distal_peaks(peakfile):
    
    cmd = f'getDistalPeaks.pl {peakfile} rn6 > {peakfile[:-3]}-distal.txt'
    sarge.run(cmd)

In [13]:
peak_files = [x[-1] for x in cmds]
peak_files

['peaks-FoldEnrich-5.txt',
 'peaks-FoldEnrich-6.txt',
 'peaks-FoldEnrich-7.txt',
 'peaks-FoldEnrich-8.txt',
 'peaks-Poisson-1e-8.txt',
 'peaks-Poisson-1e-7.txt',
 'peaks-Poisson-1e-6.txt',
 'peaks-Poisson-1e-5.txt']

In [14]:
pool = Pool(processes=len(peak_files))
pool.map(get_distal_peaks,peak_files)
pool.close()

Perform motif finding

In [15]:
%%time
%%bash
batchFindMotifsGenome.pl rn6 -size 500 -cpu 8 -p 10 \
-preparsedDir /gpfs/data01/bennerlab/home/sjroth/software/homer/data/genomes/rn6/preparsed -f *distal*

	125345 finished


findMotifsGenome.pl "peaks-FoldEnrich-5.-distal.txt" rn6 "Motifs-peaks-FoldEnrich-5.-distal.txt"  -size 500 -p 10 -preparsedDir /gpfs/data01/bennerlab/home/sjroth/software/homer/data/genomes/rn6/preparsed
findMotifsGenome.pl "peaks-FoldEnrich-6.-distal.txt" rn6 "Motifs-peaks-FoldEnrich-6.-distal.txt"  -size 500 -p 10 -preparsedDir /gpfs/data01/bennerlab/home/sjroth/software/homer/data/genomes/rn6/preparsed
findMotifsGenome.pl "peaks-FoldEnrich-7.-distal.txt" rn6 "Motifs-peaks-FoldEnrich-7.-distal.txt"  -size 500 -p 10 -preparsedDir /gpfs/data01/bennerlab/home/sjroth/software/homer/data/genomes/rn6/preparsed
findMotifsGenome.pl "peaks-FoldEnrich-8.-distal.txt" rn6 "Motifs-peaks-FoldEnrich-8.-distal.txt"  -size 500 -p 10 -preparsedDir /gpfs/data01/bennerlab/home/sjroth/software/homer/data/genomes/rn6/preparsed
findMotifsGenome.pl "peaks-Poisson-1e-5.-distal.txt" rn6 "Motifs-peaks-Poisson-1e-5.-distal.txt"  -size 500 -p 10 -preparsedDir /gpfs/data01/bennerlab/home/sjroth/software/homer/da

CPU times: user 881 ms, sys: 337 ms, total: 1.22 s
Wall time: 1h 1min 23s
