# Initiate arrays

In [1]:
import pandas as pd
import numpy as np

In [None]:
# ===========================================================
# User settings -- review these before running this cell.
# ===========================================================

# Switches selecting which steps of this cell are executed.
make_bw = False        # gunzip the bedgraphs and convert them to bigWig
run_refpoint = False   # computeMatrix reference-point + plotHeatmap
run_scaled = False     # computeMatrix scale-regions + plotHeatmap

# Stage labels; each one is spliced into the per-sample file names.
stage_array = '13 14 15 17 18 19 19plus 20 21 22 23 24 25 26 27'.split()
# Track types; one bedgraph / bigWig per stage exists for each of them.
datatype_array = [
    'occ',
    'nucleoatac_signal',
    'nucleoatac_signal.smooth',
]
# BED file passed to computeMatrix via --regionsFileName.
regionsfilename = 'mikado.loci.sorted.mRNAs.bed'
# Tag embedded in the names of the output matrices and PDFs.
regionprefix = 'mikado_mRNA'

# ===========================================================

if make_bw == True:
    for stage in stage_array:
        stagename = stage + 'AB_nucleoatac'
        !echo '### Now analyzing sample '$stagename
    
        for datatype in datatype_array:
            filename = 'S' + stagename + '.' + datatype + '.bedgraph.gz'
            gunzipped = filename.replace('.gz', '')
            !echo '### gunzipping '$filename' into '$gunzipped
            !gunzip -c $filename > $gunzipped
            
            bwname = gunzipped.replace('.bedgraph', '.bw')
            !echo '### converting '$gunzipped' into '$bwname
            !./bedGraphToBigWig $gunzipped phaw_5.0.chrom.sizes $bwname
        
if run_refpoint == True:
    for datatype in datatype_array:
        refpoint_list = ['S' + stagename + 'AB_nucleoatac.' + datatype + '.bw' for stagename in stage_array]
        refpoint_string = ' '.join(refpoint_list)
        refpoint_name = 'Sall_nucATAC_' + datatype + '.' + regionprefix + '.refpoint.gz'
        !echo '### computing refpoint matrix for '{refpoint_string}'\n'
        !computeMatrix reference-point -S {refpoint_string} --regionsFileName {regionsfilename} --outFileName {refpoint_name} -b 400 -a 800 --numberOfProcessors max --skipZeros --missingDataAsZero
        
        refpoint_pdf = refpoint_name.replace('.gz', '.pdf')
        !echo '### drawing figure into '{refpoint_pdf}'\n'
        !plotHeatmap --matrixFile {refpoint_name} --outFileName {refpoint_pdf}

if run_scaled == True:
    for datatype in datatype_array:
        scaled_list = ['S' + stagename + 'AB_nucleoatac.' + datatype + '.bw' for stagename in stage_array]
        scaled_string = ' '.join(scaled_list)
        scaled_name = 'Sall_nucATAC_' + datatype + '.' + regionprefix + '.scaled.gz'
        !echo '### computing scaled matrix for '{scaled_string}'\n'
        !computeMatrix scale-regions -S {scaled_string} --regionsFileName {regionsfilename} --outFileName {scaled_name} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
        
        scaled_pdf = scaled_name.replace('.gz', '.pdf')
        !echo '### drawing figure into '{scaled_pdf}'\n'
        !plotHeatmap --matrixFile {scaled_name} --outFileName {scaled_pdf}

#genericprefix = 'Sall_nucATAC_*' + regionprefix + '.pdf'
#!mv {genericprefix} mikado_NucATAC

In [2]:
# ===========================================================
# User settings -- review these before running this cell.
# ===========================================================

# Switches selecting which steps of this cell are executed.
run_refpoint_mfuzz = False         # reference-point matrices (default reference point)
run_refpoint_mfuzz_center = False  # reference-point matrices with --referencePoint=center
run_scaled_mfuzz = False           # scale-regions matrices

# Stage labels; each one is spliced into the per-sample file names.
stage_array = '13 14 15 17 18 19 19plus 20 21 22 23 24 25 26 27'.split()
# Track types; one bigWig per stage is expected for each of them.
datatype_array = [
    'occ',
    'nucleoatac_signal',
    'nucleoatac_signal.smooth',
]
# Peak-file name template; the literal '*_*' is swapped for the cluster number below.
acore_peak_filename_pattern = 'Mfuzz_9acores_acore*_*_peaks.bed'
# Tag embedded in the names of the output matrices and PDFs.
regionprefix = 'mfuzz_9acores'

# ===========================================================

# Number of cluster peak files; they are numbered 1..acores.
acores = 9
acore_peaks_list = [
    acore_peak_filename_pattern.replace('*_*', str(cluster))
    for cluster in range(1, acores + 1)
]
# Space-separated form, ready to hand to computeMatrix --regionsFileName.
acore_peaks_string = ' '.join(acore_peaks_list)

if run_refpoint_mfuzz == True:
    for datatype in datatype_array:
        refpoint_list = ['S' + stagename + 'AB_nucleoatac.' + datatype + '.bw' for stagename in stage_array]
        refpoint_string = ' '.join(refpoint_list)
        refpoint_name = 'Sall_nucATAC_' + datatype + '.' + regionprefix + '.refpoint.gz'
        !echo '### computing refpoint matrix for '{refpoint_string}'\n'
        !computeMatrix reference-point -S {refpoint_string} --regionsFileName {acore_peaks_string} --outFileName {refpoint_name} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
        
        refpoint_pdf = refpoint_name.replace('.gz', '.pdf')
        !echo '### drawing figure into '{refpoint_pdf}'\n'
        !plotHeatmap --matrixFile {refpoint_name} --outFileName {refpoint_pdf}
    
    refpoint_list_2 = ['bigwig/S' + stagename + 'AB_bothruns_q10.Genrich_sorted.ATAC.q005.final.bw' for stagename in stage_array]
    refpoint_string_2 = ' '.join(refpoint_list_2)
    refpoint_name_2 = 'Sall_OmniATAC' + '.' + regionprefix + '.refpoint.gz'
    !echo '### computing refpoint matrix for '{refpoint_string_2}'\n'
    !computeMatrix reference-point -S {refpoint_string_2} --regionsFileName {acore_peaks_string} --outFileName {refpoint_name_2} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
    
    refpoint_pdf_2 = refpoint_name_2.replace('.gz', '.pdf')
    !echo '### drawing figure into '{refpoint_pdf_2}'\n'
    !plotHeatmap --matrixFile {refpoint_name_2} --outFileName {refpoint_pdf_2}

if run_refpoint_mfuzz_center == True:
    for datatype in datatype_array:
        refpoint_list = ['S' + stagename + 'AB_nucleoatac.' + datatype + '.bw' for stagename in stage_array]
        refpoint_string = ' '.join(refpoint_list)
        refpoint_name = 'Sall_nucATAC_' + datatype + '.' + regionprefix + '.refpoint-center.gz'
        !echo '### computing refpoint matrix for '{refpoint_string}'\n'
        !computeMatrix reference-point -S {refpoint_string} --regionsFileName {acore_peaks_string} --referencePoint=center --outFileName {refpoint_name} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
        
        refpoint_pdf = refpoint_name.replace('.gz', '.pdf')
        !echo '### drawing figure into '{refpoint_pdf}'\n'
        !plotHeatmap --matrixFile {refpoint_name} --outFileName {refpoint_pdf}
    
    refpoint_list_2 = ['bigwig/S' + stagename + 'AB_bothruns_q10.Genrich_sorted.ATAC.q005.final.bw' for stagename in stage_array]
    refpoint_string_2 = ' '.join(refpoint_list_2)
    refpoint_name_2 = 'Sall_OmniATAC' + '.' + regionprefix + '.refpoint-center.gz'
    !echo '### computing refpoint matrix for '{refpoint_string_2}'\n'
    !computeMatrix reference-point -S {refpoint_string_2} --regionsFileName {acore_peaks_string} --referencePoint=center --outFileName {refpoint_name_2} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
    
    refpoint_pdf_2 = refpoint_name_2.replace('.gz', '.pdf')
    !echo '### drawing figure into '{refpoint_pdf_2}'\n'
    !plotHeatmap --matrixFile {refpoint_name_2} --outFileName {refpoint_pdf_2}

if run_scaled_mfuzz == True:
    for datatype in datatype_array:
        scaled_list = ['S' + stagename + 'AB_nucleoatac.' + datatype + '.bw' for stagename in stage_array]
        scaled_string = ' '.join(scaled_list)
        scaled_name = 'Sall_nucATAC_' + datatype + '.' + regionprefix + '.scaled.gz'
        !echo '### computing scaled matrix for '{scaled_string}'\n'
        !computeMatrix scale-regions -S {scaled_string} --regionsFileName {acore_peaks_string} --outFileName {scaled_name} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
        
        scaled_pdf = scaled_name.replace('.gz', '.pdf')
        !echo '### drawing figure into '{scaled_pdf}'\n'
        !plotHeatmap --matrixFile {scaled_name} --outFileName {scaled_pdf}
    
    scaled_list_2 = ['bigwig/S' + stagename + 'AB_bothruns_q10.Genrich_sorted.ATAC.q005.final.bw' for stagename in stage_array]
    scaled_string_2 = ' '.join(scaled_list_2)
    scaled_name_2 = 'Sall_OmniATAC' + '.' + regionprefix + '.refpoint.gz'
    !echo '### computing refpoint matrix for '{scaled_string_2}'\n'
    !computeMatrix scale-regions -S {scaled_string_2} --regionsFileName {acore_peaks_string} --outFileName {scaled_name_2} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
    
    scaled_pdf_2 = scaled_name_2.replace('.gz', '.pdf')
    !echo '### drawing figure into '{scaled_pdf_2}'\n'
    !plotHeatmap --matrixFile {scaled_name_2} --outFileName {scaled_pdf_2}

#genericprefix = 'Sall_*ATAC_*' + regionprefix + '*.pdf'
#!mv {genericprefix} mikado_nucATAC

'Mfuzz_9acores_acore1_peaks.bed Mfuzz_9acores_acore2_peaks.bed Mfuzz_9acores_acore3_peaks.bed Mfuzz_9acores_acore4_peaks.bed Mfuzz_9acores_acore5_peaks.bed Mfuzz_9acores_acore6_peaks.bed Mfuzz_9acores_acore7_peaks.bed Mfuzz_9acores_acore8_peaks.bed Mfuzz_9acores_acore9_peaks.bed'

zsh:1: no matches found: Sall_nucATAC_*mfuzz_9acores.pdf


In [None]:
# ===========================================================
# User settings -- review these before running this cell.
# ===========================================================

# Switches selecting which steps of this cell are executed.
run_scale_occ = True                 # normalise the occ bedgraphs and convert them to bigWig
run_refpoint_mfuzz_occscaled = True  # reference-point matrix on the normalised tracks
run_scaled_mfuzz_occscaled = True    # scale-regions matrix on the normalised tracks

# Stage labels; each one is spliced into the per-sample file names.
stage_array = '13 14 15 17 18 19 19plus 20 21 22 23 24 25 26 27'.split()
# Only the occupancy track is listed for this cell.
datatype_array = ['occ']
# Peak-file name template; the literal '*_*' is swapped for the cluster number below.
acore_peak_filename_pattern = 'Mfuzz_9acores_acore*_*_peaks.bed'
# Tag embedded in the names of the output matrices and PDFs.
regionprefix = 'mfuzz_9acores'

# ===========================================================

# Number of cluster peak files; they are numbered 1..acores.
acores = 9
acore_peaks_list = [
    acore_peak_filename_pattern.replace('*_*', str(cluster))
    for cluster in range(1, acores + 1)
]
# Space-separated form, ready to hand to computeMatrix --regionsFileName.
acore_peaks_string = ' '.join(acore_peaks_list)

if run_scale_occ:
    occ_list = ['S' + stage + 'AB_nucleoatac.occ.bedgraph' for stage in stage_array]
    
    for occ in occ_list:
        occ_norm = occ.replace('.occ.', '.occ.norm.')
        !python normalize_bedgraph/src/normalize_bedgraph.py --to-mean-signal 1.0 {occ} > {occ_norm}
        bwname = occ_norm.replace('.bedgraph', '.bw')
        !./bedGraphToBigWig {occ_norm} phaw_5.0.chrom.sizes {bwname}

if run_refpoint_mfuzz_occscaled == True:
    occ_norm_list = ['S' + stage + 'AB_nucleoatac.occ.norm.bw' for stage in stage_array]
    occ_norm_string = ' '.join(occ_norm_list)

    refpoint_name = 'Sall_nucATAC_occ.norm.' + regionprefix + '.refpoint.gz'
    !echo '### computing refpoint matrix for '{refpoint_string}'\n'
    !computeMatrix reference-point -S {occ_norm_string} --regionsFileName {acore_peaks_string} --referencePoint=center --outFileName {refpoint_name} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
        
    refpoint_pdf = refpoint_name.replace('.gz', '.pdf')
    !echo '### drawing figure into '{refpoint_pdf}'\n'
    !plotHeatmap --matrixFile {refpoint_name} --outFileName {refpoint_pdf}
    
    genericprefix = 'Sall_nucATAC_occ.norm.*.pdf'
    !mv {genericprefix} mikado_nucATAC

if run_scaled_mfuzz_occscaled == True:
    occ_norm_list = ['S' + stage + 'AB_nucleoatac.occ.norm.bw' for stage in stage_array]
    occ_norm_string = ' '.join(occ_norm_list)
    
    scaled_name_2 = 'Sall_nucATAC_occ.norm.' + regionprefix + '.scaled.gz'
    !echo '### computing scaled matrix for '{occ_norm_string}'\n'
    !computeMatrix scale-regions -S {occ_norm_string} --regionsFileName {acore_peaks_string} --outFileName {scaled_name_2} -b 400 -a 400 --numberOfProcessors max --skipZeros --missingDataAsZero
    
    scaled_pdf_2 = scaled_name_2.replace('.gz', '.pdf')
    !echo '### drawing figure into '{scaled_pdf_2}'\n'
    !plotHeatmap --matrixFile {scaled_name_2} --outFileName {scaled_pdf_2}

    genericprefix = 'Sall_nucATAC_occ.norm.*.pdf'
    !mv {genericprefix} mikado_nucATAC