# ChIP-seq

In [7]:
import os
import pandas as pd
import sys
import numpy as np
import sys
import re
sys.path.insert(0, '../..')
import itertools

from genepy.utils import helper as h
from genepy.epigenetics import chipseq as chip
from genepy.epigenetics import plot as genepyPlot

import dalmatian as dm
import pyBigWig

from bokeh.plotting import *
from IPython.display import IFrame
import igv
from biomart import BiomartServer
import io

from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
from sklearn.cluster import AgglomerativeClustering
from sklearn.mixture import GaussianMixture

output_notebook()
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## MYB_degraded_v1

Chipseq of MYB degron in MV411 cells at 1h of degradation with 0.5$\mu$M VHL (with Drosophilia spike-in control)
<br>paired end

Check existing project names. Do __NOT__ overwrite an existing project!

In [3]:
! gsutil ls gs://amlproject/Chip/

gs://amlproject/Chip/GEO/
gs://amlproject/Chip/IRF2BP2_degraded_v1/
gs://amlproject/Chip/IRF2BP2_degraded_v2/
gs://amlproject/Chip/IRF2BP2_degraded_v3/
gs://amlproject/Chip/IRF2BP2_degraded_v4/
gs://amlproject/Chip/IRF2BP2_degraded_v5/
gs://amlproject/Chip/IRF2BP2_degraded_v6/
gs://amlproject/Chip/IRF8_degraded_v1/
gs://amlproject/Chip/MEF2CMEF2D_knockout_v1/
gs://amlproject/Chip/MEF2D_degraded_v1/
gs://amlproject/Chip/MEF2D_degraded_v2/
gs://amlproject/Chip/MEF2D_degraded_v3/
gs://amlproject/Chip/MEF2D_degraded_v4/
gs://amlproject/Chip/PDX/
gs://amlproject/Chip/cobinding/
gs://amlproject/Chip/fastqs/
gs://amlproject/Chip/knockouts_v1/


In [4]:
! gsutil ls gs://transfer-amlproject/

gs://transfer-amlproject/Hello_Mo_Test
gs://transfer-amlproject/201120_MP8439_fastq/
gs://transfer-amlproject/210331_MP8777_fastq/
gs://transfer-amlproject/210514_MP8847_fastq/
gs://transfer-amlproject/210722_MP9119_fastq/
gs://transfer-amlproject/210823_TL9266_fastq/
gs://transfer-amlproject/211012_MP9377_fastq/
gs://transfer-amlproject/211025_MP9377_fastq/
gs://transfer-amlproject/211108_MP9454_fastq/
gs://transfer-amlproject/211217_MP9543_fastq/
gs://transfer-amlproject/220506_MP10011_fastq/
gs://transfer-amlproject/220506_MP10046_fastq/
gs://transfer-amlproject/Cobinding_additional/
gs://transfer-amlproject/IRF2BP2_degraded_v6/
gs://transfer-amlproject/MEF2D_degraded_v4/
gs://transfer-amlproject/MONOMAC_synMEF2CD/
gs://transfer-amlproject/RNP_MEIS1/
gs://transfer-amlproject/RNPv5/
gs://transfer-amlproject/additional_degraded_v1/
gs://transfer-amlproject/chipseq_MEF2C_2h_degraded/
gs://transfer-amlproject/chipseq_MEF2D_degraded/
gs://transfer-amlproject/chipseq_

In [3]:
project = "MYB_degraded_v1"
loc = "220506_MP10046_fastq" # transfer AML folder
datatype = "chipseq"
project_dir = datatype+"_"+project

In [16]:
project_dir

'chipseq_MYB_degraded_v1'

In [7]:
fastq = ! gsutil ls gs://transfer-amlproject/$loc/*.fastq.gz

for f in fastq:
    print(os.path.basename(f))

20220506_MED1-DMSO_MP10046_S112_R1_001.fastq.gz
20220506_MED1-DMSO_MP10046_S112_R2_001.fastq.gz
20220506_MED1-VHL_MP10046_S116_R1_001.fastq.gz
20220506_MED1-VHL_MP10046_S116_R2_001.fastq.gz
20220506_MYB-DMSO_MP10046_S110_R1_001.fastq.gz
20220506_MYB-DMSO_MP10046_S110_R2_001.fastq.gz
20220506_MYB-VHL_MP10046_S114_R1_001.fastq.gz
20220506_MYB-VHL_MP10046_S114_R2_001.fastq.gz
20220506_MYC-DMSO_MP10046_S111_R1_001.fastq.gz
20220506_MYC-DMSO_MP10046_S111_R2_001.fastq.gz
20220506_MYC-VHL_MP10046_S115_R1_001.fastq.gz
20220506_MYC-VHL_MP10046_S115_R2_001.fastq.gz
20220506_PolII-DMSO_MP10046_S113_R1_001.fastq.gz
20220506_PolII-DMSO_MP10046_S113_R2_001.fastq.gz
20220506_PolII-VHL_MP10046_S117_R1_001.fastq.gz
20220506_PolII-VHL_MP10046_S117_R2_001.fastq.gz


In [8]:
mkdir ../data/$project_dir/ && mkdir ../data/$project_dir/qc/ #make dir in /home/monika/AMLproject/data

In [9]:
! gsutil -m cp gs://transfer-amlproject/$loc/multiqc_report.html ../data/$project_dir/qc/
! gsutil -m cp -r gs://transfer-amlproject/$loc/Reports/ ../data/$project_dir/qc/
! gsutil -m cp -r gs://transfer-amlproject/$loc/multiqc_data/ ../data/$project_dir/qc/

Copying gs://transfer-amlproject/220506_MP10046_fastq/multiqc_report.html...
/ [1/1 files][  1.2 MiB/  1.2 MiB] 100% Done                                    
Operation completed over 1 objects/1.2 MiB.                                      
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/20220506_MED1-DMSO_MP10046_S112_R1_001_fastqc.html...
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/20220506_MED1-DMSO_MP10046_S112_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/20220506_MED1-DMSO_MP10046_S112_R2_001_fastqc.html...
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/20220506_MED1-DMSO_MP10046_S112_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/20220506_MED1-VHL_MP10046_S116_R1_001_fastqc.html...
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/20220506_MED1-VHL_MP10046_S116_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/220506_MP10046_fastq/Reports/2022050

In [10]:
! gsutil -m cp gs://transfer-amlproject/$loc/*  gs://transfer-amlproject/$project_dir/

Omitting prefix "gs://transfer-amlproject/220506_MP10046_fastq/Reports/". (Did you mean to do cp -r?)
Omitting prefix "gs://transfer-amlproject/220506_MP10046_fastq/multiqc_data/". (Did you mean to do cp -r?)
Copying gs://transfer-amlproject/220506_MP10046_fastq/._multiqc_report.html [Content-Type=text/html]...
Copying gs://transfer-amlproject/220506_MP10046_fastq/20220506_MED1-DMSO_MP10046_S112_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/220506_MP10046_fastq/20220506_MED1-DMSO_MP10046_S112_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/220506_MP10046_fastq/20220506_MED1-VHL_MP10046_S116_R1_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/220506_MP10046_fastq/20220506_MED1-VHL_MP10046_S116_R2_001.fastq.gz [Content-Type=application/octet-stream]...
Copying gs://transfer-amlproject/220506_MP10046_fastq/20220506_MYB-DMSO_MP10046_S110_R1_001.fastq.gz [Content

### download data

In [20]:
! mkdir ../../data/$project_dir && mkdir ../../data/$project_dir/fastqs \
&& gsutil -m cp gs://transfer-amlproject/$project_dir/* ../../data/$project_dir/fastqs

Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/._multiqc_report.html...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-DMSO_MP10046_S112_R1_001.fastq.gz...
==> NOTE: You are downloading one or more large file(s), which would            
run significantly faster if you enabled sliced object downloads. This
feature is enabled by default but requires that compiled crcmod be
installed (see "gsutil help crcmod").

Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-DMSO_MP10046_S112_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-VHL_MP10046_S116_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-VHL_MP10046_S116_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MYB-DMSO_MP10046_S110_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MYB-VHL_MP10046_S114_R1_001.fastq.gz...
Copying gs://trans

In [33]:
! gsutil -m cp gs://transfer-amlproject/$project_dir/* ../../data/$project_dir/fastqs

Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/._multiqc_report.html...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-VHL_MP10046_S116_R2_001.fastq.gz...
==> NOTE: You are downloading one or more large file(s), which would            
run significantly faster if you enabled sliced object downloads. This
feature is enabled by default but requires that compiled crcmod be
installed (see "gsutil help crcmod").

Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-DMSO_MP10046_S112_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-DMSO_MP10046_S112_R2_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MED1-VHL_MP10046_S116_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MYB-DMSO_MP10046_S110_R1_001.fastq.gz...
Copying gs://transfer-amlproject/chipseq_MYB_degraded_v1/20220506_MYB-DMSO_MP10046_S110_R2_001.fastq.gz...
Copying gs://tran

In [28]:
! rm ../../data/$project_dir/fastqs/multiqc_report.html

In [21]:
fastq_name_list = [i.split("/")[4].split(".")[0] for i in fastq]
len(fastq_name_list)
fastq_name_list

['20220506_MED1-DMSO_MP10046_S112_R1_001',
 '20220506_MED1-DMSO_MP10046_S112_R2_001',
 '20220506_MED1-VHL_MP10046_S116_R1_001',
 '20220506_MED1-VHL_MP10046_S116_R2_001',
 '20220506_MYB-DMSO_MP10046_S110_R1_001',
 '20220506_MYB-DMSO_MP10046_S110_R2_001',
 '20220506_MYB-VHL_MP10046_S114_R1_001',
 '20220506_MYB-VHL_MP10046_S114_R2_001',
 '20220506_MYC-DMSO_MP10046_S111_R1_001',
 '20220506_MYC-DMSO_MP10046_S111_R2_001',
 '20220506_MYC-VHL_MP10046_S115_R1_001',
 '20220506_MYC-VHL_MP10046_S115_R2_001',
 '20220506_PolII-DMSO_MP10046_S113_R1_001',
 '20220506_PolII-DMSO_MP10046_S113_R2_001',
 '20220506_PolII-VHL_MP10046_S117_R1_001',
 '20220506_PolII-VHL_MP10046_S117_R2_001']

In [34]:
rename = {
    "20220506_MYB-DMSO_MP10046_S110": "mp990-MV411_MYB_DMSO-MYB-r1",
    "20220506_MYC-DMSO_MP10046_S111": "mp991-MV411_MYB_DMSO-MYC-r1",
    "20220506_MED1-DMSO_MP10046_S112": "mp992-MV411_MYB_DMSO-MED1-r1",
    "20220506_PolII-DMSO_MP10046_S113": "mp993-MV411_MYB_DMSO-POLII_total-r1",
    
    "20220506_MYB-VHL_MP10046_S114": "mp994-MV411_MYB_VHL-MYB-r1",
    "20220506_MYC-VHL_MP10046_S115": "mp995-MV411_MYB_VHL-MYC-r1",
    "20220506_MED1-VHL_MP10046_S116": "mp996-MV411_MYB_VHL-MED1-r1",
    "20220506_PolII-VHL_MP10046_S117": "mp997-MV411_MYB_VHL-POLII_total-r1",
}

In [35]:
a = ! ls ../../data/$project_dir/fastqs
a

['20220506_MED1-DMSO_MP10046_S112_R1_001.fastq.gz',
 '20220506_MED1-DMSO_MP10046_S112_R2_001.fastq.gz',
 '20220506_MED1-VHL_MP10046_S116_R1_001.fastq.gz',
 '20220506_MED1-VHL_MP10046_S116_R2_001.fastq.gz',
 '20220506_MYB-DMSO_MP10046_S110_R1_001.fastq.gz',
 '20220506_MYB-DMSO_MP10046_S110_R2_001.fastq.gz',
 '20220506_MYB-VHL_MP10046_S114_R1_001.fastq.gz',
 '20220506_MYB-VHL_MP10046_S114_R2_001.fastq.gz',
 '20220506_MYC-DMSO_MP10046_S111_R1_001.fastq.gz',
 '20220506_MYC-DMSO_MP10046_S111_R2_001.fastq.gz',
 '20220506_MYC-VHL_MP10046_S115_R1_001.fastq.gz',
 '20220506_MYC-VHL_MP10046_S115_R2_001.fastq.gz',
 '20220506_PolII-DMSO_MP10046_S113_R1_001.fastq.gz',
 '20220506_PolII-DMSO_MP10046_S113_R2_001.fastq.gz',
 '20220506_PolII-VHL_MP10046_S117_R1_001.fastq.gz',
 '20220506_PolII-VHL_MP10046_S117_R2_001.fastq.gz']

In [36]:
# rename fastqs locally
for val in a:
    rep = val
    for k,v in rename.items():
        rep = rep.replace(k,v)
    ! mv ../../data/$project_dir/fastqs/$val ../../data/$project_dir/fastqs/$rep
    ! gsutil cp gs://transfer-amlproject/$project_dir/$val gs://amlproject/Chip/$project/fastqs/$rep

In [40]:
from gsheets import Sheets
sheets = Sheets.from_files('~/.client_secret_web.json', '~/.storage.json')
url = "https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U/edit?ts=5fab1071#gid=738732237"
gsheet = sheets.get(url).sheets[10].to_frame()

In [42]:
gsheet.head()

Unnamed: 0,id,cell line,replicate,protein,quality,time,paired_end,isNarrow,matching input name,processed,...,total,ratio to droso,unique mapped reads(droso),scaling factor,prefix,Total QC,Peaks,Compensated Wigs,Output,Bigwigs
0,mp100,U937,1,INPUT,,,n,,,Y,...,,,,,https://storage.cloud.google.com/,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...,,https://console.cloud.google.com/storage/brows...,https://console.cloud.google.com/storage/brows...
1,mp101,NOMO1,1,INPUT,,,n,,,Y,...,,,,,,,,,,
2,mp102,UT7,1,INPUT,,,n,,,Y,...,,,,,,,,,,
3,mp106,MV411,1,MYB,,,n,y,mp99,Y,...,,,,,,,,,,
4,mp109,M6,1,CEBPA,,,n,y,mp98,Y,...,,,,,,,,,,


In [45]:
fastq = ! gsutil ls gs://amlproject/Chip/$project/fastqs/

In [55]:
# design matrix
df = {
"fastq_1": [],
"fastq_2": [],
"antibody": [],
"group": [],
"replicate": [],
"control": []
}
for val in h.grouped(fastq, 2):
    row = gsheet[gsheet.id==val[0].split('/')[-1].split('-')[0]]
    df['group'].append("MV4"+row['name'].values[0].split('-r')[0].split('-MV4')[1])
    df['replicate'].append(row['name'].values[0].split('-r')[1])
    df['fastq_1'].append(val[0])
    df['fastq_2'].append(val[1])
    df['antibody'].append(row['protein'].values[0])
    df['control'].append("INPUT")
df['group'].append('INPUT')
df['replicate'].append(1)
df['fastq_1'].append('gs://amlproject/Chip/fastqs/paired_end/mp845-MV411-INPUT-r2_R1.fastq.gz')
df['fastq_2'].append('gs://amlproject/Chip/fastqs/paired_end/mp845-MV411-INPUT-r2_R2.fastq.gz')
df['antibody'].append("")
df['control'].append("")
df = pd.DataFrame(df)

In [56]:
df

Unnamed: 0,fastq_1,fastq_2,antibody,group,replicate,control
0,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MYB,MV411_MYB_DMSO-MYB,1,INPUT
1,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MYC,MV411_MYB_DMSO-MYC,1,INPUT
2,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MED1,MV411_MYB_DMSO-MED1,1,INPUT
3,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,POLII_total,MV411_MYB_DMSO-POLII_total,1,INPUT
4,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MYB,MV411_MYB_VHL-MYB,1,INPUT
5,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MYC,MV411_MYB_VHL-MYC,1,INPUT
6,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MED1,MV411_MYB_VHL-MED1,1,INPUT
7,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,POLII_total,MV411_MYB_VHL-POLII_total,1,INPUT
8,gs://amlproject/Chip/fastqs/paired_end/mp845-M...,gs://amlproject/Chip/fastqs/paired_end/mp845-M...,,INPUT,1,


In [50]:
# nextflow CSV
design_file = "../nextflow/"+project+"_design.csv"
design_file_path = os.path.abspath(design_file)
print(design_file_path)
# saved with column order as: group, replicate, fastq_1, fastq_2, antibody, control
df[df.columns[[3,4,0,1,2,5]]].to_csv(design_file_path,index=False)

/home/monika/AMLproject/nextflow/MYB_degraded_v1_design.csv


In [58]:
df = pd.read_csv(design_file) # read in design file
df = df.replace(np.nan, '', regex=True)
df = df[["fastq_1", "fastq_2", "antibody", "group", "replicate", "control"]]
df.iloc[:1,:]

Unnamed: 0,fastq_1,fastq_2,antibody,group,replicate,control
0,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,gs://amlproject/Chip/MYB_degraded_v1/fastqs/mp...,MYB,MV411_MYB_DMSO-MYB,1,INPUT


### run nextflow

__about__

0. you need to have a google project set up with a billing account
1. you need to activte your APIs this way: https://cloud.google.com/life-sciences/docs/tutorials/nextflow?hl=fr
2. nextflow needs to be installed with this installation command `export NXF_MODE=google && curl https://get.nextflow.io | bash`

In [59]:
! cd /home/monika && sudo ./nextflow log ## to get access to the previous runs

TIMESTAMP          	DURATION  	RUN NAME         	STATUS	REVISION ID	SESSION ID                          	COMMAND                                                                                                                                                                                                                                                                                                                                                                                                          
2021-07-08 20:08:32	-         	hopeful_albattani	-     	fb4924dd0d 	117e88ad-5b5f-4ad3-a368-4ebee2f804c8	nextflow run monikaperez/chipseq --paired_end --seq_center DFCI --email jkobject@gmail.com --input AMLproject/nextflow/MEF2D_degraded_v4_design.csv --genome GRCh38 --max_cpus 16 -profile mpcloud -w 'gs://workamlproject/MEF2Cv4res' --spiking --skip_diff_analysis --outdir 'gs://workamlproject/MEF2Cv4' --tracedir AMLproject/nextflow/MEF2Cdegradedv4/                                       

In [65]:
# write nextflow command to string
export_header = "export NXF_mode=google && sudo"
nextflow_call = "./nextflow run monikaperez/chipseq" # run outside of AMLproject dir
endedness = "--paired_end"
seq_center = "--seq_center " + "'DFCI'"
email = "--email " + "'monikawenperez@gmail.com'"
design_input = "--input " + design_file_path
genome = "--genome " + "GRCh38"
max_cpus = "--max_cpus " + str(16)
profile = "-profile " + "mpcloud"
work_dir = "-w " + "gs://workamlproject/{}res".format(project)
outdir = "--outdir "+"gs://workamlproject/"+project
spiking = "--spiking"
tracedir = "--tracedir AMLproject/nextflow/"+project
#peak_type = "--narrow_peak"

nextflow_args = [export_header, nextflow_call, endedness,
                 seq_center, email, design_input, 
                 genome, max_cpus, profile, 
                 work_dir, outdir, spiking, tracedir,
                 "--skip_diff_analysis"]

# process chips
print("Copy and paste string into command line:\nRun from outside of AMLproject")
print("\n".join(nextflow_args))

Copy and paste string into command line:

export NXF_mode=google && sudo
./nextflow run monikaperez/chipseq
--paired_end
--seq_center 'DFCI'
--email 'monikawenperez@gmail.com'
--input /home/monika/AMLproject/nextflow/MYB_degraded_v1_design.csv
--genome GRCh38
--max_cpus 16
-profile mpcloud
-w gs://workamlproject/MYB_degraded_v1res
--outdir gs://workamlproject/MYB_degraded_v1
--spiking
--tracedir AMLproject/nextflow/MYB_degraded_v1
--skip_diff_analysis


In [66]:
print(" ".join(nextflow_args))

export NXF_mode=google && sudo ./nextflow run monikaperez/chipseq --paired_end --seq_center 'DFCI' --email 'monikawenperez@gmail.com' --input /home/monika/AMLproject/nextflow/MYB_degraded_v1_design.csv --genome GRCh38 --max_cpus 16 -profile mpcloud -w gs://workamlproject/MYB_degraded_v1res --outdir gs://workamlproject/MYB_degraded_v1 --spiking --tracedir AMLproject/nextflow/MYB_degraded_v1 --skip_diff_analysis


move results to main bucket

In [72]:
# move results to main bucket
! gsutil -m cp -r gs://workamlproject/$project/* gs://amlproject/Chip/$project/

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam.bai [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam [Content-Type=application/octet-st

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_DMSO-POLII_total_R1.plotFingerprint.qcmetrics.txt [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_DMSO-POLII_total_R1.plotFingerprint.raw.txt [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_VHL-MED1_R1.plotFingerprint.pdf [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_VHL-MED1_R1.plotFingerprint.qcmetrics.txt [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_VHL-MED1_R1.plotFingerprint.raw.txt [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV4

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotProfile/MV411_MYB_VHL-MED1_R1.plotProfile.pdf [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotProfile/MV411_MYB_VHL-MED1_R1.plotProfile.tab [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotProfile/MV411_MYB_VHL-MYB_R1.computeMatrix.mat.gz [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotProfile/MV411_MYB_VHL-MYB_R1.computeMatrix.vals.mat.tab [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotProfile/MV411_MYB_VHL-MYB_R1.plotHeatmap.mat.tab [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotProfile/MV411_MYB_VHL-MYB_R1.plotHeatmap.pdf [Content-Type=application/octet-strea

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_VHL-POLII_total_R1_peaks.gappedPeak [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_VHL-POLII_total_R1_peaks.xls [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MED1/MED1.consensus_peaks.annotatePeaks.txt [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MED1/MED1.consensus_peaks.bed [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MED1/MED1.consensus_peaks.boolean.annotatePeaks.txt [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MED1/MED1.consensus_peaks.boolean.intersect.plot.pdf [Content-Type=appli

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/phantompeakqualtools/MV411_MYB_VHL-POLII_total_R1_spp_correlation_mqc.tsv [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/phantompeakqualtools/MV411_MYB_VHL-POLII_total_R1_spp_nsc_mqc.tsv [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/phantompeakqualtools/MV411_MYB_VHL-POLII_total_R1_spp_rsc_mqc.tsv [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/INPUT_R1.mLb.clN.CollectMultipleMetrics.alignment_summary_metrics [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/INPUT_R1.mLb.clN.CollectMultipleMetrics.base_distribution_by_cycle_metrics [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/INPUT_R1.mLb.clN.Coll

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/pdf/MV411_MYB_VHL-MYB_R1.mLb.clN.CollectMultipleMetrics.quality_distribution.pdf [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/pdf/MV411_MYB_VHL-MYC_R1.mLb.clN.CollectMultipleMetrics.base_distribution_by_cycle.pdf [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/pdf/MV411_MYB_VHL-MYC_R1.mLb.clN.CollectMultipleMetrics.insert_size_histogram.pdf [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/pdf/MV411_MYB_VHL-MYC_R1.mLb.clN.CollectMultipleMetrics.quality_by_cycle.pdf [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/picard_metrics/pdf/MV411_MYB_VHL-MYC_R1.mLb.clN.CollectMultipleMetrics.quality_distribution.pdf [Content-Type=application/octet-str

Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/samtools_stats/MV411_MYB_DMSO-POLII_total_R1.mLb.mkD.sorted.bam.flagstat [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/samtools_stats/MV411_MYB_DMSO-POLII_total_R1.mLb.mkD.sorted.bam.idxstats [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/samtools_stats/MV411_MYB_DMSO-POLII_total_R1.mLb.mkD.sorted.bam.stats [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/samtools_stats/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam.flagstat [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/samtools_stats/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam.idxstats [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/bwa/mergedLibrary/samtools_stats/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam.stats [Cont

Copying gs://workamlproject/MYB_degraded_v1/fastqc/INPUT_R1_T1_1_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/INPUT_R1_T1_2_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/MV411_MYB_DMSO-MED1_R1_T1_1_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/MV411_MYB_DMSO-MED1_R1_T1_2_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/MV411_MYB_DMSO-MYB_R1_T1_1_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/MV411_MYB_DMSO-MYB_R1_T1_2_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/MV411_MYB_DMSO-MYC_R1_T1_1_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/fastqc/MV411_MYB_DMSO-MYC_R1_T1_2_fastqc.html [Co

Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/MV411_MYB_VHL-MYC_R1_T1_1_val_1_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/MV411_MYB_VHL-MYC_R1_T1_2_val_2_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/MV411_MYB_VHL-POLII_total_R1_T1_1_val_1_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/MV411_MYB_VHL-POLII_total_R1_T1_2_val_2_fastqc.html [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/zips/INPUT_R1_T1_1_val_1_fastqc.zip [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/zips/INPUT_R1_T1_2_val_2_fastqc.zip [Content-Type=application/octet-stream]...
Copying gs://workamlproject/MYB_degraded_v1/trim_galore/fastqc/zips/MV411_MYB_DMSO-MED1_R1_T1_1_val_

### get counts for droso scales

In [68]:
# get unique mapped reads(droso)
! gsutil cat gs://amlproject/Chip/$project/droso_aligned/counts/*
! gsutil ls gs://amlproject/Chip/$project/droso_aligned/counts/

10684804
14376964
17145980
17387728
14001076
17053942
17816174
721324
12537200
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/INPUT_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_DMSO-MED1_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_DMSO-MYB_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_DMSO-MYC_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_DMSO-POLII_total_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_VHL-MED1_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_VHL-MYB_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_VHL-MYC_R1_T1_counts.txt
gs://amlproject/Chip/MYB_degraded_v1/droso_aligned/counts/MV411_MYB_VHL-POLII_total_R1_T1_counts.txt


In [4]:
counts = ! gsutil cat gs://amlproject/Chip/$project/droso_aligned/counts/*
samples = ! gsutil ls gs://amlproject/Chip/$project/droso_aligned/counts/

# get unique mapped reads(droso) to sample
for c,s in zip(counts, samples):
    print(s.split("/")[-1]+" : "+c)

INPUT_R1_T1_counts.txt : 10684804
MV411_MYB_DMSO-MED1_R1_T1_counts.txt : 14376964
MV411_MYB_DMSO-MYB_R1_T1_counts.txt : 17145980
MV411_MYB_DMSO-MYC_R1_T1_counts.txt : 17387728
MV411_MYB_DMSO-POLII_total_R1_T1_counts.txt : 14001076
MV411_MYB_VHL-MED1_R1_T1_counts.txt : 17053942
MV411_MYB_VHL-MYB_R1_T1_counts.txt : 17816174
MV411_MYB_VHL-MYC_R1_T1_counts.txt : 721324
MV411_MYB_VHL-POLII_total_R1_T1_counts.txt : 12537200


use the total counts in gs://amlproject/Chip/MYB_degraded_v1/multiqc/broadPeak/multiqc_report.html
<br>use MERGED LIB: SAMTools (filtered) > Percent Mapped data [samtools_alignment_plot-2]
https://console.cloud.google.com/storage/browser/amlproject/Chip/MYB_degraded_v1/multiqc/broadPeak/multiqc_report.html
<br>manually enter total counts in "total" column in AML sample tracker > Chip_Tracker_JK

In [78]:
# copy bams, bigwigs, and macs data to local comp
h.createFoldersFor('../../data/chipseq_'+project+'/bwa/mergedLibrary/')
! gsutil -m cp gs://amlproject/Chip/$project/bwa/mergedLibrary/*.ba* ../../data/chipseq_$project/bwa/mergedLibrary/
! gsutil -m cp -r gs://amlproject/Chip/$project/bwa/mergedLibrary/bigwig/ ../../data/chipseq_$project/bwa/mergedLibrary/
! gsutil -m cp -r gs://amlproject/Chip/$project/bwa/mergedLibrary/macs/ ../../data/chipseq_$project/bwa/mergedLibrary/

Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam.bai...
==> NOTE: You are downloading one or more large file(s), which would
run significantly faster if you enabled sliced object downloads. This
feature is enabled by default but requires that compiled crcmod be
installed (see "gsutil help crcmod").

Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam.bai...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam.bai...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam...
Copying gs://amlpr

Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MED1/MED1.consensus_peaks.saf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MYB/MYB.consensus_peaks.annotatePeaks.txt...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MYB/MYB.consensus_peaks.bed...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MYB/MYB.consensus_peaks.boolean.annotatePeaks.txt...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MYB/MYB.consensus_peaks.boolean.intersect.plot.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MYB/MYB.consensus_peaks.boolean.intersect.txt...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/consensus/MYB/MYB.consensus_peaks.boolean.txt...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/co

Computing scales from AML sample tracker sheet
* using drosophilia ratio (ratio to droso)
* DMSO: scale = treatment / DMSO
* treatment: scale = DMSO / treatment
* anything above 1 is set to 1 so we are not scaling twice

In [5]:
scales = [ #same as file list order
    1.00, #MV411_MYB_DMSO MED1 R1
    1.00, #MV411_MYB_DMSO MYB R1
    0.06, #MV411_MYB_DMSO MYC R1
    0.81, #MV411_MYB_DMSO POLII_total R1
    
    0.82, #MV411_MYB_VHL MED1 R1
    0.89, #MV411_MYB_VHL MYB R1
    1.00, #MV411_MYB_VHL MYC R1
    1.00  #MV411_MYB_VHL POLII_total R1
]

In [6]:
bams = ! ls ../../data/chipseq_$project/bwa/mergedLibrary/*.bam
bams_short = list(i.split("/")[6] for i in bams)
bams_short

['INPUT_R1.mLb.clN.sorted.bam',
 'MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam',
 'MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam',
 'MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam',
 'MV411_MYB_DMSO-POLII_total_R1.mLb.clN.sorted.bam',
 'MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam',
 'MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam',
 'MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam',
 'MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam']

In [4]:
names = ["MED1_R1", "MYB_R1", "MYC_R1", "POLII_total_R1"]

## on scaled data

In [82]:
! mkdir ../../data/chipseq_$project/diffPeaks/ && ! mkdir ../../data/chipseq_$project/diffData/

In [8]:
wigs = ! ls ../../data/chipseq_$project/bwa/mergedLibrary/bigwig/*.bigWig
wigs

['../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MED1_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MYB_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MYC_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-POLII_total_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MED1_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MYB_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MYC_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-POLII_total_R1.bigWig']

### diffPeak on scaled data (full reprocessing)

__fullDiffPeak__
<br>uses macs3 to call differential peak binding from 2 bam files and their control
<br>(can also provide some spike-in scaling info)
* compute avg fragment size (macs3 predictd -i)
* macs3 callpeak -B -t bam1 -c control1 --nomodel --extsize predicted_fragment_length -n name1 --outdir directory -f endedness

__diffPeak__
<br>calls MACS2 bdgdiff

In [89]:
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    print(i)
    bam1 = bams[1+i]
    bam2 = bams[1+len(names)+i]
    print(bams_short[1+i])
    print(bams_short[1+len(names)+i])
    print([scales[i], scales[len(names)+i]])
    print("\n")

0
MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam
MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam
[1.0, 0.82]


1
MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam
MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam
[1.0, 0.89]


2
MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam
MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam
[0.06, 1.0]


3
MV411_MYB_DMSO-POLII_total_R1.mLb.clN.sorted.bam
MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam
[0.81, 1.0]




In [90]:
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    bam1 = bams[1+i]
    bam2 = bams[1+len(names)+i]
    print(bams_short[1+i])
    print(bams_short[1+len(names)+i])
    print(await chip.fullDiffPeak(bam1, bam2, control1 = bams[0], 
                            scaling = [scales[i], scales[len(names)+i]], 
                            directory = "../../data/chipseq_"+project+"/diffData/", 
                            res_directory = "../../data/chipseq_"+project+"/diffPeaks/", 
                            pairedend=True))
    print("\n")

MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam
MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam and ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam
computing the fragment avg size
218
computing the scaling values
b'INFO  @ Mon, 16 May 2022 15:10:07: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 218 -n MV411_MYB_DMSO-MED1_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_DMSO-MED1_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.

b'INFO  @ Mon, 16 May 2022 15:32:15: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 218 -n MV411_MYB_VHL-MED1_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-MED1_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range for c

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MED1_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MED1_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MED1_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MED1_R1_control_lambda.bdg --d1 37355451 --d2 42521653 -g 60 -l 218 --o-prefix MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 16 May 2022 15:55:52: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 16 May 2022 15:56:43: Read and build control 1 bedGraph... \nINFO  @ Mon, 16 May 2022 15:59:16: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 16 May 2022 16:00:02: Read and build control 2 bedGraph... \nINFO  @ Mon, 16 May 2022 16:09:00: Write peaks... \nINFO  @ Mon, 16 May 2022 16:09:00: 

b'INFO  @ Mon, 16 May 2022 16:34:49: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 221 -n MV411_MYB_VHL-MYB_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-MYB_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range for calcu

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MYB_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MYB_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MYB_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MYB_R1_control_lambda.bdg --d1 41185974 --d2 41360951 -g 60 -l 221 --o-prefix MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MYB_VHL-MYB_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 16 May 2022 16:56:37: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 16 May 2022 16:57:33: Read and build control 1 bedGraph... \nINFO  @ Mon, 16 May 2022 17:00:05: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 16 May 2022 17:00:53: Read and build control 2 bedGraph... \nINFO  @ Mon, 16 May 2022 17:10:16: Write peaks... \nINFO  @ Mon, 16 May 2022 17:10:16: Done \

b'INFO  @ Mon, 16 May 2022 17:34:27: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 225 -n MV411_MYB_VHL-MYC_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-MYC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range for calcu

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MYC_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MYC_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MYC_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MYC_R1_control_lambda.bdg --d1 514601933 --d2 26069961 -g 60 -l 225 --o-prefix MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MYB_VHL-MYC_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 16 May 2022 17:55:07: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 16 May 2022 17:55:50: Read and build control 1 bedGraph... \nINFO  @ Mon, 16 May 2022 17:58:23: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 16 May 2022 17:58:59: Read and build control 2 bedGraph... \nINFO  @ Mon, 16 May 2022 18:07:47: Write peaks... \nINFO  @ Mon, 16 May 2022 18:07:47: Done 

b'INFO  @ Mon, 16 May 2022 18:33:56: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 220 -n MV411_MYB_VHL-POLII_total_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-POLII_total_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards sma

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-POLII_total_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-POLII_total_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-POLII_total_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-POLII_total_R1_control_lambda.bdg --d1 46594586 --d2 42873240 -g 60 -l 220 --o-prefix MV411_MYB_DMSO-POLII_total_R1_treat_pileup_vs_MV411_MYB_VHL-POLII_total_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks/', returncode=0, stdout=b'', stderr=b'INFO  @ Mon, 16 May 2022 18:57:37: Read and build treatment 1 bedGraph... \nINFO  @ Mon, 16 May 2022 18:58:28: Read and build control 1 bedGraph... \nINFO  @ Mon, 16 May 2022 19:01:06: Read and build treatment 2 bedGraph... \nINFO  @ Mon, 16 May 2022 19:02:00: Read and build control 2 bedGraph... \nINFO  @ Mon, 16 May 2022 19:11:51: Write peak

In [9]:
initscales = ! cat ../../data/chipseq_$project/bwa/mergedLibrary/bigwig/scale/*.txt
initscales

['0.00744797',
 '0.0132932',
 '0.0120614',
 '0.0160682',
 '0.0131957',
 '0.0142413',
 '0.0134773',
 '0.0190134',
 '0.011588']

In [10]:
rescales = [val*float(initscales[1+i]) for i, val in enumerate(scales)]
rescales

[0.0132932,
 0.0120614,
 0.000964092,
 0.010688517,
 0.011677865999999999,
 0.011994797,
 0.0190134,
 0.011588]

In [11]:
bams

['../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-POLII_total_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam']

In [13]:
chip.bigWigFrom(bams[1:], genome="GRCh38", scaling=rescales, numthreads=8)

In [14]:
! mkdir ../../data/chipseq_$project/recalib_bigwig/ && mv bigwig/* ../../data/chipseq_$project/recalib_bigwig/

In [16]:
! gsutil -m cp -r ../../data/chipseq_$project/recalib_bigwig gs://amlproject/Chip/$project/

Copying file://../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MYB_R1.bw [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run          
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying file://../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-POLII_total_R1.bw [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degr

In [17]:
os.popen('for i in $(ls ../../data/chipseq_'+project+'/diffPeaks/*.bed); \
            do echo $(wc -l $i); \
            done').read().split('\n')

['5951 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_common.bed',
 '490 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed',
 '62 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_cond2.bed',
 '25366 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MYB_VHL-MYB_R1_treat_pileup_c3.0_common.bed',
 '18826 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MYB_VHL-MYB_R1_treat_pileup_c3.0_cond1.bed',
 '893 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MYB_VHL-MYB_R1_treat_pileup_c3.0_cond2.bed',
 '1 ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MYB_VHL-MYC_R1_treat_pileup_c3.0_common.bed',
 '1 ../..

In [47]:
bed_counts = ! wc -l ../../data/chipseq_$project/diffPeaks/*.bed

In [48]:
def getDiffPeakCounts(bed_counts, scaled, names):
    peak_types = ["common", "cond1", "cond2"]
    rows = []
    
    for i,s in enumerate(bed_counts[:-1]):
        counts = re.search(r'\d+', s).group()
        file = s.split("    ")[-1].split("/")[-1]
        sample_group = next(name for name in names if name in file)
        cond_dict = {"common": "COMMON", "cond1": "DMSO", "cond2": "VHL"}
        peak_cond = cond_dict.get(next(peak_type for peak_type in peak_types if peak_type in file))
        rows.append([scaled, sample_group, peak_cond, file, counts])
    
    return(pd.DataFrame(rows, columns=["SCALE", "SAMPLE", "PEAK_TYPE", "FILE", "N_PEAK"]))

In [50]:
df_diff_peak = getDiffPeakCounts(bed_counts, scaled="SCALED", names=names)

In [52]:
df_diff_peak.head()

Unnamed: 0,SCALE,SAMPLE,PEAK_TYPE,FILE,N_PEAK
0,SCALED,MED1_R1,COMMON,MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_M...,5951
1,SCALED,MED1_R1,DMSO,MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_M...,490
2,SCALED,MED1_R1,VHL,MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_M...,62
3,SCALED,MYB_R1,COMMON,MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MY...,25366
4,SCALED,MYB_R1,DMSO,MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MY...,18826


In [20]:
bw = ! ls ../../data/chipseq_$project/recalib_bigwig/*
bw

['../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MED1_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MYB_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MYC_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-POLII_total_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MED1_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MYB_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MYC_R1.bw',
 '../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-POLII_total_R1.bw']

In [21]:
! mkdir ../results/chipseq_$project/
! mkdir ../results/chipseq_$project/plots
! mkdir ../results/chipseq_$project/plots/heatmaps/

In [22]:
peaks = ! ls ../../data/chipseq_$project/bwa/mergedLibrary/macs/broadPeak/*.broadPeak
peaks

['../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_DMSO-MED1_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_DMSO-MYB_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_DMSO-MYC_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_DMSO-POLII_total_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_VHL-MED1_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_VHL-MYB_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_VHL-MYC_R1_peaks.broadPeak',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/macs/broadPeak/MV411_MYB_VHL-POLII_total_R1_peaks.broadPeak']

### merging peaks VHL/DMSO

In [25]:
mpeaks = []
for i,val in enumerate(names):
    if i<0:
        continue
    print(val)
    dmso = peaks[i]
    vhl = peaks[i+len(names)]
    chip.simpleMergePeaks(pd.concat([chip.loadPeaks(dmso), chip.loadPeaks(vhl)])).to_csv(
        '../../data/chipseq_' + project + '/' + val + '_genomewide_merged.bed', sep='\t', header=False, index=False)
    mpeaks.append('../../data/chipseq_'+project+'/'+val+'_genomewide_merged.bed')

MED1_R1


  bindings = bindings.drop(5, 1).drop(4, 1).rename(columns={6:4, 7:5, 8:6, 9:7,})


0.03.2354083085285365e-056.470816617057073e-059.70622492558561e-050.000129416332341141460.00016177041542642680.00019412449851171220.000226478581596997540.00025883266468228290.000291186747767568270.00032354083085285360.0003558949139381390.00038824899702342440.000420603080108709730.00045295716319399510.000485311246279280430.00051766532936456580.00055001941244985120.00058237349553513650.00061472757862042190.00064708166170570720.00067943574479099260.0007117898278762780.00074414391096156330.00077649799404684880.00080885207713213410.00084120616021741950.00087356024330270480.00090591432638799020.00093826840947327550.00097062249255856090.00100297657564384620.00103533065872913170.0010676847418144170.00110003882489970240.00113239290798498760.0011647469910702730.00119710107415555830.00122945515724084380.00126180924032612920.00129416332341141450.00132651740649670.00135887148958198520.00139122557266727060.0014235796557525560.00145593373883784140.0014882

0.0460075061472757850.046039860230361070.046072214313446360.046104568396531640.0461369224796169260.046169276562702210.04620163064578750.046233984728872780.046266338811958070.0462986928950433540.046331046978128640.046363401061213930.046395755144299210.0464281092273844950.046460463310469780.046492817393555070.046525171476640350.0465575255597256360.046589879642810920.046622233725896210.046654587808981490.046686941892066780.0467192959751520640.046751650058237350.046784004141322640.046816358224407920.0468487123074932050.046881066390578490.046913420473663780.046945774556749060.0469781286398343460.047010482722919630.047042836806004920.04707519088909020.047107544972175490.0471398990552607740.047172253138346060.047204607221431350.047236961304516630.0472693153876019150.04730166947068720.047334023553772490.047366377636857770.0473987317199430560.047431085803028340.047463439886113630.047495793969198910.04752814805228420.0475605021353694840.0475928562

0.096641646175747370.096674000258832660.096706354341917950.096738708425003230.096771062508088520.096803416591173810.09683577067425910.096868124757344380.096900478840429670.096932832923514940.096965187006600230.096997541089685520.09702989517277080.097062249255856090.097094603338941380.097126957422026660.097159311505111950.097191665588197220.097224019671282510.09725637375436780.097288727837453080.097321081920538370.097353436003623660.097385790086708950.097418144169794230.097450498252879520.097482852335964790.097515206419050080.097547560502135370.097579914585220650.097612268668305940.097644622751391230.097676976834476510.09770933091756180.097741685000647090.097774039083732360.097806393166817650.097838747249902940.097871101332988220.097903455416073510.09793580949915880.097968163582244080.098000517665329370.098032871748414640.098065225831499930.098097579914585220.09812993399767050.098162288080755790.098194642163841080.098226996246926370.0982

0.141969716578232180.142002070661317450.142034424744402750.142066778827488030.142099132910573330.14213148699365860.142163841076743870.142196195159829180.142228549242914450.142260903325999750.142293257409085020.142325611492170320.14235796557525560.142390319658340870.142422673741426170.142455027824511440.142487381907596740.142519735990682020.142552090073767320.14258444415685260.14261679823993790.142649152323023170.142681506406108440.142713860489193740.1427462145722790.14277856865536430.142810922738449590.14284327682153490.142875630904620160.142907984987705460.142940339070790730.1429726931538760.14300504723696130.143037401320046580.143069755403131880.143102109486217150.143134463569302460.143166817652387730.143199171735473030.14323152581855830.143263879901643580.143296233984728880.143328588067814150.143360942150899450.143393296233984720.143425650317070020.14345800440015530.14349035848324060.143522712566325870.143555066649411140.14358742073

0.189756697295198650.189789051378283950.189821405461369220.189853759544454520.18988611362753980.18991846771062510.189950821793710370.189983175876795640.190015529959880940.190047884042966210.190080238126051520.19011259220913680.19014494629222210.190177300375307360.190209654458392660.190242008541477940.19027436262456320.19030671670764850.190339070790733780.190371424873819080.190403778956904360.190436133039989660.190468487123074930.19050084120616020.19053319528924550.190565549372330780.190597903455416080.190630257538501350.190662611621586650.190694965704671930.190727319787757230.19075967387084250.190792027953927770.190824382037013070.190856736120098350.190889090203183650.190921444286268920.190953798369354220.19098615245243950.19101850653552480.191050860618610070.191083214701695340.191115568784780640.191147922867865920.191180276950951220.19121263103403650.19124498511712180.191277339200207060.191309693283292360.191342047366377640.19137440144

0.2371877830982270.237220137181312270.237252491264397570.237284845347482840.237317199430568140.237349553513653420.237381907596738720.2374142616798240.23744661576290930.237478969845994560.237511323929079840.237543678012165140.23757603209525040.23760838617833570.237640740261420990.237673094344506290.237705448427591560.237737802510676860.237770156593762130.23780251067684740.23783486475993270.237867218843017980.237899572926103280.237931927009188550.237964281092273850.237996635175359130.238028989258444430.23806134334152970.238093697424614980.238126051507700280.238158405590785550.238190759673870850.238223113756956120.238255467840041420.23828782192312670.2383201760062120.238352530089297270.238384884172382540.238417238255467840.238449592338553120.238481946421638420.23851430050472370.2385466545878090.238579008670894270.238611362753979540.238643716837064840.23867607092015010.23870842500323540.23874077908632070.2387731331694060.238805487252491260

0.28513653423061990.28516888831370520.285201242396790460.28523359647987580.285265950562961060.285298304646046330.28533065872913160.28536301281221690.28539536689530220.28542772097838750.285460075061472750.2854924291445580.285524783227643350.28555713731072860.28558949139381390.285621845476899170.285654199559984450.28568655364306980.285718907726155050.28575126180924030.28578361589232560.28581596997541090.28584832405849620.285880678141581470.285913032224666740.2859453863077520.285977740390837340.28601009447392260.28604244855700790.286074802640093160.28610715672317850.286139510806263760.286171864889349040.28620421897243430.28623657305551960.28626892713860490.28630128122169020.286333635304775460.286365989387860730.286398343470946060.286430697554031330.28646305163711660.28649540572020190.286527759803287150.28656011388637250.286592467969457750.2866248220525430.28665717613562830.286689530218713630.28672188430179890.28675423838488420.28678659246

0.331273456710236840.33130581079332210.33133816487640740.33137051895949270.3314028730425780.331435227125663260.331467581208748530.33149993529183380.331532289374919130.33156464345800440.33159699754108970.331629351624174950.33166170570726030.331694059790345550.33172641387343080.33175876795651610.331791122039601370.33182347612268670.3318558302057720.331888184288857250.33192053837194250.331952892455027850.33198524653811310.33201760062119840.332049954704283670.332082308787368940.332114662870454270.332147016953539540.33217937103662480.33221172511971010.33224407920279540.33227643328588070.332308787368965960.332341141452051240.33237349553513650.332405849618221840.33243820370130710.33247055778439240.332502911867477660.3325352659505630.332567620033648260.332599974116733530.33263232819981880.33266468228290410.33269703636598940.33272939044907470.332761744532159950.33279409861524520.332826452698330550.332858806781415830.33289116086450110.3329235149

0.38203701307104960.382069367154134860.382101721237220140.38213407532030540.38216642940339070.3821987834864760.38223113756956130.382263491652646560.382295845735731830.382328199818817160.382360553901902430.38239290798498770.3824252620680730.382457616151158250.38248997023424360.382522324317328850.38255467840041410.38258703248349940.382619386566584730.382651740649670.38268409473275530.382716448815840550.38274880289892580.382781156982011150.38281351106509640.38284586514818170.382878219231266970.38291057331435230.382942927397437570.382975281480522840.38300763556360810.38303998964669340.38307234372977870.3831046978128640.383137051895949270.383169405979034540.383201760062119870.383234114145205140.38326646822829040.38329882231137570.383331176394460960.38336353047754630.383395884560631560.383428238643716830.38346059272680210.38349294680988740.38352530089297270.3835576549760580.383590009059143260.383622363142228530.383654717225313860.38368707130

0.42882101721237220.42885337129545750.428885725378542750.42891807946162810.428950433544713350.42898278762779860.42901514171088390.42904749579396920.42907984987705450.429112203960139770.429144558043225040.42917691212631030.429209266209395650.42924162029248090.42927397437556620.429306328458651460.42933868254173680.429371036624822070.429403390707907340.42943574479099260.42946809887407790.42950045295716320.42953280704024850.429565161123333760.429597515206419030.429629869289504360.429662223372589640.42969457745567490.42972693153876020.429759285621845450.42979163970493080.429823993788016060.429856347871101330.42988870195418660.429921056037271930.42995341012035720.42998576420344250.430018118286527750.4300504723696130.430082826452698350.43011518053578360.43014753461886890.430179888701954170.430212242785039440.43024459686812480.430276950951210050.43030930503429530.43034165911738060.43037401320046590.43040636728355120.430438721366636470.43047107

0.477934515335835360.47796686941892070.477999223502005970.478031577585091240.47806393166817650.47809628575126180.47812863983434710.47816099391743240.478193348000517660.478225702083602930.478258056166688260.478290410249773530.47832276433285880.47835511841594410.478387472499029350.47841982658211470.478452180665199960.478484534748285230.47851688883137050.478549242914455830.47858159699754110.47861395108062640.478646305163711650.47867865924679690.478711013329882250.47874336741296750.47877572149605280.478808075579138070.47884042966222340.478872783745308670.478905137828393950.47893749191147920.47896984599456450.47900220007764980.47903455416073510.479066908243820370.479099262326905640.479131616409990970.479163970493076240.47919632457616150.47922867865924680.479261032742332060.47929338682541740.479325740908502660.479358094991587940.47939044907467320.47942280315775850.47945515724084380.47948751132392910.479519865407014360.479552219490099630.4795

0.52536560113886380.5253979552219490.52543030930503430.52546266338811950.52549501747120490.52552737155429020.52555972563737540.52559207972046070.5256244338035460.52565678788663130.52568914196971660.52572149605280180.52575385013588720.52578620421897250.52581855830205770.5258509123851430.52588326646822820.52591562055131360.52594797463439890.52598032871748410.52601268280056950.52604503688365470.526077390966740.52610974504982530.52614209913291050.52617445321599590.52620680729908110.52623916138216640.52627151546525170.5263038695483370.52633622363142230.52636857771450760.52640093179759280.52643328588067820.52646563996376340.52649799404684870.5265303481299340.52656270221301930.52659505629610460.52662741037918980.52665976446227510.52669211854536050.52672447262844570.5267568267115310.52678918079461620.52682153487770160.52685388896078690.52688624304387210.52691859712695740.52695095121004280.5269833052931280.52701565937621330.52704801345929850

0.57250550019412450.57253785427720980.5725702083602950.57260256244338040.57263491652646560.57266727060955090.57269962469263620.57273197877572150.57276433285880680.57279668694189210.57282904102497730.57286139510806270.57289374919114790.57292610327423320.57295845735731850.57299081144040380.57302316552348910.57305551960657430.57308787368965960.5731202277727450.57315258185583020.57318493593891550.57321729002200070.5732496441050860.57328199818817140.57331435227125660.57334670635434190.57337906043742730.57341141452051250.57344376860359780.5734761226866830.57350847676976830.57354083085285370.57357318493593890.57360553901902420.57363789310210940.57367024718519480.57370260126828010.57373495535136530.57376730943445060.57379966351753590.57383201760062120.57386437168370650.57389672576679170.57392907984987710.57396143393296240.57399378801604760.57402614209913290.57405849618221820.57409085026530350.57412320434838880.5741555584314740.574187912514559

0.62035718907726160.62038954316034690.62042189724343210.62045425132651740.62048660540960270.6205189594926880.62055131357577330.62058366765885850.62061602174194390.62064837582502910.62068072990811440.62071308399119970.6207454380742850.62077779215737030.62081014624045550.62084250032354080.62087485440662620.62090720848971140.62093956257279670.62097191665588190.62100427073896720.62103662482205260.62106897890513780.62110133298822310.62113368707130840.62116604115439370.6211983952374790.62123074932056420.62126310340364950.62129545748673490.62132781156982010.62136016565290540.62139251973599060.6214248738190760.62145722790216130.62148958198524650.62152193606833180.6215542901514170.62158664423450240.62161899831758770.62165135240067290.62168370648375830.62171606056684360.62174841464992880.62178076873301410.62181312281609940.62184547689918470.621877830982270.62191018506535520.62194253914844060.62197489323152580.62200724731461110.6220396013976964

0.66788533712954580.6679176912126310.66795004529571630.66798239937880160.66801475346188690.66804710754497220.66807946162805740.66811181571114280.66814416979422810.66817652387731330.66820887796039860.66824123204348380.66827358612656920.66830594020965450.66833829429273970.6683706483758250.66840300245891030.66843535654199560.66846771062508090.66850006470816610.66853241879125150.66856477287433670.6685971269574220.66862948104050730.66866183512359260.66869418920667790.66872654328976320.66875889737284840.66879125145593380.6688236055390190.66885595962210430.66888831370518960.66892066778827490.66895302187136020.66898537595444540.66901773003753070.66905008412061610.66908243820370130.66911479228678660.66914714636987180.66917950045295720.66921185453604250.66924420861912770.6692765627022130.66930891678529840.66934127086838360.66937362495146890.66940597903455410.66943833311763950.66947068720072480.669503041283810.66953539536689530.6695677494499805

0.7147340494370390.71476640352012420.71479875760320950.71483111168629480.71486346576938010.71489581985246540.71492817393555060.7149605280186360.71499288210172120.71502523618480650.71505759026789180.71508994435097710.71512229843406240.71515465251714770.71518700660023290.71521936068331830.71525171476640350.71528406884948880.71531642293257410.71534877701565940.71538113109874470.71541348518182990.71544583926491520.71547819334800060.71551054743108580.71554290151417110.71557525559725630.71560760968034160.7156399637634270.71567231784651220.71570467192959750.71573702601268290.71576938009576810.71580173417885340.71583408826193860.7158664423450240.71589879642810930.71593115051119450.71596350459427980.7159958586773650.71602821276045040.71606056684353570.71609292092662090.71612527500970620.71615762909279150.71618998317587680.71622233725896210.71625469134204730.71628704542513270.7163193995082180.71635175359130320.71638410767438850.7164164617574738

0.7621327811569820.76216513524006730.76219748932315260.76222984340623790.76226219748932310.76229455157240840.76232690565549380.7623592597385790.76239161382166430.76242396790474950.76245632198783490.76248867607092020.76252103015400540.76255338423709070.7625857383201760.76261809240326130.76265044648634660.76268280056943180.76271515465251720.76274750873560250.76277986281868770.7628122169017730.76284457098485830.76287692506794360.76290927915102890.76294163323411410.76297398731719950.76300634140028470.763038695483370.76307104956645530.76310340364954050.76313575773262590.76316811181571110.76320046589879640.76323281998188170.7632651740649670.76329752814805230.76332988223113760.76336223631422280.76339459039730820.76342694448039340.76345929856347870.7634916526465640.76352400672964930.76355636081273460.76358871489581980.76362106897890510.76365342306199050.76368577714507570.7637181312281610.76375048531124620.76378283939433160.76381519347741690.

0.80891678529830470.80894913938138990.80898149346447520.80901384754756040.80904620163064580.80907855571373110.80911090979681630.80914326387990170.8091756179629870.80920797204607220.80924032612915750.80927268021224270.80930503429532810.80933738837841340.80936974246149860.8094020965445840.80943445062766920.80946680471075450.80949915879383980.8095315128769250.80956386696001040.80959622104309560.80962857512618090.80966092920926620.80969328329235150.80972563737543680.80975799145852210.80979034554160730.80982269962469270.80985505370777790.80988740779086320.80991976187394850.80995211595703380.80998447004011910.81001682412320430.81004917820628960.8100815322893750.81011388637246020.81014624045554550.81017859453863070.8102109486217160.81024330270480140.81027565678788660.81030801087097190.81034036495405730.81037271903714250.81040507312022780.8104374272033130.81046978128639830.81050213536948370.81053448945256890.81056684353565420.8105991976187394

0.85553901902420080.85557137310728610.85560372719037150.85563608127345670.8556684353565420.85570078943962720.85573314352271260.85576549760579790.85579785168888310.85583020577196840.85586255985505370.8558949139381390.85592726802122430.85595962210430950.85599197618739490.85602433027048010.85605668435356540.85608903843665070.8561213925197360.85615374660282130.85618610068590660.85621845476899180.85625080885207720.85628316293516240.85631551701824770.8563478711013330.85638022518441830.85641257926750360.85644493335058880.85647728743367410.85650964151675950.85654199559984470.856574349682930.85660670376601520.85663905784910050.85667141193218590.85670376601527110.85673612009835640.85676847418144180.8568008282645270.85683318234761230.85686553643069750.85689789051378280.85693024459686820.85696259867995340.85699495276303870.85702730684612390.85705966092920930.85709201501229460.85712436909537980.85715672317846510.85718907726155040.8572214313446357

0.90332599974116730.90335835382425270.90339070790733790.90342306199042320.90345541607350840.90348777015659380.90352012423967910.90355247832276430.90358483240584960.90361718648893490.90364954057202020.90368189465510550.90371424873819070.90374660282127610.90377895690436140.90381131098744660.90384366507053190.90387601915361710.90390837323670250.90394072731978780.9039730814028730.90400543548595840.90403778956904360.90407014365212890.90410249773521420.90413485181829940.90416720590138480.904199559984470.90423191406755530.90426426815064060.90429662223372590.90432897631681120.90436133039989640.90439368448298170.90442603856606710.90445839264915230.90449074673223760.9045231008153230.90455545489840820.90458780898149350.90462016306457870.9046525171476640.90468487123074940.90471722531383460.90474957939691990.90478193348000510.90481428756309050.90484664164617580.9048789957292610.90491134981234630.90494370389543160.90497605797851690.9050084120616022

0.95069237737802510.95072473146111040.95075708554419570.95078943962728090.95082179371036620.95085414779345160.95088650187653680.95091885595962210.95095121004270740.95098356412579270.9510159182088780.95104827229196320.95108062637504850.95111298045813390.95114533454121910.95117768862430440.95121004270738960.9512423967904750.95127475087356030.95130710495664550.95133945903973080.9513718131228160.95140416720590140.95143652128898670.95146887537207190.95150122945515720.95153358353824260.95156593762132780.95159829170441310.95163064578749830.95166299987058370.9516953539536690.95172770803675420.95176006211983950.95179241620292480.95182477028601010.95185712436909540.95188947845218060.9519218325352660.95195418661835120.95198654070143650.95201889478452180.95205124886760710.95208360295069240.95211595703377770.95214831111686290.95218066519994830.95221301928303350.95224537336611880.95227772744920410.95231008153228940.95234243561537470.952374789698459

MYB_R1352918338295
MYC_R18170609004273564
POLII_total_R155373657
0.99998345849736996457

### GENOME WIDE comparison

In [35]:
! ls ../../data/chipseq_$project/recalib_bigwig/*

../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MED1_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MYB_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MYC_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-POLII_total_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MED1_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MYB_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MYC_R1.bw
../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-POLII_total_R1.bw


In [5]:
condname="_MYB_ko"
dmsoname="_DMSO"

In [54]:
for i, val in enumerate(bw):
    if i <0:
        continue
    print(os.path.basename(val))
    name = names[i-len(names)]+condname if i//len(names) else names[i]+dmsoname
    print(name)
    print(mpeaks[i%len(names)])
    genepyPlot.getPeaksAt(mpeaks[i%len(names)], bigwigs=val, bigwignames= name, 
                          peaknames=['Macs2_Peaks'], window=3000, folder="", title=name, numthreads=8, 
                          refpoint="center", name='../../data/chipseq_'+project+'/'+name+'_mat.pdf', withDeeptools=True, 
                          torecompute=True, legendLoc="lower-left")

MV411_MYB_DMSO-MED1_R1.bw
MED1_R1_DMSO
../../data/chipseq_MYB_degraded_v1/MED1_R1_genomewide_merged.bed
CompletedProcess(args="computeMatrix reference-point -S ../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MED1_R1.bw  --referencePoint center --regionsFileName ../../data/chipseq_MYB_degraded_v1/MED1_R1_genomewide_merged.bed  --missingDataAsZero --outFileName ../../data/chipseq_MYB_degraded_v1/MED1_R1_DMSO_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/chipseq_MYB_degraded_v1/MED1_R1_DMSO_mat.gz --outFileName ../../data/chipseq_MYB_degraded_v1/MED1_R1_DMSO_mat.pdf --refPointLabel center --legendLocation lower-left --regionsLabel Macs2_Peaks --plotTitle 'MED1_R1_DMSO'", returncode=0, stdout=b'', stderr=b'')
MV411_MYB_DMSO-MYB_R1.bw
MYB_R1_DMSO
../../data/chipseq_MYB_degraded_v1/MYB_R1_genomewide_merged.bed
CompletedProcess(args="computeMatrix reference-point -S ../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV41

### making overlapping profiles

In [59]:
for i, val in enumerate(names):
    if i <0:
        continue
    val1 = '../../data/chipseq_'+project+'/' + val +dmsoname+'_mat.gz'
    val2 = '../../data/chipseq_'+project+'/' + val +condname+'_mat.gz'
    print(val)
    print(os.path.basename(val1))
    print(os.path.basename(val2))

MED1_R1
MED1_R1_DMSO_mat.gz
MED1_R1_MYB_ko_mat.gz
MYB_R1
MYB_R1_DMSO_mat.gz
MYB_R1_MYB_ko_mat.gz
MYC_R1
MYC_R1_DMSO_mat.gz
MYC_R1_MYB_ko_mat.gz
POLII_total_R1
POLII_total_R1_DMSO_mat.gz
POLII_total_R1_MYB_ko_mat.gz


why use [DMSO,VHL] instead of [dmsoname, condname] like above?

In [60]:
for i, val in enumerate(names):
    if i <0:
        continue
    val1 = '../../data/chipseq_'+project+'/' + val +dmsoname+'_mat.gz'
    val2 = '../../data/chipseq_'+project+'/' + val +condname+'_mat.gz'
    print(val)
    genepyPlot.makeProfiles(matx=[val1,val2], matnames=['DMSO','VHL'], title=val, 
                            refpoint="center", name='../../data/chipseq_'+project+'/'+val+'_combined_mat.pdf', 
                            legendLoc="lower-left")

MED1_R1
CompletedProcess(args='computeMatrixOperations relabel -m ../../data/chipseq_MYB_degraded_v1/MED1_R1_DMSO_mat.gz -o ../../data/chipseq_MYB_degraded_v1/MED1_R1_DMSO_mat.gz --groupLabels DMSO && computeMatrixOperations relabel -m ../../data/chipseq_MYB_degraded_v1/MED1_R1_MYB_ko_mat.gz -o ../../data/chipseq_MYB_degraded_v1/MED1_R1_MYB_ko_mat.gz --groupLabels VHL && computeMatrixOperations rbind -m ../../data/chipseq_MYB_degraded_v1/MED1_R1_DMSO_mat.gz ../../data/chipseq_MYB_degraded_v1/MED1_R1_MYB_ko_mat.gz -o ../../data/chipseq_MYB_degraded_v1/MED1_R1_combined_mat.gz && plotProfile --matrixFile ../../data/chipseq_MYB_degraded_v1/MED1_R1_combined_mat.gz --outFileName ../../data/chipseq_MYB_degraded_v1/MED1_R1_combined_mat.pdf --refPointLabel center --legendLocation lower-left --plotTitle MED1_R1', returncode=0, stdout=b'', stderr=b'')
MYB_R1
CompletedProcess(args='computeMatrixOperations relabel -m ../../data/chipseq_MYB_degraded_v1/MYB_R1_DMSO_mat.gz -o ../../data/chipseq_MYB_de

In [61]:
h.createFoldersFor("../results/chipseq_"+project+"/plots/scaled/heatmaps/")
! cp ../../data/chipseq_$project/*.pdf ../results/chipseq_$project/plots/scaled/heatmaps/

In [63]:
cond1peak = ! ls ../../data/chipseq_$project/diffPeaks/*cond1.bed
cond2peak = ! ls ../../data/chipseq_$project/diffPeaks/*cond2.bed
commonpeak = ! ls ../../data/chipseq_$project/diffPeaks/*common.bed

In [68]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    # DMSO & VHL bw
    name1 = bw[i]
    name2 = bw[i+len(names)]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name)
    genepyPlot.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], 
                          peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], 
                          window=3000, folder="", title=name, numthreads=8, refpoint="center", 
                          name='../../data/chipseq_'+project+'/diffPeaks/'+name+'_mat.pdf', 
                          withDeeptools=True, torecompute=False)

MED1_R1
CompletedProcess(args="computeMatrix reference-point -S ../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_DMSO-MED1_R1.bw ../../data/chipseq_MYB_degraded_v1/recalib_bigwig/MV411_MYB_VHL-MED1_R1.bw  --referencePoint center --regionsFileName ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_cond2.bed  --missingDataAsZero --outFileName ../../data/chipseq_MYB_degraded_v1/diffPeaks/MED1_R1_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/chipseq_MYB_degraded_v1/diffPeaks/MED1_R1_mat.gz --outFileName ../../data/chipseq_MYB_degraded_v1/diffPeaks/MED1_R1_mat.pdf --refPointLabel center -

In [6]:
! mkdir ../results/chipseq_$project/plots/scaled/diffPeaks/
! cp ../../data/chipseq_$project/diffPeaks/*.pdf ../results/chipseq_$project/plots/scaled/diffPeaks/

## on unscalled data

In [7]:
! mkdir ../../data/chipseq_$project/diffPeaks_unscaled

In [8]:
bams = ! ls ../../data/chipseq_$project/bwa/mergedLibrary/*.bam
bams

['../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYB_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-POLII_total_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam']

In [14]:
names

['MED1_R1', 'MYB_R1', 'MYC_R1', 'POLII_total_R1']

### diffPeak on unscaled data (full reprocessing)

In [17]:
%%time

# on unscalled data
for i in range(int(len(bams)/2)):
    if i < 0:
        continue
    name1 = bams[1+i]
    name2 = bams[1+len(names)+i]
    print(os.path.basename(name1))
    print(os.path.basename(name2))
    # call diff peak binding w/ macs3
    print(await chip.fullDiffPeak(name1,name2, control1="../../data/chipseq_"+project+"/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam", 
                                  directory = "../../data/chipseq_"+project+"/diffData_unscaled/", 
                                  res_directory = "../../data/chipseq_"+project+"/diffPeaks_unscaled/", pairedend=True))

MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam
MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam
doing diff from ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam and ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam
computing the fragment avg size
218
computing the scaling values
b'INFO  @ Wed, 18 May 2022 17:21:27: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 218 -n MV411_MYB_DMSO-MED1_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_DMSO-MED1_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_DMSO-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1

b'INFO  @ Wed, 18 May 2022 17:43:16: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 218 -n MV411_MYB_VHL-MED1_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-MED1_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MED1_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Ra

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-MED1_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-MED1_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-MED1_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-MED1_R1_control_lambda.bdg --d1 37355451 --d2 34867756 -g 60 -l 218 --o-prefix MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 18 May 2022 18:04:37: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 18 May 2022 18:05:33: Read and build control 1 bedGraph... \nINFO  @ Wed, 18 May 2022 18:08:14: Read and build treatment 2 bedGraph... \nINFO  @ Wed, 18 May 2022 18:09:02: Read and build control 2 bedGraph... \nINFO  @ Wed, 18 May 2022 18:18:14: Write p

b'INFO  @ Wed, 18 May 2022 18:44:43: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 221 -n MV411_MYB_VHL-MYB_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-MYB_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYB_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range 

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-MYB_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-MYB_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-MYB_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-MYB_R1_control_lambda.bdg --d1 41185974 --d2 36811247 -g 60 -l 221 --o-prefix MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MYB_VHL-MYB_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 18 May 2022 19:07:14: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 18 May 2022 19:08:09: Read and build control 1 bedGraph... \nINFO  @ Wed, 18 May 2022 19:10:42: Read and build treatment 2 bedGraph... \nINFO  @ Wed, 18 May 2022 19:11:30: Read and build control 2 bedGraph... \nINFO  @ Wed, 18 May 2022 19:21:12: Write peaks..

b'INFO  @ Wed, 18 May 2022 19:44:38: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 225 -n MV411_MYB_VHL-MYC_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-MYC_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-MYC_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled towards smaller dataset.\n# Range 

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-MYC_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-MYC_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-MYC_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-MYC_R1_control_lambda.bdg --d1 30876116 --d2 26069961 -g 60 -l 225 --o-prefix MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MYB_VHL-MYC_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 18 May 2022 20:06:19: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 18 May 2022 20:06:59: Read and build control 1 bedGraph... \nINFO  @ Wed, 18 May 2022 20:09:28: Read and build treatment 2 bedGraph... \nINFO  @ Wed, 18 May 2022 20:10:03: Read and build control 2 bedGraph... \nINFO  @ Wed, 18 May 2022 20:18:41: Write peaks..

b'INFO  @ Wed, 18 May 2022 20:45:04: \n# Command line: callpeak -B -t ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam -c ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam --nomodel --extsize 220 -n MV411_MYB_VHL-POLII_total_R1 --outdir ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/ -f BAMPE\n# ARGUMENTS LIST:\n# name = MV411_MYB_VHL-POLII_total_R1\n# format = BAMPE\n# ChIP-seq file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/MV411_MYB_VHL-POLII_total_R1.mLb.clN.sorted.bam\']\n# control file = [\'../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/INPUT_R1.mLb.clN.sorted.bam\']\n# effective genome size = 2.70e+09\n# band width = 300\n# model fold = [5, 50]\n# qvalue cutoff = 5.00e-02\n# The maximum gap between significant sites is assigned as the read length/tag size.\n# The minimum length of peaks is assigned as the predicted fragment length "d".\n# Larger dataset will be scaled to

CompletedProcess(args='macs3 bdgdiff --t1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-POLII_total_R1_treat_pileup.bdg --c1 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_DMSO-POLII_total_R1_control_lambda.bdg --t2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-POLII_total_R1_treat_pileup.bdg --c2 ../../data/chipseq_MYB_degraded_v1/diffData_unscaled/MV411_MYB_VHL-POLII_total_R1_control_lambda.bdg --d1 37741615 --d2 42873240 -g 60 -l 220 --o-prefix MV411_MYB_DMSO-POLII_total_R1_treat_pileup_vs_MV411_MYB_VHL-POLII_total_R1_treat_pileup --outdir ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/', returncode=0, stdout=b'', stderr=b'INFO  @ Wed, 18 May 2022 21:08:15: Read and build treatment 1 bedGraph... \nINFO  @ Wed, 18 May 2022 21:09:04: Read and build control 1 bedGraph... \nINFO  @ Wed, 18 May 2022 21:11:37: Read and build treatment 2 bedGraph... \nINFO  @ Wed, 18 May 2022 21:12:31: Read and build control 2 bedGraph... \n

In [35]:
bed_counts_unscaled = ! wc -l ../../data/chipseq_$project/diffPeaks_unscaled/*.bed

In [53]:
df_diff_peak = df_diff_peak.append(getDiffPeakCounts(bed_counts_unscaled, scaled="UNSCALED", names=names), ignore_index=True)

In [55]:
print(df_diff_peak.shape)
df_diff_peak

(24, 5)


Unnamed: 0,SCALE,SAMPLE,PEAK_TYPE,FILE,N_PEAK
0,SCALED,MED1_R1,COMMON,MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_M...,5951
1,SCALED,MED1_R1,DMSO,MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_M...,490
2,SCALED,MED1_R1,VHL,MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_M...,62
3,SCALED,MYB_R1,COMMON,MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MY...,25366
4,SCALED,MYB_R1,DMSO,MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MY...,18826
5,SCALED,MYB_R1,VHL,MV411_MYB_DMSO-MYB_R1_treat_pileup_vs_MV411_MY...,893
6,SCALED,MYC_R1,COMMON,MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MY...,1
7,SCALED,MYC_R1,DMSO,MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MY...,1
8,SCALED,MYC_R1,VHL,MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MY...,39313
9,SCALED,POLII_total_R1,COMMON,MV411_MYB_DMSO-POLII_total_R1_treat_pileup_vs_...,62708


In [56]:
# diff peak count CSV
diff_peak_count_file = "../../data/chipseq_"+project+"/"+project+"_diff_peak_counts.csv"
diff_peak_count_file_path = os.path.abspath(diff_peak_count_file)
print(diff_peak_count_file_path)
df_diff_peak.to_csv(diff_peak_count_file_path, index=False)

/home/monika/data/chipseq_MYB_degraded_v1/MYB_degraded_v1_diff_peak_counts.csv


In [None]:
df_diff_peak.to_csv("../results/chipseq_"+project+"/"+project+"_diff_peak_counts.csv", index=False)

In [21]:
bw = ! ls ../../data/chipseq_$project/bwa/mergedLibrary/bigwig/*.bigWig
bw

['../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/INPUT_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MED1_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MYB_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MYC_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-POLII_total_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MED1_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MYB_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MYC_R1.bigWig',
 '../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-POLII_total_R1.bigWig']

### making overlapping profiles

In [22]:
cond1peak = ! ls ../../data/chipseq_$project/diffPeaks_unscaled/*cond1.bed
cond2peak = ! ls ../../data/chipseq_$project/diffPeaks_unscaled/*cond2.bed
commonpeak = ! ls ../../data/chipseq_$project/diffPeaks_unscaled/*common.bed

In [60]:
for i in range(int(len(bw)/2)):
    if i <0:
        continue
    # DMSO & VHL bw
    name1 = bw[1+i]
    name2 = bw[1+i+len(names)]
    peak = [cond1peak[i], commonpeak[i], cond2peak[i]]
   # for val in peak:
      #  chip.dropWeirdChromosomes(val)
    name = names[i]
    print(name)
    genepyPlot.getPeaksAt(peak, [name1, name2], bigwignames=['DMSO', 'VHL'], 
                          peaknames=['DMSO_peaks', 'common', 'VHL_peaks'], 
                          window=3000, folder="", title=name, numthreads=8, refpoint="center", 
                          name='../../data/chipseq_'+project+'/diffPeaks_unscaled/'+name+'_mat.pdf', 
                          withDeeptools=True, torecompute=True)

MED1_R1
CompletedProcess(args="computeMatrix reference-point -S ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_DMSO-MED1_R1.bigWig ../../data/chipseq_MYB_degraded_v1/bwa/mergedLibrary/bigwig/MV411_MYB_VHL-MED1_R1.bigWig  --referencePoint center --regionsFileName ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_cond1.bed ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_common.bed ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_cond2.bed  --missingDataAsZero --outFileName ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/MED1_R1_mat.gz --upstream 3000 --downstream 3000 --numberOfProcessors 8 && plotHeatmap --matrixFile ../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/MED1_R1_mat.gz --outFileName ../../data/c

In [62]:
h.createFoldersFor('../results/chipseq_'+project+'/plots/unscaled/diffPeaks/')
! cp ../../data/chipseq_$project/diffPeaks_unscaled/*.pdf ../results/chipseq_$project/plots/unscaled/diffPeaks/

! gsutil -m cp gs://amlproject/Chip/$project/bwa/mergedLibrary/deepTools/**.pdf ../results/chipseq_$project/plots/

Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_DMSO-MED1_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_DMSO-MYB_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_DMSO-MYC_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_DMSO-POLII_total_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_VHL-MED1_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_VHL-MYB_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/mergedLibrary/deepTools/plotFingerprint/MV411_MYB_VHL-MYC_R1.plotFingerprint.pdf...
Copying gs://amlproject/Chip/MYB_degraded_v1/bwa/merged

In [63]:
! gsutil -m cp -r ../../data/chipseq_$project/diffPeaks gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/chipseq_$project/diffData gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/chipseq_$project/diffPeaks_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/chipseq_$project/diffData_unscaled gs://amlproject/Chip/$project/
! gsutil -m cp -r ../../data/chipseq_$project/peakplot gs://amlproject/Chip/$project/

Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks/POLII_total_R1_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-POLII_total_R1_treat_pileup_vs_MV411_MYB_VHL-POLII_total_R1_treat_pileup_c3.0_common.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MYC_R1_treat_pileup_vs_MV411_MYB_VHL-MYC_R1_treat_pileup_c3.0_cond2.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks/MYB_R1_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_pileup_c3.0_common.bed [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks/MYC_R1_mat.pdf [Content-Type=application/pdf]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks

Copying file://../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MYC_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_VHL-MYB_R1_control_lambda.bdg [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MYB_R1_peaks.narrowPeak [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffData/MV411_MYB_DMSO-MYC_R1_treat_pileup.bdg [Content-Type=application/octet-stream]...
/ [40/40 files][ 64.0 GiB/ 64.0 GiB] 100% Done 180.4 MiB/s ETA 00:00:00         
Operation completed over 40 objects/64.0 GiB.                                    
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/POLII_total_R1_mat.gz [Content-Type=application/octet-stream]...
Copying file://../../data/chipseq_MYB_degraded_v1/diffPeaks_unscaled/MV411_MYB_DMSO-MED1_R1_treat_pileup_vs_MV411_MYB_VHL-MED1_R1_treat_p

- [40/40 files][ 64.0 GiB/ 64.0 GiB] 100% Done 167.4 MiB/s ETA 00:00:00         
Operation completed over 40 objects/64.0 GiB.                                    
CommandException: No URLs matched: ../../data/chipseq_MYB_degraded_v1/peakplot
CommandException: 1 file/object could not be transferred.
