# SLAMseq

## Installing slamdunk (only do once)

In [1]:
# install slamdunk
#! pip3 install git+https://github.com/jkobject/slamdunk.git --upgrade

In [2]:
# clone and install genepy
# pip install -e genepy

In [3]:
# please also install cutadapt, fastqc, and trimgalore
#! conda install -c bioconda cutadapt
#! conda install -c bioconda fastqc
#! conda install -c bioconda trim-galore

In [4]:
from __future__ import print_function
import os.path
import pandas as pd
import sys
sys.path.insert(0, '../../')
import seaborn as sns
import numpy as np
from natsort import os_sorted

from genepy.utils import helper as h
from genepy.utils import plot as genepyPlot
from genepy.rna import pyDESeq2
import genepy.rna as rna

In [6]:
from bokeh.plotting import *
from bokeh.models import HoverTool
from bokeh.io import output_notebook

import matplotlib.pyplot as plt

from sklearn.neighbors import KNeighborsClassifier
from sklearn.manifold import MDS, TSNE
from sklearn.decomposition import PCA
from sklearn.preprocessing import scale
#from umap import UMAP

output_notebook()
%load_ext autoreload
%matplotlib inline
%autoreload 2
%load_ext rpy2.ipython

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [7]:
def createDir(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    A one-line notice is printed before the directory is created; nothing
    happens when the path already exists.

    Args:
        directory (str): path of the directory to create.
    """
    if not os.path.exists(directory):
        print("Creating output directory: " + directory)
        # exist_ok=True closes the gap between the existence check above and
        # the creation: if a parallel job creates the same directory in
        # between, this call no longer raises FileExistsError.
        os.makedirs(directory, exist_ok=True)

# MV411 inhibitors

SLAMseq of MV411 with MYC, CDK9, THZ1 inhibitors
<br>1h incubation with respective agent + 1h 4sU with 0.5$\mu$M VHL (with ERCC spike-in control mix #1 used)
<br>MS2: 50$\mu$M
<br>JQ1: 1$\mu$M
<br>MYCi361: 10$\mu$M
<br>THZ1: 10$\mu$M
<br>DMSO brought up to 0.25% for all samples
<br>__Processed without alt loci__

In [8]:
# Project/batch identifier: used below to build the data and results paths
# and to find this batch's rows in the sample tracker sheet.
project = "inhibitors_v2"

In [9]:
# Relative path of this project's data directory.
# NOTE(review): the next code cell repeats this exact assignment.
location = "../data/slamseq_"+project+"/" # new data location

## download the data

In [10]:
# Relative path of this project's data directory; several cells below
# additionally prefix it with "../".
location = "../data/slamseq_"+project+"/" # new data location
# Folder inside gs://transfer-amlproject/ that holds this run's fastq and QC files.
loc = "220610_MP10091_fastq" # transfer AML folder

In [11]:
! mkdir ../$location
! mkdir ../results/slamseq_$project
! mkdir $location && mkdir $location/qc # make data dir in AMLproject
! mkdir ../$location/fastqs/ # make dir in /home/monika/data/project/fastqs

In [13]:
# List the fastq files available in the transfer bucket folder for this run.
! gsutil ls gs://transfer-amlproject/$loc/*.fastq.gz

gs://transfer-amlproject/220610_MP10091_fastq/20220610_10_MV411_JQ1_B_MP10091_S10_R1_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_10_MV411_JQ1_B_MP10091_S10_R2_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_11_MV411_JQ1_C_MP10091_S11_R1_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_11_MV411_JQ1_C_MP10091_S11_R2_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_12_MV411_JQ1_D_MP10091_S12_R1_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_12_MV411_JQ1_D_MP10091_S12_R2_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_13_MV411_MYCi361_A_MP10091_S13_R1_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_13_MV411_MYCi361_A_MP10091_S13_R2_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_14_MV411_MYCi361_B_MP10091_S14_R1_001.fastq.gz
gs://transfer-amlproject/220610_MP10091_fastq/20220610_14_MV411_MYCi361_B_MP10091_S14_R2_001.fast

In [14]:
# Transfer the QC files from the bucket into the local qc directory:
# the MultiQC report, the Reports/ folder (per-file fastqc html/zip) and multiqc_data/.
! gsutil -m cp gs://transfer-amlproject/$loc/multiqc_report.html $location/qc/
! gsutil -m cp -r gs://transfer-amlproject/$loc/Reports/ $location/qc/
! gsutil -m cp -r gs://transfer-amlproject/$loc/multiqc_data/ $location/qc/

Copying gs://transfer-amlproject/220610_MP10091_fastq/multiqc_report.html...
/ [1/1 files][  1.7 MiB/  1.7 MiB] 100% Done                                    
Operation completed over 1 objects/1.7 MiB.                                      
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_10_MV411_JQ1_B_MP10091_S10_R1_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_10_MV411_JQ1_B_MP10091_S10_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_10_MV411_JQ1_B_MP10091_S10_R2_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_10_MV411_JQ1_B_MP10091_S10_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_11_MV411_JQ1_C_MP10091_S11_R1_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_11_MV411_JQ1_C_MP10091_S11_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/220610_MP1

Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_26_MV411_JQ1THZ1_B_MP10091_S26_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_26_MV411_JQ1THZ1_B_MP10091_S26_R2_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_26_MV411_JQ1THZ1_B_MP10091_S26_R2_001_fastqc.zip...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_27_MV411_JQ1THZ1_C_MP10091_S27_R1_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_27_MV411_JQ1THZ1_C_MP10091_S27_R1_001_fastqc.zip...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_27_MV411_JQ1THZ1_C_MP10091_S27_R2_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_28_MV411_JQ1THZ1_D_MP10091_S28_R1_001_fastqc.html...
Copying gs://transfer-amlproject/220610_MP10091_fastq/Reports/20220610_27_MV411_JQ1THZ1_C_MP10091_S27_R2_001_fastqc.zip...
Copying gs:/

In [15]:
# Keep a copy of the MultiQC report in the project's results folder.
! cp $location/qc/multiqc_report.html ../results/slamseq_$project

In [16]:
# Capture the bucket listing (one gs:// path per entry) for the name checks below.
# NOTE(review): this matches *.gz, whereas the other cells use *.fastq.gz — confirm
# that no other .gz files live in the bucket folder.
fastqs = ! gsutil ls gs://transfer-amlproject/$loc/*.gz

Add the sample names to the AML sample tracker Google Sheet (RNA_tracker_JK).

In [17]:
# Number of files listed in the bucket.
print(len(fastqs))
# Every other path, cut at the first "_R": one prefix per pair of files.
[gs_path.partition("_R")[0] for gs_path in fastqs[::2]]

56


['gs://transfer-amlproject/220610_MP10091_fastq/20220610_10_MV411_JQ1_B_MP10091_S10',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_11_MV411_JQ1_C_MP10091_S11',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_12_MV411_JQ1_D_MP10091_S12',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_13_MV411_MYCi361_A_MP10091_S13',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_14_MV411_MYCi361_B_MP10091_S14',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_15_MV411_MYCi361_C_MP10091_S15',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_16_MV411_MYCi361_D_MP10091_S16',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_17_MV411_THZ1_A_MP10091_S17',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_18_MV411_THZ1_B_MP10091_S18',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_19_MV411_THZ1_C_MP10091_S19',
 'gs://transfer-amlproject/220610_MP10091_fastq/20220610_1_MV411_DMSO_A_MP10091_S1',
 'gs://transfer-amlproject/220610_MP

In [18]:
# Reduce each bucket path to its file name, ordered with natsort's os_sorted.
fastqs = [gs_path.rpartition("/")[2] for gs_path in os_sorted(fastqs)]
print(len(fastqs))
# One line per consecutive pair of files: the part before the first "_R"
# is the sample name to add to the AML sample tracker google sheet.
for val1, val2 in h.grouped(fastqs, 2):
    print(val1.partition("_R")[0])

56
20220610_1_MV411_DMSO_A_MP10091_S1
20220610_2_MV411_DMSO_B_MP10091_S2
20220610_3_MV411_DMSO_C_MP10091_S3
20220610_4_MV411_DMSO_D_MP10091_S4
20220610_5_MV411_MS2_A_MP10091_S5
20220610_6_MV411_MS2_B_MP10091_S6
20220610_7_MV411_MS2_C_MP10091_S7
20220610_8_MV411_MS2_D_MP10091_S8
20220610_9_MV411_JQ1_A_MP10091_S9
20220610_10_MV411_JQ1_B_MP10091_S10
20220610_11_MV411_JQ1_C_MP10091_S11
20220610_12_MV411_JQ1_D_MP10091_S12
20220610_13_MV411_MYCi361_A_MP10091_S13
20220610_14_MV411_MYCi361_B_MP10091_S14
20220610_15_MV411_MYCi361_C_MP10091_S15
20220610_16_MV411_MYCi361_D_MP10091_S16
20220610_17_MV411_THZ1_A_MP10091_S17
20220610_18_MV411_THZ1_B_MP10091_S18
20220610_19_MV411_THZ1_C_MP10091_S19
20220610_20_MV411_THZ1_D_MP10091_S20
20220610_21_MV411_JQ1MS2_A_MP10091_S21
20220610_22_MV411_JQ1MS2_B_MP10091_S22
20220610_23_MV411_JQ1MS2_C_MP10091_S23
20220610_24_MV411_JQ1MS2_D_MP10091_S24
20220610_25_MV411_JQ1THZ1_A_MP10091_S25
20220610_26_MV411_JQ1THZ1_B_MP10091_S26
20220610_27_MV411_JQ1THZ1_C_MP10091

In [19]:
# Transfer all fastq files of this run from the bucket into the local
# fastqs directory (../$location/fastqs/).
! gsutil -m cp gs://transfer-amlproject/$loc/*.fastq.gz ../$location/fastqs/

Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_10_MV411_JQ1_B_MP10091_S10_R1_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_10_MV411_JQ1_B_MP10091_S10_R2_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_11_MV411_JQ1_C_MP10091_S11_R1_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_12_MV411_JQ1_D_MP10091_S12_R1_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_11_MV411_JQ1_C_MP10091_S11_R2_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_12_MV411_JQ1_D_MP10091_S12_R2_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_13_MV411_MYCi361_A_MP10091_S13_R1_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_13_MV411_MYCi361_A_MP10091_S13_R2_001.fastq.gz...
Copying gs://transfer-amlproject/220610_MP10091_fastq/20220610_14_MV411_MYCi361_B_MP10091_S14_R1_001.fastq.gz...
Copying

## rename local fastqs

In [20]:
# Local folder the fastqs were copied into ("../" + location + "fastqs").
fastq_folder = "../"+location+"fastqs"
print(fastq_folder)
# File names in that folder (names only, no directory part) ...
fastqs = ! ls $fastq_folder
# ... re-ordered with natsort's os_sorted so that sample 1, 2, ..., 10 come in numeric order.
fastqs = os_sorted(fastqs)
fastqs

../../data/slamseq_inhibitors_v2/fastqs


['20220610_1_MV411_DMSO_A_MP10091_S1_R1_001.fastq.gz',
 '20220610_1_MV411_DMSO_A_MP10091_S1_R2_001.fastq.gz',
 '20220610_2_MV411_DMSO_B_MP10091_S2_R1_001.fastq.gz',
 '20220610_2_MV411_DMSO_B_MP10091_S2_R2_001.fastq.gz',
 '20220610_3_MV411_DMSO_C_MP10091_S3_R1_001.fastq.gz',
 '20220610_3_MV411_DMSO_C_MP10091_S3_R2_001.fastq.gz',
 '20220610_4_MV411_DMSO_D_MP10091_S4_R1_001.fastq.gz',
 '20220610_4_MV411_DMSO_D_MP10091_S4_R2_001.fastq.gz',
 '20220610_5_MV411_MS2_A_MP10091_S5_R1_001.fastq.gz',
 '20220610_5_MV411_MS2_A_MP10091_S5_R2_001.fastq.gz',
 '20220610_6_MV411_MS2_B_MP10091_S6_R1_001.fastq.gz',
 '20220610_6_MV411_MS2_B_MP10091_S6_R2_001.fastq.gz',
 '20220610_7_MV411_MS2_C_MP10091_S7_R1_001.fastq.gz',
 '20220610_7_MV411_MS2_C_MP10091_S7_R2_001.fastq.gz',
 '20220610_8_MV411_MS2_D_MP10091_S8_R1_001.fastq.gz',
 '20220610_8_MV411_MS2_D_MP10091_S8_R2_001.fastq.gz',
 '20220610_9_MV411_JQ1_A_MP10091_S9_R1_001.fastq.gz',
 '20220610_9_MV411_JQ1_A_MP10091_S9_R2_001.fastq.gz',
 '20220610_10_MV411_

In [21]:
from gsheets import Sheets
# Credentials for the gsheets client: client-secrets file and token storage file
# in the user's home directory.
sheets = Sheets.from_files('~/.client_secret_web.json', '~/.storage.json')
# AML sample tracker spreadsheet.
url = "https://docs.google.com/spreadsheets/d/1yFLjYB1McU530JnLgL0QIMAKIkVl3kl0_LCHje2gk8U/edit?ts=5fab1071#gid=738732237"
# NOTE(review): the tab is selected by position (index 12) — presumably the
# RNA_tracker_JK tab; confirm, since reordering tabs in the spreadsheet changes
# which tab is loaded here.
gsheet = sheets.get(url).sheets[12].to_frame()

In [22]:
# get new sample names from AML sample tracker - RNA_tracker_JK
idx = gsheet.index[gsheet['batch'] == project] # rows whose batch is this project
if len(idx) == 0:
    # fail with a readable message instead of an IndexError on idx[0]
    raise ValueError("no row with batch == '" + project + "' found in the sample tracker sheet")
n_samples = len(fastqs) // 2 # two fastq files (R1/R2) per sample
# take n_samples rows starting at the first row of this batch
sample_info = gsheet.loc[idx[0]:, ["prev_name", "name"]].iloc[:n_samples]
print(sample_info.shape)

(28, 2)


In [23]:
# make dict with names: old sequencing-facility name -> new tracker name
# (bracket access: attribute access on a column called "name" is fragile)
rename = dict(zip(sample_info["prev_name"], sample_info["name"]))

# rename fastqs locally
for val in fastqs:
    rep = val
    for old, new in rename.items():
        rep = rep.replace(old, new)
    if rep != val:
        # os.replace instead of `! mv`: the names come from a spreadsheet and were
        # interpolated unquoted into a shell command, so spaces or shell
        # metacharacters in a name would break (or alter) the command.
        # Like `mv`, os.replace overwrites an existing destination.
        os.replace(os.path.join(fastq_folder, val), os.path.join(fastq_folder, rep))

# Processing

In [24]:
# Show which project the processing cells below operate on.
project

'inhibitors_v2'

In [25]:
# Folder with the (renamed) fastqs.
fastq_folder = "../"+location+"fastqs"
print(fastq_folder)
# Paths (folder included) of all fastq.gz files, in the order `ls` returns them.
fastqs = ! ls $fastq_folder/*fastq.gz
# Every other entry; with the R1/R2 files of a sample adjacent in the listing
# this shows the R1 file of each sample.
fastqs[::2]

../../data/slamseq_inhibitors_v2/fastqs


['../../data/slamseq_inhibitors_v2/fastqs/mr590-MV411-DMSO_1h-r1_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr591-MV411-DMSO_1h-r2_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr592-MV411-DMSO_1h-r3_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr593-MV411-DMSO_1h-r4_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr594-MV411-MS2_1h-r1_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr595-MV411-MS2_1h-r2_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr596-MV411-MS2_1h-r3_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr597-MV411-MS2_1h-r4_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr598-MV411-JQ1_1h-r1_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr599-MV411-JQ1_1h-r2_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr600-MV411-JQ1_1h-r3_R1_001.fastq.gz',
 '../../data/slamseq_inhibitors_v2/fastqs/mr601-MV411-JQ1_1h-r4_R1_001.fastq.gz',
 '../../data

## trim_galore

In [31]:
# Preview the first file of each pair that the trim_galore cell below will process.
# The slice [16:] skips the first 16 files (8 pairs).
# NOTE(review): presumably those were already trimmed in an earlier run — confirm.
for val1, val2 in h.grouped(fastqs[16:], 2):
    print(os.path.basename(val1))

mr598-MV411-JQ1_1h-r1_R1_001.fastq.gz
mr599-MV411-JQ1_1h-r2_R1_001.fastq.gz
mr600-MV411-JQ1_1h-r3_R1_001.fastq.gz
mr601-MV411-JQ1_1h-r4_R1_001.fastq.gz
mr602-MV411-MYCi361_1h-r1_R1_001.fastq.gz
mr603-MV411-MYCi361_1h-r2_R1_001.fastq.gz
mr604-MV411-MYCi361_1h-r3_R1_001.fastq.gz
mr605-MV411-MYCi361_1h-r4_R1_001.fastq.gz
mr606-MV411-THZ1_1h-r1_R1_001.fastq.gz
mr607-MV411-THZ1_1h-r2_R1_001.fastq.gz
mr608-MV411-THZ1_1h-r3_R1_001.fastq.gz
mr609-MV411-THZ1_1h-r4_R1_001.fastq.gz
mr610-MV411-JQ1_MS2_1h-r1_R1_001.fastq.gz
mr611-MV411-JQ1_MS2_1h-r2_R1_001.fastq.gz
mr612-MV411-JQ1_MS2_1h-r3_R1_001.fastq.gz
mr613-MV411-JQ1_MS2_1h-r4_R1_001.fastq.gz
mr614-MV411-JQ1_THZ1_1h-r1_R1_001.fastq.gz
mr615-MV411-JQ1_THZ1_1h-r2_R1_001.fastq.gz
mr616-MV411-JQ1_THZ1_1h-r3_R1_001.fastq.gz
mr617-MV411-JQ1_THZ1_1h-r4_R1_001.fastq.gz


In [None]:
%%time
# Build one paired-end trim_galore command per pair of fastq files (from index 16
# onward) and hand the list to h.parrun. Trimmed files are written to fastq_folder (-o).
# The backslash at the end of the first string line continues the string literal,
# so each command is a single line.
# NOTE(review): the second argument of h.parrun (2) is presumably the number of
# commands run at the same time — confirm in genepy; each command itself asks for
# 8 cores (--cores 8).
h.parrun(['trim_galore --paired --cores 8 --retain_unpaired -stringency 3\
 --illumina '+val1+' '+val2+' -o '+fastq_folder for val1, val2 in h.grouped(fastqs[16:], 2)], 2)

## slamdunk

Install slamdunk and its dependencies if needed (see the installation section at the top of this notebook).

In [33]:
# install slamdunk
#! pip3 install git+https://github.com/jkobject/slamdunk.git --upgrade

In [34]:
# please also install cutadapt, fastqc, and trimgalore
#! conda install -c bioconda cutadapt
#! conda install -c bioconda fastqc
#! conda install -c bioconda trim-galore

download reference genome files

In [35]:
# google storage bucket with reference files
#! gsutil ls gs://ccle_default_params

# copy reference genome files
#! gsutil -m cp gs://ccle_default_params/Homo_sapiens_assembly38* ../../ref/

# copy reference genome files
#! cp ../data/Muhar_Slamseq/{ERCC92.fa,ERCC92.gtf,GSE100708_hg38_refseq_062016_ensemblv84_3UTR.bed} ../../ref

PySam warning can be ignored.
<br>[GitHub issue: "\[E::idx_find_and_load\] Could not retrieve index file" when AlignmentFile](https://github.com/pysam-developers/pysam/issues/939)

In [36]:
# using an ERCC ref genome (you can just append ERCC fasta to the hg38 fasta)
refgenome_noAlt="../../ref/Homo_sapiens_assembly38_ERCC92_noAlt.fasta"

In [37]:
# gene intervals bed (use for all projects)
gene_intervals = "../../ref/GSE100708_hg38_refseq_062016_ensemblv84_3UTR.bed"

res = slamdunk all w/ (-fb, -m, -n 1, -rl 152, -r Homo_sapiens_assembly38_ERCC92_noAlt.fasta)

Read length note!
<br>Difference between min and max read length is > 100bp
<br>Average read length 127 (min: 8, max: 152)

In [38]:
fastq_folder = "../../data/"+"slamseq_"+project+"/fastqs"
fastqs = ! ls $fastq_folder
fastqs = [fastq for fastq in fastqs if "val" in fastq]
fastq_full = [fastq_folder+"/"+f for f in fastqs]

In [39]:
# Output directory handed to slamdunk (-o); the per-sample logs of the run
# cells are written here as well.
res_dir = "../../data/"+"slamseq_"+project+"/res"
res_dir

'../../data/slamseq_inhibitors_v2/res'

In [40]:
%%time

# Dry run: print the slamdunk command for the first pair of trimmed fastqs only
# (fastq_full[:2]); nothing is executed here.
# NOTE(review): this preview passes ' -m' while the run cells below pass
# ' -m True' — confirm which form the installed slamdunk fork expects and make
# the two agree.
print(['slamdunk all'+
       ' -o '+res_dir+
       ' -r '+refgenome_noAlt+
       ' -b '+gene_intervals+
       ' -t 14'+
       ' -c 2'+
       ' -rl 152'+ # max-read-length = 152
       # new parameters
       ' -5 12'+
       ' -n 1'+ # NOTE(review): per the slamdunk docs, max. number of alignments reported per read — confirm
       ' -m'+ # multimapping
       ' -N '+val1.split('/')[-1].split('_R')[0]+ # sample name = file name up to the first "_R"
       ' '+val1+" "+val2
       for val1, val2 in h.grouped(fastq_full[:2], 2)])

['slamdunk all -o ../../data/slamseq_inhibitors_v2/res -r ../../ref/Homo_sapiens_assembly38_ERCC92_noAlt.fasta -b ../../ref/GSE100708_hg38_refseq_062016_ensemblv84_3UTR.bed -t 14 -c 2 -rl 152 -5 12 -n 1 -m -N mr590-MV411-DMSO_1h-r1 ../../data/slamseq_inhibitors_v2/fastqs/mr590-MV411-DMSO_1h-r1_R1_001_val_1.fq.gz ../../data/slamseq_inhibitors_v2/fastqs/mr590-MV411-DMSO_1h-r1_R2_001_val_2.fq.gz']
CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 247 µs


In [41]:
# Number of trimmed fastq pairs (every other file), i.e. samples to process.
len(fastq_full[::2])

28

In [55]:
# First file of every pair in the batch fastq_full[16:36] that the next cell runs.
for val1, val2 in h.grouped(fastq_full[16:36], 2):
    print(os.path.basename(val1))

mr598-MV411-JQ1_1h-r1_R1_001_val_1.fq.gz
mr599-MV411-JQ1_1h-r2_R1_001_val_1.fq.gz
mr600-MV411-JQ1_1h-r3_R1_001_val_1.fq.gz
mr601-MV411-JQ1_1h-r4_R1_001_val_1.fq.gz
mr602-MV411-MYCi361_1h-r1_R1_001_val_1.fq.gz
mr603-MV411-MYCi361_1h-r2_R1_001_val_1.fq.gz
mr604-MV411-MYCi361_1h-r3_R1_001_val_1.fq.gz
mr605-MV411-MYCi361_1h-r4_R1_001_val_1.fq.gz
mr606-MV411-THZ1_1h-r1_R1_001_val_1.fq.gz
mr607-MV411-THZ1_1h-r2_R1_001_val_1.fq.gz


In [None]:
%%time

# run slamdunk
# we are doing it paired end
h.parrun(['slamdunk all'+
          ' -o '+res_dir+
          ' -r '+refgenome_noAlt+
          ' -b '+gene_intervals+
          ' -t 14'+
          ' -c 2'+
          ' -rl 152'+ # max-read-length = 152
          # new parameters
          ' -5 12'+
          ' -n 1'+ # reads map to multiple locations at once
          ' -m True'+ # multimapping
          ' -N '+val1.split('/')[-1].split('_R')[0]+
          ' '+val1+" "+val2+
          ' 2>&1 | tee '+res_dir+'/'+os.path.basename(val1)+'_slumdunk.log'
          for val1, val2 in h.grouped(fastq_full[16:36], 2)], 2)

In [17]:
%%time

# run slamdunk
# we are doing it paired end
# Same command as the previous run cell; only the slice differs
# (fastq_full[10:12], i.e. a single pair).
# NOTE(review): copy-pasted cell — consider one helper that takes the slice.
# NOTE(review): the saved output of this cell refers to
# slamseq_MYB_degraded_coculture_v1, so it was produced with a different
# `project` than the one set in this notebook; re-run before relying on it.
# NOTE(review): the log suffix '_slumdunk.log' looks like a typo of "slamdunk";
# left unchanged because it is part of the file names written to disk.
h.parrun(['slamdunk all'+
          ' -o '+res_dir+
          ' -r '+refgenome_noAlt+
          ' -b '+gene_intervals+
          ' -t 14'+
          ' -c 2'+
          ' -rl 152'+ # max-read-length = 152
          # new parameters
          ' -5 12'+
          ' -n 1'+ # reads map to multiple locations at once
          ' -m True'+ # multimapping
          ' -N '+val1.split('/')[-1].split('_R')[0]+ # sample name = file name up to the first "_R"
          ' '+val1+" "+val2+
          ' 2>&1 | tee '+res_dir+'/'+os.path.basename(val1)+'_slumdunk.log' # stdout+stderr also go to a per-sample log
          for val1, val2 in h.grouped(fastq_full[10:12], 2)], 2)

CPU times: user 428 ms, sys: 120 ms, total: 548 ms
Wall time: 4h 9min 42s


['[MAIN] NextGenMap 0.5.5\n[MAIN] Startup : x64 (build Jul  3 2020 02:47:43)\n[MAIN] Starting time: 2022-06-22.21:09:03\n[CONFIG] Parameter:  --affine 0 --argos_min_score 0 --bin_size 2 --block_multiplier 2 --broken_pairs 0 --bs_cutoff 6 --bs_mapping 0 --cpu_threads 14 --dualstrand 1 --fast 0 --fast_pairing 0 --force_rlength_check 0 --format 1 --gap_extend_penalty 5 --gap_read_penalty 20 --gap_ref_penalty 20 --hard_clip 0 --keep_tags 0 --kmer 13 --kmer_min 0 --kmer_skip 2 --local 1 --match_bonus 10 --match_bonus_tc 2 --match_bonus_tt 10 --max_cmrs 2147483647 --max_equal 1 --max_insert_size 1000 --max_polya 4 --max_read_length 0 --min_identity 0.650000 --min_insert_size 0 --min_mq 0 --min_residues 0.500000 --min_score 0.000000 --mismatch_penalty 15 --mode 0 --no_progress 1 --no_unal 0 --ocl_threads 1 --output ../../data/slamseq_MYB_degraded_coculture_v1/res/map/mr576-MV411-VHL_coculture_12h-r1_slamdunk_mapped.sam --overwrite 1 --pair_score_cutoff 0.900000 --paired 1 --parse_all 1 --pe_d

In [60]:
# First file of every pair across all trimmed fastqs.
for val1, val2 in h.grouped(fastq_full, 2):
    print(os.path.basename(val1))

mr571-MV411-DMSO_coculture_12h-r1_R1_001_val_1.fq.gz
mr572-MV411-DMSO_coculture_12h-r2_R1_001_val_1.fq.gz
mr573-MV411-DMSO_coculture_12h-r3_R1_001_val_1.fq.gz
mr574-MV411-DMSO_coculture_12h-r4_R1_001_val_1.fq.gz
mr575-MV411-DMSO_coculture_12h-r5_R1_001_val_1.fq.gz
mr576-MV411-VHL_coculture_12h-r1_R1_001_val_1.fq.gz
mr577-MV411-VHL_coculture_12h-r2_R1_001_val_1.fq.gz
mr578-MV411-VHL_coculture_12h-r3_R1_001_val_1.fq.gz
mr579-MV411-VHL_coculture_12h-r4_R1_001_val_1.fq.gz
mr580-MV411-VHL_coculture_12h-r5_R1_001_val_1.fq.gz
mr581-MV411-MYB-dTAG-DMSO_coculture_12h-r1_R1_001_val_1.fq.gz
mr582-MV411-MYB-dTAG-DMSO_coculture_12h-r2_R1_001_val_1.fq.gz
mr583-MV411-MYB-dTAG-DMSO_coculture_12h-r3_R1_001_val_1.fq.gz
mr584-MV411-MYB-dTAG-DMSO_coculture_12h-r4_R1_001_val_1.fq.gz
mr585-MV411-MYB-dTAG-VHL_coculture_12h-r1_R1_001_val_1.fq.gz
mr586-MV411-MYB-dTAG-VHL_coculture_12h-r2_R1_001_val_1.fq.gz
mr587-MV411-MYB-dTAG-VHL_coculture_12h-r3_R1_001_val_1.fq.gz
mr588-MV411-MYB-dTAG-VHL_coculture_12h-r4_R1