In [16]:
from os import listdir

## 0.Bacterial Genomes 

In [11]:
%%bash
grep -c ">" 0.bacterialGenomes/FinalBacterialGenomes.fasta.*

0.bacterialGenomes/FinalBacterialGenomes.fasta.1:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.10:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.11:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.12:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.13:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.14:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.15:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.16:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.17:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.18:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.19:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.2:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.20:373
0.bacterialGenomes/FinalBacterialGenomes.fasta.3:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.4:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.5:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.6:410
0.bacterialGenomes/FinalBacterialGenomes.fasta.7:410
0.bacterialGenomes/FinalBacterialGe

In [13]:
print "The number of bacterial genomes is %d",410*19 + 373

The number of bacterial genomes is %d 8163


## 1. Create bowtie indexes 

### 1.1 Bacterial Indexes

In [6]:
%%writefile makeIndexes.sh
#!/bin/bash
#$ -N btw2Indexes
#$ -l h_vmem=8G
#$ -l h_rt=72:00:00
#$ -cwd
#$ -t 1-20
#$ -m ea
#$ -M jmoreno@tuebingen.mpg.de

bowtie2-build 0.bacterialGenomes/FinalBacterialGenomes.fasta.$SGE_TASK_ID FinalBacteriaGenoma.$SGE_TASK_ID  >& FinalBacterialGenomes.$SGE_TASK_ID.idxlog

Overwriting makeIndexes.sh


In [7]:
%%bash
qsub makeIndexes.sh

Your job-array 8540424.1-20:1 ("btw2Indexes") has been submitted


In [125]:
mv FinalBacteriaGenoma.* 1.indx/

### 1.2 Homo sapiens Indexes

In [14]:
%%writefile makeIndexesHomo.sh
#!/bin/bash
#$ -N homoIndx
#$ -l h_vmem=4G
#$ -l h_rt=72:00:00
#$ -cwd
#$ -m ea
#$ -M jmoreno@tuebingen.mpg.de

bowtie2-build 0.HomoGenome/GCF_000001405.36_GRCh38.p10_genomic.fna homoGenome >& homoGenome.idxlog

Writing makeIndexesHomo.sh


In [15]:
%%bash
qsub makeIndexesHomo.sh

Your job 8540447 ("homoIndx") has been submitted


In [81]:
%%bash
mv homoGenome.* 1.indx/

## 2. Bowtie mapping

### 2.1 Against bacteria

In [138]:
readsFolder = "../0_readsVirome/"
fw = open("againstBacteria.list.txt","w")
for i in range(1,21):
    for fileName in listdir(readsFolder):
        if "R1.fastq" in fileName:
            sample = fileName.split("_")[5]
            f = readsFolder+fileName
            r = readsFolder+fileName.replace("R1","R2")
            fw.write("bowtie2 -p 10 -t -k 3 -x 1.indx/FinalBacteriaGenoma."+str(i))
            fw.write(" -1 "+str(f)+" -2 "+str(r)+" -S "+str(sample)+"."+str(i)+".sam &> "+str(sample)+"."+str(i)+".log")
            fw.write("\n")
fw.close()

In [140]:
%%writefile runBowtieBacteria.sh
#!/bin/bash
#$ -N bteBac
#$ -l h_vmem=4G
#$ -l h_rt=06:00:00
#$ -cwd

line=$FOO
eval $line

Overwriting runBowtieBacteria.sh


In [141]:
%%writefile callRunBowtieBacteria.sh
#!/bin/sh

cat againstBacteria.list.txt | while read -r line
do
    name="$(echo $line | cut -d " " -f16 |cut -d "." -f1)"
    #echo $name
    qsub -v FOO="$line" runBowtieBacteria.sh
done

Overwriting callRunBowtieBacteria.sh


In [142]:
%%bash
chmod +x callRunBowtieBacteria.sh
./callRunBowtieBacteria.sh

Your job 8542131 ("bteBac") has been submitted
Your job 8542132 ("bteBac") has been submitted
Your job 8542133 ("bteBac") has been submitted
Your job 8542134 ("bteBac") has been submitted
Your job 8542135 ("bteBac") has been submitted
Your job 8542136 ("bteBac") has been submitted
Your job 8542137 ("bteBac") has been submitted
Your job 8542138 ("bteBac") has been submitted
Your job 8542139 ("bteBac") has been submitted
Your job 8542140 ("bteBac") has been submitted
Your job 8542141 ("bteBac") has been submitted
Your job 8542142 ("bteBac") has been submitted
Your job 8542143 ("bteBac") has been submitted
Your job 8542144 ("bteBac") has been submitted
Your job 8542145 ("bteBac") has been submitted
Your job 8542146 ("bteBac") has been submitted
Your job 8542147 ("bteBac") has been submitted
Your job 8542148 ("bteBac") has been submitted
Your job 8542149 ("bteBac") has been submitted
Your job 8542150 ("bteBac") has been submitted
Your job 8542151 ("bteBac") has been submitted
Your job 8542

### 2.2 Against Homo sapiens

In [117]:
readsFolder = "../0_readsVirome/"
fw = open("againstHomoSapiens.list.txt","w")
for fileName in listdir(readsFolder):
    if "R1.fastq" in fileName:
        sample = fileName.split("_")[5]
        f = readsFolder+fileName
        r = readsFolder+fileName.replace("R1","R2")
        fw.write("bowtie2 -p 10 -t -k 3 -x 1.indx/homoGenome "+"-1 "+str(f)+" -2 "+str(r)+" -S "+str(sample)+" &> "+str(sample)+".log"+"\n")
fw.close()

In [118]:
%%writefile callRunBowtieHomo.sh
#!/bin/sh

cat againstHomoSapiens.list.txt | while read -r line
do
    name="$(echo $line | cut -d " " -f16 |cut -d "." -f1)"
    #echo $name
    qsub -v FOO="$line" runBowtieHomo.sh
done

Overwriting callRunBowtieHomo.sh


In [123]:
%%writefile runBowtieHomo.sh
#!/bin/bash
#$ -N bteHs
#$ -l h_vmem=4G
#$ -l h_rt=24:00:00
#$ -cwd
#$ -m ea
#$ -M jmoreno@tuebingen.mpg.de

line=$FOO
eval $line

Overwriting runBowtieHomo.sh


In [124]:
%%bash
chmod +x callRunBowtieHomo.sh
./callRunBowtieHomo.sh

Your job 8541050 ("bteHs") has been submitted
Your job 8541051 ("bteHs") has been submitted
Your job 8541052 ("bteHs") has been submitted
Your job 8541053 ("bteHs") has been submitted
Your job 8541054 ("bteHs") has been submitted
Your job 8541055 ("bteHs") has been submitted
Your job 8541056 ("bteHs") has been submitted
Your job 8541057 ("bteHs") has been submitted
Your job 8541058 ("bteHs") has been submitted
Your job 8541059 ("bteHs") has been submitted
Your job 8541060 ("bteHs") has been submitted
Your job 8541061 ("bteHs") has been submitted
Your job 8541062 ("bteHs") has been submitted
Your job 8541063 ("bteHs") has been submitted
Your job 8541064 ("bteHs") has been submitted
Your job 8541065 ("bteHs") has been submitted
Your job 8541066 ("bteHs") has been submitted
Your job 8541067 ("bteHs") has been submitted
Your job 8541068 ("bteHs") has been submitted
Your job 8541069 ("bteHs") has been submitted
Your job 8541070 ("bteHs") has been submitted
Your job 8541071 ("bteHs") has bee

## 3. Filter mappings through quality

### 3.1 Bacteria

In [156]:
fw1 = open("sam2bam.bacteria.txt","w")
fw2 = open("qualityFilter.bacteria.txt","w")
fw3 = open("depth.bacteria.txt","w")
for sam in listdir("2.bowtie.bacteria/"):
    
    sample = sam.replace(".sam","")
    bam = sam.replace(".sam",".bam")
    filtered = bam.replace(".bam",".q20.bam")
    coverage = filtered.replace(".bam",".coverage")
    
    fw1.write("samtools view -bS 2.bowtie.bacteria/"+sam+" | samtools sort - "+sample+"\n")
    fw2.write("samtools view -q 20 -b "+bam+" > "+filtered+"\n")
    fw3.write("samtools depth "+filtered+" > "+coverage+"\n")
fw1.close()
fw2.close()
fw3.close()

### 3.1.1. Sam2Bam

In [157]:
%%writefile callSam2Bam.sh
#!/bin/sh

cat sam2bam.bacteria.txt | while read -r line
do
    name="$(echo $line | cut -d " " -f16 |cut -d "." -f1)"
    #echo $name
    qsub -v FOO="$line" runSam2Bam.sh
done

Writing callSam2Bam.sh


In [158]:
%%writefile runSam2Bam.sh
#!/bin/bash
#$ -N bteHs
#$ -l h_vmem=4G
#$ -l h_rt=01:00:00
#$ -cwd

line=$FOO
eval $line

Writing runSam2Bam.sh


In [159]:
%%bash
chmod +x callSam2Bam.sh
./callSam2Bam.sh

Your job 8543230 ("bteHs") has been submitted
Your job 8543231 ("bteHs") has been submitted
Your job 8543232 ("bteHs") has been submitted
Your job 8543233 ("bteHs") has been submitted
Your job 8543234 ("bteHs") has been submitted
Your job 8543235 ("bteHs") has been submitted
Your job 8543236 ("bteHs") has been submitted
Your job 8543237 ("bteHs") has been submitted
Your job 8543238 ("bteHs") has been submitted
Your job 8543239 ("bteHs") has been submitted
Your job 8543240 ("bteHs") has been submitted
Your job 8543241 ("bteHs") has been submitted
Your job 8543242 ("bteHs") has been submitted
Your job 8543243 ("bteHs") has been submitted
Your job 8543244 ("bteHs") has been submitted
Your job 8543245 ("bteHs") has been submitted
Your job 8543246 ("bteHs") has been submitted
Your job 8543247 ("bteHs") has been submitted
Your job 8543248 ("bteHs") has been submitted
Your job 8543249 ("bteHs") has been submitted
Your job 8543250 ("bteHs") has been submitted
Your job 8543251 ("bteHs") has bee

### 3.1.2 Quality Filter

In [166]:
%%writefile callQualityFilter.sh
#!/bin/sh

cat qualityFilter.bacteria.txt | while read -r line
do
    qsub -v FOO="$line" runQF.sh
done

Overwriting callQualityFilter.sh


In [164]:
%%writefile runQF.sh
#!/bin/bash
#$ -N bteHs
#$ -l h_vmem=2G
#$ -l h_rt=01:00:00
#$ -cwd

line=$FOO
eval $line

Overwriting runQF.sh


In [167]:
%%bash
chmod +x callQualityFilter.sh
./callQualityFilter.sh

Your job 8544206 ("bteHs") has been submitted
Your job 8544207 ("bteHs") has been submitted
Your job 8544208 ("bteHs") has been submitted
Your job 8544209 ("bteHs") has been submitted
Your job 8544210 ("bteHs") has been submitted
Your job 8544211 ("bteHs") has been submitted
Your job 8544212 ("bteHs") has been submitted
Your job 8544213 ("bteHs") has been submitted
Your job 8544214 ("bteHs") has been submitted
Your job 8544215 ("bteHs") has been submitted
Your job 8544216 ("bteHs") has been submitted
Your job 8544217 ("bteHs") has been submitted
Your job 8544218 ("bteHs") has been submitted
Your job 8544219 ("bteHs") has been submitted
Your job 8544220 ("bteHs") has been submitted
Your job 8544221 ("bteHs") has been submitted
Your job 8544222 ("bteHs") has been submitted
Your job 8544223 ("bteHs") has been submitted
Your job 8544224 ("bteHs") has been submitted
Your job 8544225 ("bteHs") has been submitted
Your job 8544226 ("bteHs") has been submitted
Your job 8544227 ("bteHs") has bee

### 3.1.3 bam2coverage

In [168]:
%%writefile callBam2Coverage.sh
#!/bin/sh

cat depth.bacteria.txt | while read -r line
do
    qsub -v FOO="$line" runCoverage.sh
done

Writing callBam2Coverage.sh


In [169]:
%%writefile runCoverage.sh
#!/bin/bash
#$ -N bteHs
#$ -l h_vmem=2G
#$ -l h_rt=01:00:00
#$ -cwd

line=$FOO
eval $line

Writing runCoverage.sh


In [170]:
%%bash
chmod +x callBam2Coverage.sh
./callBam2Coverage.sh

Your job 8546032 ("bteHs") has been submitted
Your job 8546033 ("bteHs") has been submitted
Your job 8546034 ("bteHs") has been submitted
Your job 8546035 ("bteHs") has been submitted
Your job 8546036 ("bteHs") has been submitted
Your job 8546037 ("bteHs") has been submitted
Your job 8546038 ("bteHs") has been submitted
Your job 8546039 ("bteHs") has been submitted
Your job 8546040 ("bteHs") has been submitted
Your job 8546041 ("bteHs") has been submitted
Your job 8546042 ("bteHs") has been submitted
Your job 8546043 ("bteHs") has been submitted
Your job 8546044 ("bteHs") has been submitted
Your job 8546045 ("bteHs") has been submitted
Your job 8546046 ("bteHs") has been submitted
Your job 8546047 ("bteHs") has been submitted
Your job 8546048 ("bteHs") has been submitted
Your job 8546049 ("bteHs") has been submitted
Your job 8546050 ("bteHs") has been submitted
Your job 8546051 ("bteHs") has been submitted
Your job 8546052 ("bteHs") has been submitted
Your job 8546053 ("bteHs") has bee

In [171]:
%%bash
mkdir 3.bamFiles.bacteria 4.q20Files.bacteria 5.coverageFiles.bacteria
mv *.q20.bam 4.q20Files.bacteria
mv *.bam 3.bamFiles.bacteria
mv *.coverage 5.coverageFiles.bacteria

mv: cannot move '4.q20Files.bacteria' to a subdirectory of itself, '4.q20Files.bacteria/4.q20Files.bacteria'
mv: cannot stat '*.coverage': No such file or directory


## 3.2 Homo sapiens

In [176]:
fw1 = open("sam2bam.homo.txt","w")
fw2 = open("qualityFilter.homo.txt","w")
fw3 = open("depth.homo.txt","w")
for sam in listdir("2.bowtie.homo/"):
    
    sample = sam.replace(".sam","")
    bam = sample+".bam"
    filtered = bam.replace(".bam",".q20.bam")
    coverage = filtered.replace(".bam",".coverage")
    
    fw1.write("samtools view -bS 2.bowtie.homo/"+sam+" | samtools sort - "+sample+"\n")
    fw2.write("samtools view -q 20 -b "+bam+" > "+filtered+"\n")
    fw3.write("samtools depth "+filtered+" > "+coverage+"\n")
fw1.close()
fw2.close()
fw3.close()

### 3.2.1. Sam2Bam

In [173]:
%%writefile callSam2Bam.homo.sh
#!/bin/sh

cat sam2bam.homo.txt | while read -r line
do
    qsub -v FOO="$line" runSam2Bam.homo.sh
done

Writing callSam2Bam.homo.sh


In [174]:
%%writefile runSam2Bam.homo.sh
#!/bin/bash
#$ -N bteHs
#$ -l h_vmem=4G
#$ -l h_rt=01:00:00
#$ -cwd

line=$FOO
eval $line

Writing runSam2Bam.homo.sh


In [175]:
%%bash
chmod +x callSam2Bam.homo.sh
./callSam2Bam.homo.sh

Your job 8546993 ("bteHs") has been submitted
Your job 8546994 ("bteHs") has been submitted
Your job 8546995 ("bteHs") has been submitted
Your job 8546996 ("bteHs") has been submitted
Your job 8546997 ("bteHs") has been submitted
Your job 8546998 ("bteHs") has been submitted
Your job 8546999 ("bteHs") has been submitted
Your job 8547000 ("bteHs") has been submitted
Your job 8547001 ("bteHs") has been submitted
Your job 8547002 ("bteHs") has been submitted
Your job 8547003 ("bteHs") has been submitted
Your job 8547004 ("bteHs") has been submitted
Your job 8547005 ("bteHs") has been submitted
Your job 8547006 ("bteHs") has been submitted
Your job 8547007 ("bteHs") has been submitted
Your job 8547008 ("bteHs") has been submitted
Your job 8547009 ("bteHs") has been submitted
Your job 8547010 ("bteHs") has been submitted
Your job 8547011 ("bteHs") has been submitted
Your job 8547012 ("bteHs") has been submitted
Your job 8547013 ("bteHs") has been submitted
Your job 8547014 ("bteHs") has bee

### 3.2.2 quality Filter

In [177]:
%%bash
chmod +x qualityFilter.homo.txt
./qualityFilter.homo.txt

### 3.2.3 bam2coverage 

In [178]:
%%bash
chmod +x depth.homo.txt
./depth.homo.txt

In [179]:
%%bash
mkdir 3.bamFiles.homo 4.q20Files.homo 5.coverageFiles.homo
mv *.q20.bam 4.q20Files.homo
mv *.bam 3.bamFiles.homo
mv *.coverage 5.coverageFiles.homo