# Binning contigs into MAGs

Samuel Barnett

### Introduction

Now that I have contigs, I want to bin them into metagenome-assembled genomes (MAGs). I'll use MetaBAT for this.

## 1) Initialization

First I need to import the Python modules I'll use, set some variables, and move into the working directory (which must already exist).

In [1]:
import os
# Root directory of the study; the cells below build their input and output
# paths by joining onto it.
workDir = '/home/sam/data/SIPSim2_data/RealWorld_study3/'

# Thread count handed to the external tools (bbmap.sh `threads=`, metabat2 `-t`).
nprocs = 15

In [2]:
# Warn if the working directory is missing. Nothing is created here and
# execution is not stopped, so the %cd below is attempted either way.
if not os.path.isdir(workDir):
    print("Working directory does not exist!!!")
# IPython magic: make workDir the notebook's current directory.
%cd $workDir

/home/sam/data/SIPSim2_data/RealWorld_study3


## 2) Mapping reads to contigs

Before binning I need to get the coverage of each contig in each read set within the group defined by the co-assembly. Here, this group is defined by the reference genome set, the read depth, and the experiment type.

In [None]:
# Map every library's reads back to the contigs of its co-assembly.
#
# Paths used by this cell (all below workDir):
#   <genome_set>/<depth>/                    input FASTQ files
#   coassembly/<group>/final.contigs.fa      contigs of the co-assembly
#   binning/<group>/                         working/output directory
# where <group> is '<genome set>_<depth>_<experiment>', e.g. lowGC_5MM_SIP.
#
# For each library the reads are re-paired with repair.sh, mapped with
# bbmap.sh (which writes library_<n>.sam plus a bs.sh helper script that is
# run right after a successful mapping), and the temporary re-paired FASTQ
# files are deleted. A library whose inputs are missing, or whose external
# command fails, is reported and skipped instead of being fed to the next tool.

# Short labels used to build the <group> directory names.
genset_dict = {'low_GC_skew': 'lowGC',
               'medium_GC': 'medGC',
               'high_GC_skew': 'highGC'}
depth_dict = {'depth5MM': '5MM',
              'depth10MM': '10MM'}
# Not referenced in this cell; kept so the notebook namespace is unchanged.
exp_dict = {'SIP': 'window', 'nonSIP': 'nonSIP'}

# FASTQ file-name pattern per experiment type; {end} is 'f' or 'r'.
fastq_templates = {
    'SIP': 'library_{lib}_window_1.72-1.77_reads_{end}.fastq.gz',
    'nonSIP': 'nonSIP_library_{lib}_reads_{end}.fastq.gz',
}

# Temporary files written by repair.sh into the current directory.
repaired_files = ['repaired1.fastq.gz', 'repaired2.fastq.gz', 'repairedS.fastq.gz']

binDir = os.path.join(workDir, 'binning')
if not os.path.exists(binDir):
    os.makedirs(binDir)

for genome_set in ['low_GC_skew', 'medium_GC', 'high_GC_skew']:
    for depth in ['depth5MM', 'depth10MM']:
        fastqDir = os.path.join(workDir, genome_set, depth)

        for exp_type in ['SIP', 'nonSIP']:
            group = '_'.join([genset_dict[genome_set], depth_dict[depth], exp_type])
            subbinDir = os.path.join(binDir, group)
            if not os.path.exists(subbinDir):
                os.makedirs(subbinDir)
            # Plain-Python replacement for `%cd $subbinDir`: the tools below
            # write their output into the current directory. The print keeps
            # the directory line in the cell output.
            os.chdir(subbinDir)
            print(os.getcwd())

            contigFile = os.path.join(workDir, 'coassembly', group, 'final.contigs.fa')
            if not os.path.isfile(contigFile):
                # Without contigs every mapping in this group would fail.
                print(' '.join([contigFile, 'does not exist']))
                print('Skipping this co-assembly\n')
                continue

            for lib in [1, 2, 3, 4, 5, 6]:
                print(' '.join(['Mapping reads for', genome_set,
                                exp_type, 'experiment',
                                depth, 'read depth library', str(lib)]))
                template = fastq_templates[exp_type]
                F_fastq = os.path.join(fastqDir, template.format(lib=lib, end='f'))
                R_fastq = os.path.join(fastqDir, template.format(lib=lib, end='r'))

                # Report every missing input, then skip the library rather
                # than running the tools on files that are not there.
                missing = [fq for fq in (F_fastq, R_fastq) if not os.path.isfile(fq)]
                for fq in missing:
                    print(' '.join([fq, 'does not exist']))
                if missing:
                    print('Skipping this library\n')
                    continue

                # Reorder reads so that they are paired
                print('Reordering reads')
                cmd = ''.join(['repair.sh in1=', F_fastq, ' in2=', R_fastq,
                               ' out1=repaired1.fastq.gz out2=repaired2.fastq.gz outs=repairedS.fastq.gz'])
                repair_status = os.system(cmd)

                if repair_status != 0:
                    print('repair.sh failed; skipping mapping for this library')
                else:
                    # Map reads
                    print('Mapping reads')
                    samname = ''.join(['library_', str(lib), '.sam'])
                    # `&&` so bs.sh only runs when bbmap.sh itself succeeded.
                    cmd = ''.join(['bbmap.sh ref=', contigFile,
                                   ' in=repaired1.fastq.gz in2=repaired2.fastq.gz',
                                   ' out=', samname, ' threads=', str(nprocs),
                                   ' bamscript=bs.sh && sh bs.sh'])
                    if os.system(cmd) != 0:
                        print(' '.join(['Mapping failed for', samname]))

                # Remove "repaired" reads (also after a failure, so leftovers
                # never leak into the next library).
                print('Removing reordered reads')
                for fname in repaired_files:
                    if os.path.exists(fname):
                        os.remove(fname)

                print('\n')

            print('\n')



/home/sam/data/SIPSim2_data/RealWorld_study3/binning/lowGC_5MM_SIP
Mapping reads for low_GC_skew SIP experiment depth5MM read depth library 1
Reordering reads
Mapping reads
Removing reordered reads


Mapping reads for low_GC_skew SIP experiment depth5MM read depth library 2
Reordering reads


## 3) Binning with MetaBAT

Now I'll do the binning within each group. Again, each group is defined by the reference genome set, the read depth, and the experiment type.

In [None]:
# Bin the contigs of every co-assembly with MetaBAT2.
#
# For each group directory binning/<group>/ (e.g. lowGC_5MM_SIP) this cell
#   1. summarizes per-contig depth over the six library_<n>_sorted.bam files
#      into depth.txt with jgi_summarize_bam_contig_depths, and
#   2. runs metabat2 on coassembly/<group>/final.contigs.fa with that depth
#      table, writing bins under the init_bins/bin prefix.
# A group whose inputs are missing, or whose depth summary fails, is reported
# and skipped so that metabat2 is never run on absent or stale data.

# Short labels used to build the <group> directory names.
genset_dict = {'low_GC_skew': 'lowGC',
               'medium_GC': 'medGC',
               'high_GC_skew': 'highGC'}
depth_dict = {'depth5MM': '5MM',
              'depth10MM': '10MM'}
# Not referenced in this cell; kept so the notebook namespace is unchanged.
exp_dict = {'SIP': 'window', 'nonSIP': 'nonSIP'}

binDir = os.path.join(workDir, 'binning')
if not os.path.exists(binDir):
    os.makedirs(binDir)

# Sorted BAM files expected in each group directory, one per library.
bam_files = ['library_{0}_sorted.bam'.format(lib) for lib in [1, 2, 3, 4, 5, 6]]

for genome_set in ['low_GC_skew', 'medium_GC', 'high_GC_skew']:
    for depth in ['depth5MM', 'depth10MM']:
        for exp_type in ['SIP', 'nonSIP']:
            group = '_'.join([genset_dict[genome_set], depth_dict[depth], exp_type])
            subbinDir = os.path.join(binDir, group)
            if not os.path.exists(subbinDir):
                os.makedirs(subbinDir)
            # Plain-Python replacement for `%cd $subbinDir`: the commands below
            # use file names relative to the current directory. The print
            # keeps the directory line in the cell output.
            os.chdir(subbinDir)
            print(os.getcwd())

            contigFile = os.path.join(workDir, 'coassembly', group, 'final.contigs.fa')

            # Verify the inputs up front instead of letting the tools fail.
            missing = [path for path in [contigFile] + bam_files
                       if not os.path.isfile(path)]
            if missing:
                print(' '.join(['Missing input files:'] + missing))
                print(' '.join(['Skipping binning for', group]))
                print('---\n')
                continue

            print('Summarizing contig depths')
            cmd = ' '.join(['jgi_summarize_bam_contig_depths',
                            '--outputDepth depth.txt',
                            '--minContigLength 1000',
                            '--minContigDepth 2'] + bam_files)
            if os.system(cmd) != 0:
                # Do not bin with a missing or partial depth table.
                print(' '.join(['Depth summary failed; skipping binning for', group]))
                print('---\n')
                continue

            print('Binning')
            cmd = ' '.join(['metabat2 -i', contigFile,
                            '-a depth.txt',
                            '-o init_bins/bin',
                            '--saveCls',
                            '--unbinned',
                            '-t', str(nprocs)])
            if os.system(cmd) != 0:
                print(' '.join(['metabat2 failed for', group]))

            print('---\n')

In [6]:
# Completion marker: printed once execution reaches this cell.
print('Done')

Done
