# Running MetaBAT2 to make bins within each treatment


In [1]:
import os
import pandas as pd
from Bio import SeqIO

In [16]:
baseDir = '/home/sam/FullCyc_metagenome/enriched_binning'
enr_contigs = '/home/sam/FullCyc_metagenome/enriched_binning/enriched_contigs_per_treatment.txt'
alignDir = '/home/sam/FullCyc_metagenome/alignments_1000k'

ncores = 20

In [3]:
treatments = list(set(pd.read_csv(enr_contigs, sep='\t')['Treatment']))
treatments

['Xylose_Day06',
 'Vanillin_Day48',
 'PalmiticAcid_Day48',
 'PalmiticAcid_Day30',
 'Cellulose_Day30',
 'Glucose_Day01',
 'Glucose_Day14',
 'Glycerol_Day14']

### Getting contig coverages

In [7]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    day = int(treat.split('_Day')[1])
    substrate = treat.split('_Day')[0]
    cmd = ' '.join(['jgi_summarize_bam_contig_depths', 
                    '--outputDepth', os.path.join(workDir, 'full_metabat_depths.txt'),
                    os.path.join(alignDir, substrate+'_Day'+str(day)+'_mapped.sorted.bam'),
                    os.path.join(alignDir, 'Control_Day'+str(day)+'_mapped.sorted.bam')])
    !$cmd

Running:   Xylose_Day06
Output depth matrix to /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/full_metabat_depths.txt
Output matrix to /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/full_metabat_depths.txt
Opening 2 bams
Consolidating headers
Processing bam files
Thread 1 finished: Control_Day6_mapped.sorted.bam with 202765028 reads and 60704134 readsWellMapped
Thread 0 finished: Xylose_Day6_mapped.sorted.bam with 366186976 reads and 143481492 readsWellMapped
Creating depth matrix file: /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/full_metabat_depths.txt
Closing most bam files
Closing last bam file
Finished
Running:   Vanillin_Day48
Output depth matrix to /home/sam/FullCyc_metagenome/enriched_binning/Vanillin_Day48/full_metabat_depths.txt
Output matrix to /home/sam/FullCyc_metagenome/enriched_binning/Vanillin_Day48/full_metabat_depths.txt
Opening 2 bams
Consolidating headers
Processing bam files
Thread 0 finished: Vanillin_Day48_mapped.sorted.bam w

Need to remove the contigs that we dont want to bin from this coverage file

In [9]:
contigs_by_treat = pd.read_csv(enr_contigs, sep='\t')

for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    full_cov_df = pd.read_csv(os.path.join(workDir, 'full_metabat_depths.txt'), sep='\t')
    full_cov_df = full_cov_df[full_cov_df.contigName.isin(contigs_by_treat[contigs_by_treat['Treatment'] == treat]['contigName'])]
    full_cov_df.to_csv(os.path.join(workDir, treat+'_metabat_depths.txt'), sep='\t', header=True, index=False)
    full_cov_df = None
    
contigs_by_treat = None

Running:   Xylose_Day06
Running:   Vanillin_Day48
Running:   PalmiticAcid_Day48
Running:   PalmiticAcid_Day30
Running:   Cellulose_Day30
Running:   Glucose_Day01
Running:   Glucose_Day14
Running:   Glycerol_Day14


### Run MetaBAT

In [17]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    outDir = os.path.join(baseDir, treat, treat+'_metabat_binning')
    os.makedirs(outDir)
    cmd = ' '.join(['metabat',
                    '-i', os.path.join(workDir, treat+'.enr.contigs.1000.fasta'),
                    '-a', os.path.join(workDir, treat+'_metabat_depths.txt'),
                    '-o', os.path.join(outDir, treat+'_bin'),
                    '--saveTNF', os.path.join(outDir, treat+'_TNF.txt'),
                    '--saveDistance', os.path.join(outDir, 'metabat_distances.txt'),
                    '--minContig 1500',
                    '--maxEdges 500',
                    '-t', str(ncores),
                    '--seed 42'])
    
    !$cmd              
    print('\n-----\n\n')

Running:   Xylose_Day06
MetaBAT 2 (v2.12.1) using minContig 1500, minCV 1.0, minCVSum 1.0, maxP 95%, minS 60, and maxEdges 500. 
20 bins (30147006 bases in total) formed.

-----


Running:   Vanillin_Day48
MetaBAT 2 (v2.12.1) using minContig 1500, minCV 1.0, minCVSum 1.0, maxP 95%, minS 60, and maxEdges 500. 
42 bins (82120772 bases in total) formed.

-----


Running:   PalmiticAcid_Day48
MetaBAT 2 (v2.12.1) using minContig 1500, minCV 1.0, minCVSum 1.0, maxP 95%, minS 60, and maxEdges 500. 
33 bins (151968670 bases in total) formed.

-----


Running:   PalmiticAcid_Day30
MetaBAT 2 (v2.12.1) using minContig 1500, minCV 1.0, minCVSum 1.0, maxP 95%, minS 60, and maxEdges 500. 
66 bins (145889853 bases in total) formed.

-----


Running:   Cellulose_Day30
MetaBAT 2 (v2.12.1) using minContig 1500, minCV 1.0, minCVSum 1.0, maxP 95%, minS 60, and maxEdges 500. 
55 bins (179634796 bases in total) formed.

-----


Running:   Glucose_Day01
MetaBAT 2 (v2.12.1) using minContig 1500, minCV 1.0, mi

In [19]:
print("Done!")

Done!
