# Running MaxBin to make bins within each treatment


In [1]:
import os
import pandas as pd
from Bio import SeqIO

In [2]:
baseDir = '/home/sam/FullCyc_metagenome/enriched_binning'
enr_contigs = '/home/sam/FullCyc_metagenome/enriched_binning/enriched_contigs_per_treatment.txt'
alignDir = '/home/sam/FullCyc_metagenome/alignments_1000k'

ncores = 10

In [3]:
treatments = list(set(pd.read_csv(enr_contigs, sep='\t')['Treatment']))
treatments

['Xylose_Day06',
 'Vanillin_Day48',
 'PalmiticAcid_Day48',
 'PalmiticAcid_Day30',
 'Cellulose_Day30',
 'Glucose_Day01',
 'Glucose_Day14',
 'Glycerol_Day14']

### Getting abundance tables

Take the abundance tables from MetaBAT

In [4]:
contigs_by_treat = pd.read_csv(enr_contigs, sep='\t')

for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    day = int(treat.split('_Day')[1])
    substrate = treat.split('_Day')[0]
    full_cov_df = pd.read_csv(os.path.join(workDir, 'full_metabat_depths.txt'), sep='\t')
    full_cov_df = full_cov_df[full_cov_df.contigName.isin(contigs_by_treat[contigs_by_treat['Treatment'] == treat]['contigName'])]
    treat_cov_df = full_cov_df[['contigName', '_'.join([substrate, 'Day'+str(day), 'mapped.sorted.bam'])]]
    treat_cov_df.to_csv(os.path.join(workDir, treat+'_maxbin_depths.txt'), sep='\t', header=False, index=False)
    con_cov_df = full_cov_df[['contigName', '_'.join(['Control', 'Day'+str(day), 'mapped.sorted.bam'])]]
    con_cov_df.to_csv(os.path.join(workDir, treat.replace(substrate, 'Control')+'_maxbin_depths.txt'), sep='\t', header=False, index=False)
    full_cov_df = None
    treat_cov_df = None
    con_cov_df = None
    
contigs_by_treat = None

Running:   Xylose_Day06
Running:   Vanillin_Day48
Running:   PalmiticAcid_Day48
Running:   PalmiticAcid_Day30
Running:   Cellulose_Day30
Running:   Glucose_Day01
Running:   Glucose_Day14
Running:   Glycerol_Day14


### Run MaxBin2

In [None]:
for treat in treatments:
    print('Running:   ' + treat)
    substrate = treat.split('_Day')[0]
    workDir = os.path.join(baseDir, treat)
    outDir = os.path.join(workDir, treat + '_maxbin_binning')
    os.makedirs(outDir)
    cmd = ' '.join(['run_MaxBin.pl',
                    '-contig', os.path.join(workDir, treat+'.enr.contigs.1000.fasta'),
                    '-abund', os.path.join(workDir, treat+'_maxbin_depths.txt'),
                    '-abund2', os.path.join(workDir, treat.replace(substrate, 'Control')+'_maxbin_depths.txt'),
                    '-min_contig_length 1000',
                    '-thread', str(ncores),
                    '-out', os.path.join(outDir, treat+'_maxbin_out')])
    !$cmd              
    print('\n-----\n\n')

Running:   Xylose_Day06
readline() on closed filehandle FILE at /home/sam/anaconda3/envs/metawrap-env/bin/run_MaxBin.pl line 1336.
MaxBin 2.2.4
Input contig: /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06.enr.contigs.1000.fasta
Located abundance file [/home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06_maxbin_depths.txt]
Located abundance file [/home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Control_Day06_maxbin_depths.txt]
Min contig length: 1000
Thread: 10
out header: /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06_maxbin_binning/Xylose_Day06_maxbin_out
Searching against 107 marker genes to find starting seed contigs for [/home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06.enr.contigs.1000.fasta]...
Running FragGeneScan....
Running HMMER hmmsearch....
Done data collection. Running MaxBin...
Command: /home/sam/anaconda3/envs/metawrap-env/bin/src/MaxBin -fasta /home/sam/FullCyc_metagenom

Xylose_Day06_maxbin_out.001.marker.fasta
Xylose_Day06_maxbin_out.002.marker.fasta
Xylose_Day06_maxbin_out.003.marker.fasta
Xylose_Day06_maxbin_out.004.marker.fasta
Xylose_Day06_maxbin_out.005.marker.fasta
Xylose_Day06_maxbin_out.006.marker.fasta
Xylose_Day06_maxbin_out.007.marker.fasta
Xylose_Day06_maxbin_out.008.marker.fasta
Xylose_Day06_maxbin_out.009.marker.fasta
Xylose_Day06_maxbin_out.010.marker.fasta
Deleting intermediate files.


Yielded 10 bins for contig (scaffold) file /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06.enr.contigs.1000.fasta

Here are the output files for this run.
Please refer to the README file for further details.

Summary file: /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06_maxbin_binning/Xylose_Day06_maxbin_out.summary
Genome abundance info file: /home/sam/FullCyc_metagenome/enriched_binning/Xylose_Day06/Xylose_Day06_maxbin_binning/Xylose_Day06_maxbin_out.abundance
Marker counts: /home/sam/FullCyc_metagenome/

### Make bin summary file

In [None]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat, treat + '_maxbin_binning')
    binfiles = [f for f in os.listdir(workDir) if f.startswith(treat+'_maxbin_out') and f.endswith('fasta')]
    with open(os.path.join(workDir, treat + '_maxbin_bin_assignments.txt'), 'w') as outfile:
        outfile.write('contigName\tbin\n')
        for binfasta in binfiles:
            bin_number = int(binfasta.replace(treat+'_maxbin_out.', '').replace('.fasta', ''))
            for record in SeqIO.parse(os.path.join(workDir, binfasta), 'fasta'):
                outfile.write(record.id + '\t' + str(bin_number) + '\n')
    print('\n-----\n\n')