# Running CONCOCT to make bins within each treatment


In [1]:
import os
import pandas as pd
from Bio import SeqIO

In [2]:
# All inputs and outputs live under one project directory.
projectDir = '/home/sam/FullCyc_metagenome'

# Per-treatment binning directories, the table listing the enriched contigs
# of each treatment, and the directory holding the sorted BAM alignments.
baseDir = os.path.join(projectDir, 'enriched_binning')
enr_contigs = os.path.join(baseDir, 'enriched_contigs_per_treatment.txt')
alignDir = os.path.join(projectDir, 'alignments_1000k')

# Thread count passed to CONCOCT's -t option further down.
ncores = 30

## Subsetting

In [3]:
# Unique treatment labels from the 'Treatment' column of the tab-separated
# enriched-contig table. The list is built from a set, so its order is
# arbitrary and every later loop follows that order.
treatments = list(
    set(
        pd.read_csv(enr_contigs, sep='\t')['Treatment']
    )
)
treatments

['Xylose_Day06',
 'Vanillin_Day48',
 'Glycerol_Day14',
 'Glucose_Day01',
 'Glucose_Day14',
 'PalmiticAcid_Day30',
 'PalmiticAcid_Day48',
 'Cellulose_Day30']

### Cut contigs into smaller parts

In [4]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    cmd = ' '.join(['cut_up_fasta.py', 
                    os.path.join(workDir, treat+'.enr.contigs.1000.fasta'),
                    '-c 10000',
                    '-o 0', 
                    '--merge_last',
                    '-b', os.path.join(workDir, treat+'.split_contigs_10k.bed'),
                    '>', os.path.join(workDir, treat+'.split_contigs_10k.fa')])
    !$cmd
    !ls -lh $workDir
    print('\n-----\n\n')

Running:   Xylose_Day06
total 156M
-rw-r--r-- 1 sam buckley  77M Jul  7 17:49 Xylose_Day06.enr.contigs.1000.fasta
-rw-r--r-- 1 sam buckley 2.7M Jul  7 21:46 Xylose_Day06.split_contigs_10k.bed
-rw-r--r-- 1 sam buckley  77M Jul  7 21:46 Xylose_Day06.split_contigs_10k.fa

-----


Running:   Vanillin_Day48
total 409M
-rw-r--r-- 1 sam buckley 200M Jul  7 19:47 Vanillin_Day48.enr.contigs.1000.fasta
-rw-r--r-- 1 sam buckley 7.3M Jul  7 21:47 Vanillin_Day48.split_contigs_10k.bed
-rw-r--r-- 1 sam buckley 202M Jul  7 21:47 Vanillin_Day48.split_contigs_10k.fa

-----


Running:   Glycerol_Day14
total 320M
-rw-r--r-- 1 sam buckley 157M Jul  7 20:09 Glycerol_Day14.enr.contigs.1000.fasta
-rw-r--r-- 1 sam buckley 5.4M Jul  7 21:47 Glycerol_Day14.split_contigs_10k.bed
-rw-r--r-- 1 sam buckley 158M Jul  7 21:47 Glycerol_Day14.split_contigs_10k.fa

-----


Running:   Glucose_Day01
total 239M
-rw-r--r-- 1 sam buckley 117M Jul  7 17:36 Glucose_Day01.enr.contigs.1000.fasta
-rw-r--r-- 1 sam buckley 4.1M Jul 

### Generate coverage tables

In [None]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    day = int(treat.split('_Day')[1])
    substrate = treat.split('_Day')[0]
    cmd = ' '.join(['concoct_coverage_table.py',
                    os.path.join(workDir, treat+'.split_contigs_10k.bed'),
                    os.path.join(alignDir, substrate+'_Day'+str(day)+'_mapped.sorted.bam'),
                    os.path.join(alignDir, 'Control_Day'+str(day)+'_mapped.sorted.bam'),
                    '>', os.path.join(workDir, treat+'.cov_table.tsv')])
    !$cmd
    !ls -lh $workDir
    print('\n-----\n\n')

Running:   Xylose_Day06
[W::hts_idx_load2] The index file is older than the data file: /home/sam/FullCyc_metagenome/alignments_1000k/Xylose_Day6_mapped.sorted.bam.bai


### Run CONCOCT

In [None]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat)
    cmd = ' '.join(['concoct',
                    '--coverage_file', os.path.join(workDir, treat+'.cov_table.tsv'),
                    '--composition_file', os.path.join(workDir, treat+'.split_contigs_10k.fa'),
                    '-k 4', 
                    '-c 400',
                    '-t', str(ncores),
                    '-r 150',
                    '-s 7242',
                    '-i 500',
                    '--converge_out',
                    '-b', os.path.join(workDir, treat+'_concoct_binning/')])
    
    !$cmd               
    print('\n-----\n\n')

### Merge subcontig clustering into original contig clusters

In [None]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat, treat+'_concoct_binning')
    cmd = ' '.join(['merge_cutup_clustering.py',
                    os.path.join(workDir, 'clustering_gt1000.csv'),
                    '>', os.path.join(workDir, 'clustering_merged.csv')])
    !$cmd
    print('\n-----\n\n')

In [8]:
# Prints a fixed marker string; presumably used to signal that the
# long-running cells above have finished (TODO confirm with the author).
print('Done!')

Done!


### Make bin fasta files

In [11]:
for treat in treatments:
    print('Running:   ' + treat)
    workDir = os.path.join(baseDir, treat, treat+'_concoct_binning', 'fasta_bins')
    os.makedirs(workDir)
    cmd = ' '.join(['extract_fasta_bins.py',
                    os.path.join(baseDir, treat, treat+'.enr.contigs.1000.fasta'),
                    os.path.join(baseDir, treat, treat+'_concoct_binning', 'clustering_merged.csv'),
                    '--output_path', workDir])
    !$cmd
    print('\n-----\n\n')

Running:   Xylose_Day06

-----


Running:   Vanillin_Day48

-----


Running:   Glycerol_Day14

-----


Running:   Glucose_Day01

-----


Running:   Glucose_Day14

-----


Running:   PalmiticAcid_Day30

-----


Running:   PalmiticAcid_Day48

-----


Running:   Cellulose_Day30

-----


