# Variant calling using bcftools   

Run this notebook in your preferred conda environment.  
**Packages needed**: bcftools

In [4]:
import sys
import ipyparallel as ipp
import os
from os import environ
import gzip
import warnings
import pandas as pd
import numpy as np
import scipy as sp
import glob
import re
import random

In [5]:
# Project root on the cluster filesystem; the other paths in this notebook are built from it.
root = "/data/gpfs/assoc/denovo/tfaske/SPCR"

In [6]:
# Parent directory of the SNP-calling steps; the bwa/ and vcf/ directories live under it.
snp_dir = os.path.join(root,'SNPcall')

In [7]:
# Directory that is searched below for the '*sorted.bam' alignment files.
bwa_dir = os.path.join(snp_dir,'bwa')

In [8]:
cd $bwa_dir

/data/gpfs/assoc/denovo/tfaske/SPCR/SNPcall/bwa


### Assembly

In [9]:
# Reference FASTA that is later passed to `bcftools mpileup -f`.
assembly = os.path.join(root,"assembly/reference.fasta")
# `assert assembly` only tested for a non-empty string and could never fail;
# check that the file is really on disk before a job script is built from it.
assert os.path.isfile(assembly), "reference assembly not found: %s" % assembly

In [10]:
bam_files = []
files = !find . -type f -name '*sorted.bam'
files = [os.path.abspath(x) for x in files if 'bam' in x]
for x in files:
    bam_files.append(x)
bam_files = sorted(bam_files)

In [11]:
# Fail with a clear message (instead of an IndexError) when the search found nothing.
assert bam_files, "no '*sorted.bam' files were found"
len(bam_files), bam_files[0]

(626, '/data/gpfs/assoc/denovo/tfaske/SPCR/SNPcall/bwa/SE10_F_10_sorted.bam')

# Call SNPs

    

In [12]:
cd $snp_dir

/data/gpfs/assoc/denovo/tfaske/SPCR/SNPcall


In [13]:
# Create <snp_dir>/vcf. Unlike `!mkdir vcf`, this does not complain when the
# notebook is re-run and the directory already exists, and it does not depend
# on the current working directory.
os.makedirs(os.path.join(snp_dir, "vcf"), exist_ok=True)

In [14]:
# Directory in which bam_list.txt and run_bcftools.sh are written.
vcf_dir = os.path.join(snp_dir,"vcf")
# `assert(vcf_dir)` only tested for a non-empty string and could never fail;
# verify that the directory exists before changing into it.
assert os.path.isdir(vcf_dir), "VCF output directory not found: %s" % vcf_dir

In [15]:
cd $vcf_dir

/data/gpfs/assoc/denovo/tfaske/SPCR/SNPcall/vcf


#### Write the BAM list (`bam_list.txt`)

In [16]:
# One BAM path per line; this file is what `bcftools mpileup -b` reads.
with open('bam_list.txt', "w") as list_handle:
    list_handle.writelines("%s\n" % bam_path for bam_path in bam_files)

In [17]:
### Options for the Slurm submission script
# Alternative account/partition pair, kept for reference:
#account = 'cpu-s1-bionres-0'
#partition = 'cpu-s1-bionres-0'
account = 'cpu-s5-denovo-0'
partition = 'cpu-core-0'
jobname = 'bcftools'
outname = 'SPCR' # output prefix: the VCF is written as <outname>.vcf.gz; change per project
time = '4-00:00:00' # wall-clock limit in Slurm days-hours:minutes:seconds form, i.e. 4 days
cpus = 1
mem_cpu = 100000 # value for --mem-per-cpu (Slurm reads a bare number as megabytes)
email = 'tfaske@nevada.unr.edu' # address for the FAIL/END job notifications

In [18]:
def write_bcftools_sh(account,partition,time,cpus,mem_cpu,email,assembly,outname,jobname='bcftools'):
    """Write ``run_bcftools.sh``, a Slurm batch script that calls variants with bcftools.

    The script is written to the current working directory. It pipes
    ``bcftools mpileup`` (reading the BAM paths listed in ``bam_list.txt``)
    into ``bcftools call`` and writes ``<outname>.vcf.gz``.

    Parameters
    ----------
    account, partition, time : str
        Values for the ``--account``, ``--partition`` and ``--time`` directives.
    cpus : int
        Value for ``--cpus-per-task`` (converted with ``int``).
    mem_cpu : int
        Value for ``--mem-per-cpu`` (converted with ``int``).
    email : str
        Address for ``--mail-user``.
    assembly : str
        Path of the reference FASTA given to ``bcftools mpileup -f``.
    outname : str
        Prefix of the compressed VCF that ``bcftools call`` writes.
    jobname : str, optional
        Value for ``--job-name``. Defaults to ``'bcftools'``, the name that
        used to be hard-coded, so existing calls produce the same directive.

    Returns
    -------
    None
    """
    template = "\n".join([
        "#!/usr/bin/env bash",
        "#SBATCH --account=%s",
        "#SBATCH --partition=%s",
        "#SBATCH --time=%s",
        "#SBATCH --ntasks 1",
        "#SBATCH --cpus-per-task %d",
        "#SBATCH --mem-per-cpu=%d",
        "#SBATCH --job-name %s",
        "#SBATCH --output output_bcftools.txt",
        "#SBATCH --mail-type=FAIL,END",
        "#SBATCH --mail-user=%s",
        "",
        # Without pipefail the exit status is that of `bcftools call` alone, so
        # a crash in `bcftools mpileup` could leave a truncated VCF while the
        # job still reports success. Must come after every #SBATCH line.
        "set -euo pipefail",
        "",
        "bcftools mpileup -a DP,AD,INFO/AD -C 50 -d 250 -f %s -q 30 -Q 20 -I -b bam_list.txt"
        " | bcftools call -v -m -f GQ -O z -o %s.vcf.gz",
        "",
    ])
    values = (account,partition,time,int(cpus),int(mem_cpu),jobname,email,assembly,outname)
    with open("run_bcftools.sh" , "w") as o:
        o.write(template % values)

In [19]:
# Generate run_bcftools.sh in the current directory from the Slurm settings above.
write_bcftools_sh(
    account=account,
    partition=partition,
    time=time,
    cpus=cpus,
    mem_cpu=mem_cpu,
    email=email,
    assembly=assembly,
    outname=outname,
)

# Run run_bcftools.sh locally
    cd /data/gpfs/assoc/denovo/tfaske/SPCR/SNPcall/vcf
    source activate py36
    bash run_bcftools.sh