Skip to content

Commit

Permalink
Closes issue #4
Browse files Browse the repository at this point in the history
  • Loading branch information
andersgs committed Jan 17, 2017
1 parent 0c8f2c3 commit 1bd0133
Show file tree
Hide file tree
Showing 59 changed files with 79 additions and 31 deletions.
2 changes: 2 additions & 0 deletions MANIFEST.in
@@ -0,0 +1,2 @@
graft kleborate/data
graft kleborate/ICEKp_references
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
71 changes: 41 additions & 30 deletions Kleborate.py → kleborate/Kleborate.py
Expand Up @@ -4,17 +4,18 @@
import os, sys, subprocess
import gzip
from optparse import OptionParser

def main():
from pkg_resources import resource_string, resource_filename

def parse_options():
    """Parse the Kleborate command line.

    Arguments are read from ``sys.argv`` by ``OptionParser.parse_args()``.

    Returns:
        The ``(options, args)`` pair produced by ``parse_args()``.
        ``options`` carries ``repo_path``, ``outfile`` and ``resistance``;
        ``args`` holds the remaining positional arguments.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage=usage)

    # options
    # -p/--path is kept as a single live definition (no commented-out copy):
    # test.sh still runs `kleborate -p . ...`, and optparse aborts with
    # "no such option" for any flag that is not registered here.
    parser.add_option("-p", "--path", action="store", dest="repo_path", help="Path to Kleborate directory (default Kleborate)", default="Kleborate")
    parser.add_option("-o", "--outfile", action="store", dest="outfile", help="File for detailed output (default Kleborate_results.txt)", default="Kleborate_results.txt")
    parser.add_option("-r", "--resistance", action="store", dest="resistance", help="Resistance genes screening (default off, set to on)", default="off")

    return parser.parse_args()


Expand Down Expand Up @@ -48,25 +49,31 @@ def decompress_file(in_file, out_file):
o.write(s)


if __name__ == "__main__":
def kleborate():
(options, args) = parse_options()

(options, args) = main()
# find necessary resources
data_folder = resource_filename(__name__, 'data')
mlstblast = resource_filename(__name__, 'mlstBLAST.py')
resblast = resource_filename(__name__, 'resBLAST.py')
clusterblast = resource_filename(__name__, 'clusterBLAST.py')

header_string = "\t".join(["strain","ST","Yersiniabactin","YbST","Colibactin","CbST","aerobactin","salmochelin","hypermucoidy","wzi","KL"])
print header_string,

res_header_string = ""
if options.resistance == "on":
f = os.popen("python "+ options.repo_path + "/resBLAST.py -s " + options.repo_path + "/data/ARGannot.r1.fasta -t " + options.repo_path + "/data/ARGannot_clustered80.csv")

f = os.popen("python "+ resblast + " -s " + data_folder + "/ARGannot.r1.fasta -t " + data_folder + "/ARGannot_clustered80.csv")
fields = f.readline().rstrip().split("\t")
res_header_string = "\t".join(fields[1:])
f.close()
print "\t" + res_header_string,

print "" # end header

mlst_header_string = "\t".join(["Chr_ST","gapA","infB","mdh","pgi","phoE","rpoB","tonB","YbST","ybtS","ybtX","ybtQ","ybtP","ybtA","irp2","irp1","ybtU","ybtT","ybtE","fyuA","CbST","clbA","clbB","clbC","clbD","clbE","clbF","clbG","clbH","clbI","clbL","clbM","clbN","clbO","clbP","clbQ"])

o = file(options.outfile, "w")
o.write("\t".join([header_string,mlst_header_string]))
if options.resistance == "on":
Expand All @@ -76,7 +83,7 @@ def decompress_file(in_file, out_file):
for contigs in args:
(dir,fileName) = os.path.split(contigs)
(name,ext) = os.path.splitext(fileName)

# If the contigs are in a gz file, make a temporary decompressed FASTA file.
if get_compression_type(contigs) == 'gz':
new_contigs = contigs + '_temp_decompress.fasta'
Expand All @@ -86,12 +93,13 @@ def decompress_file(in_file, out_file):
else:
temp_decompress = False

f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s "+ options.repo_path + "/data/Klebsiella_pneumoniae.fasta -d "+ options.repo_path + "/data/kpneumoniae.txt -i no --maxmissing 3 " + contigs)

f = os.popen("python "+ mlstblast + " -s "+ data_folder + "/Klebsiella_pneumoniae.fasta -d "+ data_folder +"/kpneumoniae.txt -i no --maxmissing 3 " + contigs)

# run chromosome MLST
chr_ST = ""
chr_ST_detail = []

for line in f:
fields = line.rstrip().split("\t")
if fields[1] != "ST":
Expand All @@ -101,15 +109,15 @@ def decompress_file(in_file, out_file):
if chr_ST != "0":
chr_ST = "ST"+chr_ST
f.close()

# run ybt MLST
f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s "+ options.repo_path + "/data/ybt_alleles.fasta -d "+ options.repo_path + "/data/YbST_profiles.txt -i yes --maxmissing 3 " + contigs)

f = os.popen("python "+ mlstblast + " -s "+ data_folder +"/ybt_alleles.fasta -d "+ data_folder + "/YbST_profiles.txt -i yes --maxmissing 3 " + contigs)

Yb_ST = ""
Yb_group = ""
Yb_ST_detail = []

for line in f:
fields = line.rstrip().split("\t")
if fields[2] != "ST":
Expand All @@ -119,15 +127,15 @@ def decompress_file(in_file, out_file):
if Yb_group == "":
Yb_group = "-"
f.close()

# run colibactin MLST
f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s "+ options.repo_path + "/data/colibactin_alleles.fasta -d "+ options.repo_path + "/data/CbST_profiles.txt -i yes --maxmissing 3 " + contigs)

f = os.popen("python "+ mlstblast + " -s "+ data_folder + "/colibactin_alleles.fasta -d "+ data_folder + "/CbST_profiles.txt -i yes --maxmissing 3 " + contigs)

Cb_ST = ""
Cb_group = ""
Cb_ST_detail = []

for line in f:
fields = line.rstrip().split("\t")
if fields[2] != "ST":
Expand All @@ -137,19 +145,19 @@ def decompress_file(in_file, out_file):
if Cb_group == "":
Cb_group = "-"
f.close()

# screen for other virulence genes (binary calls)

f = os.popen("python "+ options.repo_path + "/clusterBLAST.py -s "+ options.repo_path + "/data/other_vir_clusters.fasta " + contigs)
f = os.popen("python "+ clusterblast + " -s "+ data_folder + "/other_vir_clusters.fasta " + contigs)
for line in f:
fields = line.rstrip().split("\t")
if fields[1] != "aerobactin":
# skip header
(strain,vir_hits) = (fields[0],"\t".join(fields[1:]))
f.close()

# screen for wzi allele
f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s " + options.repo_path + "/data/wzi.fasta -d " + options.repo_path + "/data/wzi.txt -i yes --maxmissing 0 -m 99 " + contigs)
f = os.popen("python "+ mlstblast + " -s " + data_folder + "/wzi.fasta -d " + data_folder + "/wzi.txt -i yes --maxmissing 0 -m 99 " + contigs)
for line in f:
fields = line.rstrip().split("\t")
if fields[0] != "ST":
Expand All @@ -163,20 +171,20 @@ def decompress_file(in_file, out_file):
# screen for resistance genes
res_hits = ""
if options.resistance == "on":
f = os.popen("python "+ options.repo_path + "/resBLAST.py -s " + options.repo_path + "/data/ARGannot.r1.fasta -t " + options.repo_path + "/data/ARGannot_clustered80.csv -q" + options.repo_path + "/data/QRDR_120.aa " + contigs)
f = os.popen("python "+ resblast + " -s " + data_folder + "/ARGannot.r1.fasta -t " + data_folder + "/ARGannot_clustered80.csv -q" + data_folder + "/QRDR_120.aa " + contigs)
for line in f:
fields = line.rstrip().split("\t")
if fields[0] != "strain":
# skip header
res_hits = "\t".join(fields[1:])
res_hits = "\t".join(fields[1:])
f.close()

# record results
print "\t".join([name,chr_ST,Yb_group,Yb_ST,Cb_group,Cb_ST,vir_hits,wzi_ST,Ktype]),
if options.resistance == "on":
print "\t" + res_hits,
print ""

o.write("\t".join([name,chr_ST,Yb_group,Yb_ST,Cb_group,Cb_ST,vir_hits,wzi_ST,Ktype,chr_ST]+chr_ST_detail+[Yb_ST]+Yb_ST_detail + [Cb_ST] + Cb_ST_detail))
if options.resistance == "on":
o.write("\t" + res_hits)
Expand All @@ -190,5 +198,8 @@ def decompress_file(in_file, out_file):
# If we've been working on a temporary decompressed file, delete it now.
if temp_decompress:
os.remove(contigs)

o.close()

# Script entry point: run the full typing pipeline when this file is executed
# directly. setup.py exposes the same kleborate() function as the `kleborate`
# console script.
if __name__ == "__main__":
kleborate()
Empty file added kleborate/__init__.py
Empty file.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
35 changes: 35 additions & 0 deletions setup.py
@@ -0,0 +1,35 @@
from setuptools import setup


def readme():
    """Return the text of the README.md located next to this file.

    The path is resolved from this file's own location rather than the
    current working directory, so the build also works when setup.py is
    invoked from another directory. The file is decoded as UTF-8 through
    ``io.open`` so the result does not depend on the locale.

    Returns:
        The README contents as a text string.

    Raises:
        IOError/OSError: if README.md is missing or unreadable.
    """
    import io
    import os

    here = os.path.dirname(os.path.abspath(__file__))
    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
        return f.read()


# Package metadata and install configuration for Kleborate.
setup(name='Kleborate',
      version='0.0.1',
      description='Kleborate',
      long_description=readme(),
      classifiers=[
          'Development Status :: 4 - Beta',
          # Must be a registered trove classifier; the short form
          # 'License :: OSI Approved :: GPLv3' is not one.
          'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
          'Programming Language :: Python :: 2.7',
          'Topic :: Scientific/Engineering :: Bio-Informatics',
          'Topic :: Scientific/Engineering :: Medical Science Apps.',
          'Intended Audience :: Science/Research',
      ],
      keywords='microbial genomics sequence typing',
      url='https://github.com/katholt/Kleborate',
      author='Kathryn Holt',
      author_email='',
      # NOTE(review): was 'GPLv2', which contradicted the GPLv3 classifier
      # above; aligned to GPLv3 -- confirm against the repository LICENSE file.
      license='GPLv3',
      packages=['kleborate'],
      install_requires=[
          # kleborate/Kleborate.py imports pkg_resources at runtime, which is
          # shipped by setuptools.
          'setuptools',
      ],
      test_suite='nose.collector',
      # test_suite points at nose, so it has to be available for the test run.
      tests_require=['nose'],
      entry_points={
          # Installs a `kleborate` command that calls kleborate() in
          # kleborate/Kleborate.py.
          'console_scripts': ['kleborate=kleborate.Kleborate:kleborate'],
      },
      # Ship the data directories listed in MANIFEST.in with the package.
      include_package_data=True,
      zip_safe=False)
2 changes: 1 addition & 1 deletion test.sh 100644 → 100755
Expand Up @@ -20,4 +20,4 @@ mv GCA_000016305.1_ASM1630v1_genomic.fna MGH78578.fna

# run typing
# NOTE: -p must point to the Kleborate directory
python Kleborate.py -p . -o details.txt *.fna
kleborate -p . -o details.txt *.fna

0 comments on commit 1bd0133

Please sign in to comment.