diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..1b36885 --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,2 @@ +graft kleborate/data +graft kleborate/ICEKp_references diff --git a/ICEKp_references/ICEKp1.embl b/kleborate/ICEKp_references/ICEKp1.embl similarity index 100% rename from ICEKp_references/ICEKp1.embl rename to kleborate/ICEKp_references/ICEKp1.embl diff --git a/ICEKp_references/ICEKp10.embl b/kleborate/ICEKp_references/ICEKp10.embl similarity index 100% rename from ICEKp_references/ICEKp10.embl rename to kleborate/ICEKp_references/ICEKp10.embl diff --git a/ICEKp_references/ICEKp11.embl b/kleborate/ICEKp_references/ICEKp11.embl similarity index 100% rename from ICEKp_references/ICEKp11.embl rename to kleborate/ICEKp_references/ICEKp11.embl diff --git a/ICEKp_references/ICEKp12.embl b/kleborate/ICEKp_references/ICEKp12.embl similarity index 100% rename from ICEKp_references/ICEKp12.embl rename to kleborate/ICEKp_references/ICEKp12.embl diff --git a/ICEKp_references/ICEKp13.embl b/kleborate/ICEKp_references/ICEKp13.embl similarity index 100% rename from ICEKp_references/ICEKp13.embl rename to kleborate/ICEKp_references/ICEKp13.embl diff --git a/ICEKp_references/ICEKp14.embl b/kleborate/ICEKp_references/ICEKp14.embl similarity index 100% rename from ICEKp_references/ICEKp14.embl rename to kleborate/ICEKp_references/ICEKp14.embl diff --git a/ICEKp_references/ICEKp2.embl b/kleborate/ICEKp_references/ICEKp2.embl similarity index 100% rename from ICEKp_references/ICEKp2.embl rename to kleborate/ICEKp_references/ICEKp2.embl diff --git a/ICEKp_references/ICEKp3.embl b/kleborate/ICEKp_references/ICEKp3.embl similarity index 100% rename from ICEKp_references/ICEKp3.embl rename to kleborate/ICEKp_references/ICEKp3.embl diff --git a/ICEKp_references/ICEKp4.embl b/kleborate/ICEKp_references/ICEKp4.embl similarity index 100% rename from ICEKp_references/ICEKp4.embl rename to kleborate/ICEKp_references/ICEKp4.embl diff --git 
a/ICEKp_references/ICEKp5.embl b/kleborate/ICEKp_references/ICEKp5.embl similarity index 100% rename from ICEKp_references/ICEKp5.embl rename to kleborate/ICEKp_references/ICEKp5.embl diff --git a/ICEKp_references/ICEKp6.embl b/kleborate/ICEKp_references/ICEKp6.embl similarity index 100% rename from ICEKp_references/ICEKp6.embl rename to kleborate/ICEKp_references/ICEKp6.embl diff --git a/ICEKp_references/ICEKp7.embl b/kleborate/ICEKp_references/ICEKp7.embl similarity index 100% rename from ICEKp_references/ICEKp7.embl rename to kleborate/ICEKp_references/ICEKp7.embl diff --git a/ICEKp_references/ICEKp8.embl b/kleborate/ICEKp_references/ICEKp8.embl similarity index 100% rename from ICEKp_references/ICEKp8.embl rename to kleborate/ICEKp_references/ICEKp8.embl diff --git a/ICEKp_references/ICEKp9.embl b/kleborate/ICEKp_references/ICEKp9.embl similarity index 100% rename from ICEKp_references/ICEKp9.embl rename to kleborate/ICEKp_references/ICEKp9.embl diff --git a/Kleborate.py b/kleborate/Kleborate.py similarity index 75% rename from Kleborate.py rename to kleborate/Kleborate.py index ac26f8c..f829696 100644 --- a/Kleborate.py +++ b/kleborate/Kleborate.py @@ -4,17 +4,18 @@ import os, sys, subprocess import gzip from optparse import OptionParser - -def main(): +from pkg_resources import resource_string, resource_filename + +def parse_options(): usage = "usage: %prog [options]" parser = OptionParser(usage=usage) # options - parser.add_option("-p", "--path", action="store", dest="repo_path", help="Path to Kleborate directory (default Kleborate)", default="Kleborate") + #parser.add_option("-p", "--path", action="store", dest="repo_path", help="Path to Kleborate directory (default Kleborate)", default="Kleborate") parser.add_option("-o", "--outfile", action="store", dest="outfile", help="File for detailed output (default Kleborate_results.txt)", default="Kleborate_results.txt") parser.add_option("-r", "--resistance", action="store", dest="resistance", help="Resistance 
genes screening (default off, set to on)", default="off") - + return parser.parse_args() @@ -48,25 +49,31 @@ def decompress_file(in_file, out_file): o.write(s) -if __name__ == "__main__": +def kleborate(): + (options, args) = parse_options() - (options, args) = main() + # find necessary resources + data_folder = resource_filename(__name__, 'data') + mlstblast = resource_filename(__name__, 'mlstBLAST.py') + resblast = resource_filename(__name__, 'resBLAST.py') + clusterblast = resource_filename(__name__, 'clusterBLAST.py') header_string = "\t".join(["strain","ST","Yersiniabactin","YbST","Colibactin","CbST","aerobactin","salmochelin","hypermucoidy","wzi","KL"]) print header_string, - + res_header_string = "" if options.resistance == "on": - f = os.popen("python "+ options.repo_path + "/resBLAST.py -s " + options.repo_path + "/data/ARGannot.r1.fasta -t " + options.repo_path + "/data/ARGannot_clustered80.csv") + + f = os.popen("python "+ resblast + " -s " + data_folder + "/ARGannot.r1.fasta -t " + data_folder + "/ARGannot_clustered80.csv") fields = f.readline().rstrip().split("\t") res_header_string = "\t".join(fields[1:]) f.close() print "\t" + res_header_string, - + print "" # end header - + mlst_header_string = "\t".join(["Chr_ST","gapA","infB","mdh","pgi","phoE","rpoB","tonB","YbST","ybtS","ybtX","ybtQ","ybtP","ybtA","irp2","irp1","ybtU","ybtT","ybtE","fyuA","CbST","clbA","clbB","clbC","clbD","clbE","clbF","clbG","clbH","clbI","clbL","clbM","clbN","clbO","clbP","clbQ"]) - + o = file(options.outfile, "w") o.write("\t".join([header_string,mlst_header_string])) if options.resistance == "on": @@ -76,7 +83,7 @@ def decompress_file(in_file, out_file): for contigs in args: (dir,fileName) = os.path.split(contigs) (name,ext) = os.path.splitext(fileName) - + # If the contigs are in a gz file, make a temporary decompressed FASTA file. 
if get_compression_type(contigs) == 'gz': new_contigs = contigs + '_temp_decompress.fasta' @@ -86,12 +93,13 @@ def decompress_file(in_file, out_file): else: temp_decompress = False - f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s "+ options.repo_path + "/data/Klebsiella_pneumoniae.fasta -d "+ options.repo_path + "/data/kpneumoniae.txt -i no --maxmissing 3 " + contigs) + + f = os.popen("python "+ mlstblast + " -s "+ data_folder + "/Klebsiella_pneumoniae.fasta -d "+ data_folder +"/kpneumoniae.txt -i no --maxmissing 3 " + contigs) # run chromosome MLST chr_ST = "" chr_ST_detail = [] - + for line in f: fields = line.rstrip().split("\t") if fields[1] != "ST": @@ -101,15 +109,15 @@ def decompress_file(in_file, out_file): if chr_ST != "0": chr_ST = "ST"+chr_ST f.close() - + # run ybt MLST - - f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s "+ options.repo_path + "/data/ybt_alleles.fasta -d "+ options.repo_path + "/data/YbST_profiles.txt -i yes --maxmissing 3 " + contigs) + + f = os.popen("python "+ mlstblast + " -s "+ data_folder +"/ybt_alleles.fasta -d "+ data_folder + "/YbST_profiles.txt -i yes --maxmissing 3 " + contigs) Yb_ST = "" Yb_group = "" Yb_ST_detail = [] - + for line in f: fields = line.rstrip().split("\t") if fields[2] != "ST": @@ -119,15 +127,15 @@ def decompress_file(in_file, out_file): if Yb_group == "": Yb_group = "-" f.close() - + # run colibactin MLST - - f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s "+ options.repo_path + "/data/colibactin_alleles.fasta -d "+ options.repo_path + "/data/CbST_profiles.txt -i yes --maxmissing 3 " + contigs) + + f = os.popen("python "+ mlstblast + " -s "+ data_folder + "/colibactin_alleles.fasta -d "+ data_folder + "/CbST_profiles.txt -i yes --maxmissing 3 " + contigs) Cb_ST = "" Cb_group = "" Cb_ST_detail = [] - + for line in f: fields = line.rstrip().split("\t") if fields[2] != "ST": @@ -137,19 +145,19 @@ def decompress_file(in_file, out_file): if Cb_group == "": Cb_group = 
"-" f.close() - + # screen for other virulence genes (binary calls) - f = os.popen("python "+ options.repo_path + "/clusterBLAST.py -s "+ options.repo_path + "/data/other_vir_clusters.fasta " + contigs) + f = os.popen("python "+ clusterblast + " -s "+ data_folder + "/other_vir_clusters.fasta " + contigs) for line in f: fields = line.rstrip().split("\t") if fields[1] != "aerobactin": # skip header (strain,vir_hits) = (fields[0],"\t".join(fields[1:])) f.close() - + # screen for wzi allele - f = os.popen("python "+ options.repo_path + "/mlstBLAST.py -s " + options.repo_path + "/data/wzi.fasta -d " + options.repo_path + "/data/wzi.txt -i yes --maxmissing 0 -m 99 " + contigs) + f = os.popen("python "+ mlstblast + " -s " + data_folder + "/wzi.fasta -d " + data_folder + "/wzi.txt -i yes --maxmissing 0 -m 99 " + contigs) for line in f: fields = line.rstrip().split("\t") if fields[0] != "ST": @@ -163,12 +171,12 @@ def decompress_file(in_file, out_file): # screen for resistance genes res_hits = "" if options.resistance == "on": - f = os.popen("python "+ options.repo_path + "/resBLAST.py -s " + options.repo_path + "/data/ARGannot.r1.fasta -t " + options.repo_path + "/data/ARGannot_clustered80.csv -q" + options.repo_path + "/data/QRDR_120.aa " + contigs) + f = os.popen("python "+ resblast + " -s " + data_folder + "/ARGannot.r1.fasta -t " + data_folder + "/ARGannot_clustered80.csv -q" + data_folder + "/QRDR_120.aa " + contigs) for line in f: fields = line.rstrip().split("\t") if fields[0] != "strain": # skip header - res_hits = "\t".join(fields[1:]) + res_hits = "\t".join(fields[1:]) f.close() # record results @@ -176,7 +184,7 @@ def decompress_file(in_file, out_file): if options.resistance == "on": print "\t" + res_hits, print "" - + o.write("\t".join([name,chr_ST,Yb_group,Yb_ST,Cb_group,Cb_ST,vir_hits,wzi_ST,Ktype,chr_ST]+chr_ST_detail+[Yb_ST]+Yb_ST_detail + [Cb_ST] + Cb_ST_detail)) if options.resistance == "on": o.write("\t" + res_hits) @@ -190,5 +198,8 @@ def 
decompress_file(in_file, out_file): # If we've been working on a temporary decompressed file, delete it now. if temp_decompress: os.remove(contigs) - + o.close() + +if __name__ == "__main__": + kleborate() diff --git a/kleborate/__init__.py b/kleborate/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/clusterBLAST.py b/kleborate/clusterBLAST.py similarity index 100% rename from clusterBLAST.py rename to kleborate/clusterBLAST.py diff --git a/data/ARGannot.r1.fasta b/kleborate/data/ARGannot.r1.fasta similarity index 100% rename from data/ARGannot.r1.fasta rename to kleborate/data/ARGannot.r1.fasta diff --git a/data/ARGannot.r1.fasta.nhr b/kleborate/data/ARGannot.r1.fasta.nhr similarity index 100% rename from data/ARGannot.r1.fasta.nhr rename to kleborate/data/ARGannot.r1.fasta.nhr diff --git a/data/ARGannot.r1.fasta.nin b/kleborate/data/ARGannot.r1.fasta.nin similarity index 100% rename from data/ARGannot.r1.fasta.nin rename to kleborate/data/ARGannot.r1.fasta.nin diff --git a/data/ARGannot.r1.fasta.nsq b/kleborate/data/ARGannot.r1.fasta.nsq similarity index 100% rename from data/ARGannot.r1.fasta.nsq rename to kleborate/data/ARGannot.r1.fasta.nsq diff --git a/data/ARGannot_clustered80.csv b/kleborate/data/ARGannot_clustered80.csv similarity index 100% rename from data/ARGannot_clustered80.csv rename to kleborate/data/ARGannot_clustered80.csv diff --git a/data/CbST_profiles.txt b/kleborate/data/CbST_profiles.txt similarity index 100% rename from data/CbST_profiles.txt rename to kleborate/data/CbST_profiles.txt diff --git a/data/Genotyped_Isolates_Lam2017.txt b/kleborate/data/Genotyped_Isolates_Lam2017.txt similarity index 100% rename from data/Genotyped_Isolates_Lam2017.txt rename to kleborate/data/Genotyped_Isolates_Lam2017.txt diff --git a/data/Klebsiella_pneumoniae.fasta b/kleborate/data/Klebsiella_pneumoniae.fasta similarity index 100% rename from data/Klebsiella_pneumoniae.fasta rename to kleborate/data/Klebsiella_pneumoniae.fasta diff --git 
a/data/Klebsiella_pneumoniae.fasta.nhr b/kleborate/data/Klebsiella_pneumoniae.fasta.nhr similarity index 100% rename from data/Klebsiella_pneumoniae.fasta.nhr rename to kleborate/data/Klebsiella_pneumoniae.fasta.nhr diff --git a/data/Klebsiella_pneumoniae.fasta.nin b/kleborate/data/Klebsiella_pneumoniae.fasta.nin similarity index 100% rename from data/Klebsiella_pneumoniae.fasta.nin rename to kleborate/data/Klebsiella_pneumoniae.fasta.nin diff --git a/data/Klebsiella_pneumoniae.fasta.nsq b/kleborate/data/Klebsiella_pneumoniae.fasta.nsq similarity index 100% rename from data/Klebsiella_pneumoniae.fasta.nsq rename to kleborate/data/Klebsiella_pneumoniae.fasta.nsq diff --git a/data/QRDR_120.aa b/kleborate/data/QRDR_120.aa similarity index 100% rename from data/QRDR_120.aa rename to kleborate/data/QRDR_120.aa diff --git a/data/QRDR_120.aa.nhr b/kleborate/data/QRDR_120.aa.nhr similarity index 100% rename from data/QRDR_120.aa.nhr rename to kleborate/data/QRDR_120.aa.nhr diff --git a/data/QRDR_120.aa.nin b/kleborate/data/QRDR_120.aa.nin similarity index 100% rename from data/QRDR_120.aa.nin rename to kleborate/data/QRDR_120.aa.nin diff --git a/data/QRDR_120.aa.nsq b/kleborate/data/QRDR_120.aa.nsq similarity index 100% rename from data/QRDR_120.aa.nsq rename to kleborate/data/QRDR_120.aa.nsq diff --git a/data/QRDR_120.aa.phr b/kleborate/data/QRDR_120.aa.phr similarity index 100% rename from data/QRDR_120.aa.phr rename to kleborate/data/QRDR_120.aa.phr diff --git a/data/QRDR_120.aa.pin b/kleborate/data/QRDR_120.aa.pin similarity index 100% rename from data/QRDR_120.aa.pin rename to kleborate/data/QRDR_120.aa.pin diff --git a/data/QRDR_120.aa.psq b/kleborate/data/QRDR_120.aa.psq similarity index 100% rename from data/QRDR_120.aa.psq rename to kleborate/data/QRDR_120.aa.psq diff --git a/data/YbST_profiles.txt b/kleborate/data/YbST_profiles.txt similarity index 100% rename from data/YbST_profiles.txt rename to kleborate/data/YbST_profiles.txt diff --git 
a/data/colibactin_alleles.fasta b/kleborate/data/colibactin_alleles.fasta similarity index 100% rename from data/colibactin_alleles.fasta rename to kleborate/data/colibactin_alleles.fasta diff --git a/data/colibactin_alleles.fasta.nhr b/kleborate/data/colibactin_alleles.fasta.nhr similarity index 100% rename from data/colibactin_alleles.fasta.nhr rename to kleborate/data/colibactin_alleles.fasta.nhr diff --git a/data/colibactin_alleles.fasta.nin b/kleborate/data/colibactin_alleles.fasta.nin similarity index 100% rename from data/colibactin_alleles.fasta.nin rename to kleborate/data/colibactin_alleles.fasta.nin diff --git a/data/colibactin_alleles.fasta.nsq b/kleborate/data/colibactin_alleles.fasta.nsq similarity index 100% rename from data/colibactin_alleles.fasta.nsq rename to kleborate/data/colibactin_alleles.fasta.nsq diff --git a/data/kpneumoniae.txt b/kleborate/data/kpneumoniae.txt similarity index 100% rename from data/kpneumoniae.txt rename to kleborate/data/kpneumoniae.txt diff --git a/data/other_vir_clusters.fasta b/kleborate/data/other_vir_clusters.fasta similarity index 100% rename from data/other_vir_clusters.fasta rename to kleborate/data/other_vir_clusters.fasta diff --git a/data/other_vir_clusters.fasta.nhr b/kleborate/data/other_vir_clusters.fasta.nhr similarity index 100% rename from data/other_vir_clusters.fasta.nhr rename to kleborate/data/other_vir_clusters.fasta.nhr diff --git a/data/other_vir_clusters.fasta.nin b/kleborate/data/other_vir_clusters.fasta.nin similarity index 100% rename from data/other_vir_clusters.fasta.nin rename to kleborate/data/other_vir_clusters.fasta.nin diff --git a/data/other_vir_clusters.fasta.nsq b/kleborate/data/other_vir_clusters.fasta.nsq similarity index 100% rename from data/other_vir_clusters.fasta.nsq rename to kleborate/data/other_vir_clusters.fasta.nsq diff --git a/data/wzi.fasta b/kleborate/data/wzi.fasta similarity index 100% rename from data/wzi.fasta rename to kleborate/data/wzi.fasta diff --git 
a/data/wzi.fasta.nhr b/kleborate/data/wzi.fasta.nhr similarity index 100% rename from data/wzi.fasta.nhr rename to kleborate/data/wzi.fasta.nhr diff --git a/data/wzi.fasta.nin b/kleborate/data/wzi.fasta.nin similarity index 100% rename from data/wzi.fasta.nin rename to kleborate/data/wzi.fasta.nin diff --git a/data/wzi.fasta.nsq b/kleborate/data/wzi.fasta.nsq similarity index 100% rename from data/wzi.fasta.nsq rename to kleborate/data/wzi.fasta.nsq diff --git a/data/wzi.txt b/kleborate/data/wzi.txt similarity index 100% rename from data/wzi.txt rename to kleborate/data/wzi.txt diff --git a/data/ybt_alleles.fasta b/kleborate/data/ybt_alleles.fasta similarity index 100% rename from data/ybt_alleles.fasta rename to kleborate/data/ybt_alleles.fasta diff --git a/data/ybt_alleles.fasta.nhr b/kleborate/data/ybt_alleles.fasta.nhr similarity index 100% rename from data/ybt_alleles.fasta.nhr rename to kleborate/data/ybt_alleles.fasta.nhr diff --git a/data/ybt_alleles.fasta.nin b/kleborate/data/ybt_alleles.fasta.nin similarity index 100% rename from data/ybt_alleles.fasta.nin rename to kleborate/data/ybt_alleles.fasta.nin diff --git a/data/ybt_alleles.fasta.nsq b/kleborate/data/ybt_alleles.fasta.nsq similarity index 100% rename from data/ybt_alleles.fasta.nsq rename to kleborate/data/ybt_alleles.fasta.nsq diff --git a/mlstBLAST.py b/kleborate/mlstBLAST.py similarity index 100% rename from mlstBLAST.py rename to kleborate/mlstBLAST.py diff --git a/resBLAST.py b/kleborate/resBLAST.py similarity index 100% rename from resBLAST.py rename to kleborate/resBLAST.py diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..26e3b1d --- /dev/null +++ b/setup.py @@ -0,0 +1,35 @@ +from setuptools import setup + + +def readme(): + with open('README.md') as f: + return f.read() + + +setup(name='Kleborate', + version='0.0.1', + description='Kleborate', + long_description=readme(), + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: GNU General Public License v3 (GPLv3)', + 
'Programming Language :: Python :: 2.7', + 'Topic :: Scientific/Engineering :: Bio-Informatics', + 'Topic :: Scientific/Engineering :: Medical Science Apps.', + 'Intended Audience :: Science/Research', + ], + keywords='microbial genomics sequence typing', + url='https://github.com/katholt/Kleborate', + author='Kathryn Holt', + author_email='', + license='GPLv3', + packages=['kleborate'], + install_requires=[ + ], + test_suite='nose.collector', + tests_require=[], + entry_points={ + 'console_scripts': ['kleborate=kleborate.Kleborate:kleborate'], + }, + include_package_data=True, + zip_safe=False) diff --git a/test.sh b/test.sh old mode 100644 new mode 100755 index aba553e..d760fdd --- a/test.sh +++ b/test.sh @@ -20,4 +20,4 @@ mv GCA_000016305.1_ASM1630v1_genomic.fna MGH78578.fna # run typing # NOTE: -p must point to the Kleborate directory -python Kleborate.py -p . -o details.txt *.fna \ No newline at end of file +kleborate -o details.txt *.fna