/
RunProkka.ipy
executable file
·38 lines (27 loc) · 1.06 KB
/
RunProkka.ipy
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/usr/bin/ipython
# -*- mode: python -*-
import sys
import os
# This script runs the annotation program Prokka on a directory of fasta-format
# genomes. Each genome will have its own output directory. Additionally, a
# directory containing the protein sequences in fasta format for each genome
# will be created, as well as a directory containing the corresponding
# nucleotide sequences.
def usage():
    """Print the command-line synopsis of this script to standard output.

    The message names the script and its single required argument, the
    directory holding the fasta genomes. Nothing is returned and the
    process is not terminated here; the caller decides whether to exit.
    """
    # A parenthesised single string is valid both as a Python 2 print
    # statement and as a Python 3 print() call, and emits the same text.
    print("RunProkka.ipy\n <directory with fasta genomes>")
if len(sys.argv) != 2:
usage()
genomeDirectory = sys.argv[1]
# make directories for protein and dna sequences if they don't already exist
!mkdir -p dnaSeqs
!mkdir -p proteinSeqs
# run prokka for each genome in directory
genomes = !ls {genomeDirectory}
for genome in genomes:
prefix = os.path.splitext(genome)[0]
print prefix
outdir = genomeDirectory + prefix + '_annotation'
print outdir
!prokka --outdir {outdir} --prefix {prefix} --locustag {prefix} {genomeDirectory + genome}
!cp {outdir + '/' + prefix + '.faa'} proteinSeqs
!cp {outdir + '/' + prefix + '.ffn'} dnaSeqs