-
Notifications
You must be signed in to change notification settings - Fork 3
/
split_fasta.py
executable file
·31 lines (28 loc) · 987 Bytes
/
split_fasta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
#!/usr/bin/env python
# Usage: ./split_fasta.py /path/to/fasta.fa 2000
# ./split_fasta.py /path/to/reads.fastq 100
from __future__ import print_function
from Bio import SeqIO
import sys
import os
# Split a FASTA/FASTQ file into consecutive chunks of at most `n` records.
# Chunks are written to the current working directory as
# "<input basename>.<chunk number>.<format>", numbered from 1.

# Validate the command line up front so bad input yields a short message
# instead of a traceback (IndexError / ValueError / ZeroDivisionError).
if len(sys.argv) < 3:
    print("Usage: {} /path/to/sequences.(fa|fastq) records_per_file".format(sys.argv[0]),
          file=sys.stderr)
    sys.exit(1)

fasta = sys.argv[1]
try:
    n = int(sys.argv[2])
except ValueError:
    print("ERROR: records per file must be an integer, got {!r}.".format(sys.argv[2]),
          file=sys.stderr)
    sys.exit(1)
if n < 1:
    # n == 0 would divide by zero below; negative n yields meaningless chunk numbers.
    print("ERROR: records per file must be at least 1, got {}.".format(n), file=sys.stderr)
    sys.exit(1)

# The format is guessed from the last character of the file name:
# names ending in "a" (e.g. .fa, .fasta) are FASTA, names ending in "q"
# (e.g. .fq, .fastq) are FASTQ.
if fasta.endswith("a"):
    fmt = "fasta"
elif fasta.endswith("q"):
    fmt = "fastq"
else:
    print("ERROR: Failed to read file {}. (Is it a fasta / fastq?)".format(fasta), file=sys.stderr)
    sys.exit(1)

# Output files go to the current directory, not next to the input file.
out_fasta = os.path.join(os.getcwd(), os.path.basename(fasta))

i = 0  # number of records written so far
with open(fasta, 'r') as in_handle:
    # Chunk numbering starts at 1 (original note: "so PBS likes us").
    out_handle = open("{}.1.{}".format(out_fasta, fmt), 'w')
    try:
        for record in SeqIO.parse(in_handle, fmt):
            # Every n-th record (except the very first) starts a new chunk file.
            if i % n == 0 and i > 0:
                out_handle.close()
                out_handle = open("{}.{}.{}".format(out_fasta, (i // n) + 1, fmt), 'w')
            SeqIO.write(record, out_handle, fmt)
            i += 1
    finally:
        # Close the current chunk even if parsing or writing fails;
        # closing an already-closed handle is a harmless no-op.
        out_handle.close()

# Ceiling division gives the real number of chunks; the first file is always
# created, so an empty input still counts as one (empty) file.
n_files = max(1, (i + n - 1) // n)
print("Successfully split {} into {} files.".format(fasta, n_files))