In [17]:
from Bio import SeqIO


In [18]:
# Qualifier names that may be carried over into the feature table.
# Keys are compared against qualifiers parsed from a GenBank file, where
# multi-word qualifiers are spelled with underscores (e.g. 'collection-date'
# is written as 'collection_date'), so the underscore spelling of
# 'isolation_source' is listed as well; the hyphenated form is kept so any
# existing use of that spelling keeps working.
allowed_qualifiers = [
    # feature-level qualifiers
    'locus_tag', 'gene', 'product', 'pseudo', 'protein_id', 'gene_desc',
    'old_locus_tag', 'note', 'inference',
    # source-level qualifiers
    'organism', 'mol_type', 'strain', 'sub_species',
    'isolation-source', 'isolation_source',
    'country', 'collection_date',
]

In [19]:
# Parse the curated GenBank file 'Erdman.curated.gb' and keep every SeqRecord
# in memory as a list, so the records can be iterated more than once.
records = [rec for rec in SeqIO.parse('Erdman.curated.gb', 'genbank')]


In [20]:
# Export every record in `records` twice: its sequence is appended to a FASTA
# file (Erdman.fsa) and its annotation to a tab-delimited feature table
# (Erdman.tbl). The `with` statement closes both files even if an exception
# interrupts the loop, so the handles never leak.
with open('Erdman.fsa', 'w') as fasta_fh, open('Erdman.tbl', 'w') as feature_fh:
    for rec in records:  # for every SeqRecord object in the list 'records'
        # Append this contig's sequence to the FASTA file; SeqIO derives the
        # header line from the record's id and description.
        SeqIO.write([rec], fasta_fh, 'fasta')

        # First line of this record's feature table: the LOCUS name.
        print('>Feature %s' % (rec.name), file=feature_fh)

        for f in rec.features:
            location = f.location
            # Convert Biopython's 0-based, end-exclusive span to 1-based
            # inclusive coordinates. int(position) and location.strand are
            # used instead of nofuzzy_start / nofuzzy_end / f.strand, which
            # are deprecated aliases that newer Biopython releases drop.
            first = int(location.start) + 1
            last = int(location.end)

            # A minus-strand feature is written with its coordinates swapped
            # (start > stop). Only an explicit -1 strand is reversed, so a
            # feature whose strand is undetermined (None/0) is no longer
            # written as if it were on the minus strand.
            # NOTE(review): only the outer span is written; a joined
            # (multi-interval) location is collapsed to one line - confirm
            # that this is acceptable for the input file.
            if location.strand == -1:
                first, last = last, first
            print('%d\t%d\t%s' % (first, last, f.type), file=feature_fh)

            # Every CDS needs a product. Qualifier values are lists, so the
            # default must be a one-element list; a bare string would be
            # iterated character by character by the loop below.
            if (f.type == 'CDS') and ('product' not in f.qualifiers):
                f.qualifiers['product'] = ['hypothetical protein']

            # Write one line per qualifier value (key, then value).
            for (key, values) in f.qualifiers.items():
                # Optional filter, currently disabled: uncomment to skip
                # qualifiers that are not listed in allowed_qualifiers.
                # if key not in allowed_qualifiers:
                #     continue
                for v in values:
                    print('\t\t\t%s\t%s' % (key, v), file=feature_fh)