# Converting antiSMASH GBK output into a TSV

In [1]:
import os
import pandas as pd
from Bio import SeqIO

In [52]:
# Root directory that holds the antiSMASH annotation results.
workDir = '/home/sam/FullCyc_metagenome/annotation/antismash'

# Path of the TSV table (presumably the file this notebook writes).
table_output = os.path.join(workDir, 'antismash_BCGtable.tsv')

# Whole-assembly GenBank file inside the 'antismash_output' directory.
# Alternative kept for reference: point at the results directory itself.
#antismash_output = os.path.join(workDir, 'antismash_output')
antismash_output = os.path.join(workDir, 'antismash_output', 'Ga0334612_contigs.gbk')

## Read in the antiSMASH output

In [45]:
# Collect the per-region GenBank files from the antiSMASH results directory.
# `antismash_output` may name either that directory or a file inside it (the
# configuration cell currently points at the whole-assembly .gbk file), and
# os.listdir() raises NotADirectoryError when handed a file, so resolve the
# containing directory first.
region_dir = (
    antismash_output
    if os.path.isdir(antismash_output)
    else os.path.dirname(antismash_output)
)

# NOTE(review): only names ending in 'region001.gbk' are kept, so any
# 'region002', 'region003', ... files would be skipped -- confirm that this
# is intended.
# sorted() makes the processing order reproducible; os.listdir() returns
# entries in arbitrary order.
gbk_files = sorted(
    os.path.join(region_dir, f)
    for f in os.listdir(region_dir)
    if f.endswith('region001.gbk')
)

In [50]:
# Walk every record of every collected GenBank file, print each feature whose
# type is 'region', and keep a running total of them in n_BGC.
# The loop variables keep their names because later cells may read them
# (e.g. `record` is inspected further down).
n_BGC = 0
for gbk in gbk_files:
    for record in SeqIO.parse(gbk, 'genbank'):
        for feature in record.features:
            # Guard clause: skip everything that is not a region feature.
            if feature.type != 'region':
                continue
            print(feature)
            n_BGC += 1

type: region
location: [0:5335](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['T1PKS']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['cds(PKS_AT and (PKS_KS or ene_KS or mod_KS or hyb_KS or itr_KS or tra_KS))']
    Key: tool, Value: ['antismash']

type: region
location: [0:7160](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['phosphonate']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['phosphonates']
    Key: tool, Value: ['antismash']

type: region
location: [0:7285](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['terpene']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(Terpene_synth or Terpene_synth_C or phytoene_synt or Lycopene_cycl or terpene_cyclase or NapT7 or fung_ggpps or fung_ggpps2 or trich

type: region
location: [0:6186](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['NRPS']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(cds(Condensation and (AMP-binding or A-OX)) or (Condensation and AMP-binding))']
    Key: tool, Value: ['antismash']

type: region
location: [0:6467](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['NRPS']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(cds(Condensation and (AMP-binding or A-OX)) or (Condensation and AMP-binding))']
    Key: tool, Value: ['antismash']

type: region
location: [0:7936](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['bacteriocin']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(strepbact or Antimicrobial14 or Bacteriocin_IId or BacteriocIIc_

type: region
location: [0:17602](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['NRPS']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(cds(Condensation and (AMP-binding or A-OX)) or (Condensation and AMP-binding))']
    Key: tool, Value: ['antismash']

type: region
location: [0:8485](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['bacteriocin']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(strepbact or Antimicrobial14 or Bacteriocin_IId or BacteriocIIc_cy or Bacteriocin_II or Bacteriocin_IIi or Lactococcin or Antimicrobial17 or Lactococcin_972 or Bacteriocin_IIc or LcnG-beta or Cloacin or Linocin_M18 or TIGR03603 or TIGR03604 or TIGR03651 or TIGR03678 or TIGR03693 or TIGR03798 or TIGR03601 or TIGR03602 or TIGR03795 or TIGR03793 or TIGR03975 or DUF692 or TIGR01193)']
    Key: tool, Value: ['anti

type: region
location: [0:9404](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['lanthipeptide']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['((LANC_like and (Lant_dehydr_N or Lant_dehydr_C) or cds(LANC_like and (Pkinase or DUF4135))) and not (YcaO or TIGR03882))']
    Key: tool, Value: ['antismash']

type: region
location: [0:9007](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['NRPS']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(cds(Condensation and (AMP-binding or A-OX)) or (Condensation and AMP-binding))']
    Key: tool, Value: ['antismash']

type: region
location: [0:5834](+)
qualifiers:
    Key: candidate_cluster_numbers, Value: ['1']
    Key: contig_edge, Value: ['True']
    Key: product, Value: ['NRPS']
    Key: region_number, Value: ['1']
    Key: rules, Value: ['(cds(Condensation and

In [51]:
# Display the total number of 'region' features counted by the loop above.
n_BGC

169

In [49]:
# Print every feature (of any type) of the record currently bound to `record`
# -- when the notebook is run top to bottom, that is the last record parsed by
# the counting loop -- to inspect which feature types and qualifiers exist.
for feat in record.features:
    print(feat)

type: protocluster
location: [0:5759](+)
qualifiers:
    Key: aStool, Value: ['rule-based-clusters']
    Key: contig_edge, Value: ['True']
    Key: core_location, Value: ['[1:4510](-)']
    Key: cutoff, Value: ['20000']
    Key: detection_rule, Value: ['(cds(Condensation and (AMP-binding or A-OX)) or (Condensation and AMP-binding))']
    Key: neighbourhood, Value: ['20000']
    Key: product, Value: ['NRPS']
    Key: protocluster_number, Value: ['1']
    Key: tool, Value: ['antismash']

type: proto_core
location: [1:4510](-)
qualifiers:
    Key: aStool, Value: ['rule-based-clusters']
    Key: cutoff, Value: ['20000']
    Key: detection_rule, Value: ['(cds(Condensation and (AMP-binding or A-OX)) or (Condensation and AMP-binding))']
    Key: neighbourhood, Value: ['20000']
    Key: product, Value: ['NRPS']
    Key: protocluster_number, Value: ['1']
    Key: tool, Value: ['antismash']

type: cand_cluster
location: [0:5759](+)
qualifiers:
    Key: SMILES, Value: ['NC(CO)C(=O)O']
    Key: ca