In [1]:
import pandas as pd
import pprint
from BCBio.GFF import GFFExaminer
from BCBio import GFF
import gzip
import warnings
warnings.filterwarnings("ignore")

# Generic Feature Format (GFF) is a biological sequence file format for representing features and annotations on sequences

# Overview

In [2]:
# Summarise which (source, type) feature pairs appear as parent -> child
# in the GenBank GFF file.
in_file = "data/GCA_001404095.1_CH1034_genomic.gff.gz"
examiner = GFFExaminer()
# The context manager closes the gzip handle even if the examiner raises.
with gzip.open(in_file, mode='rt') as in_handle:
    pprint.pprint(examiner.parent_child_map(in_handle))

{('EMBL', 'gene'): [('EMBL', 'CDS'),
                    ('EMBL', 'rRNA'),
                    ('EMBL', 'tRNA'),
                    ('EMBL', 'transcript')],
 ('EMBL', 'pseudogene'): [('EMBL', 'CDS')],
 ('EMBL', 'rRNA'): [('EMBL', 'exon')],
 ('EMBL', 'tRNA'): [('EMBL', 'exon')],
 ('EMBL', 'transcript'): [('EMBL', 'exon')]}


In [3]:
# List the values that can be used to restrict parsing of the GenBank GFF
# file (e.g. per-sequence feature counts under 'gff_id').
in_file = "data/GCA_001404095.1_CH1034_genomic.gff.gz"
examiner = GFFExaminer()
# The context manager closes the gzip handle even if the examiner raises.
with gzip.open(in_file, mode='rt') as in_handle:
    pprint.pprint(examiner.available_limits(in_handle))

{'gff_id': {('CXPD01000001.1',): 201,
            ('CXPD01000002.1',): 47,
            ('CXPD01000003.1',): 307,
            ('CXPD01000004.1',): 382,
            ('CXPD01000005.1',): 628,
            ('CXPD01000006.1',): 213,
            ('CXPD01000007.1',): 103,
            ('CXPD01000008.1',): 511,
            ('CXPD01000009.1',): 138,
            ('CXPD01000010.1',): 421,
            ('CXPD01000011.1',): 695,
            ('CXPD01000012.1',): 209,
            ('CXPD01000013.1',): 59,
            ('CXPD01000014.1',): 75,
            ('CXPD01000015.1',): 603,
            ('CXPD01000016.1',): 367,
            ('CXPD01000017.1',): 778,
            ('CXPD01000018.1',): 745,
            ('CXPD01000019.1',): 133,
            ('CXPD01000020.1',): 228,
            ('CXPD01000021.1',): 778,
            ('CXPD01000022.1',): 97,
            ('CXPD01000023.1',): 179,
            ('CXPD01000024.1',): 675,
            ('CXPD01000025.1',): 1,
            ('CXPD01000026.1',): 3,
            ('CXPD01

# Parse GFF file (by CDS)

## GenBank, GFF downloaded from NCBI

In [11]:
# Collect manganese-related CDS features from the GenBank (GCA) GFF file.
# A feature is kept when its product mentions "manganese" or its gene name
# contains "mnt" (both comparisons are case-insensitive).
in_file = "data/GCA_001404095.1_CH1034_genomic.gff.gz" # file path
limit_info = dict(gff_type=["CDS"]) # select only CDS as feature type

res = [] # storage of results
# The context manager closes the gzip handle even if parsing raises.
with gzip.open(in_file, mode='rt') as in_handle: # read the file
    for rec in GFF.parse(in_handle, limit_info=limit_info): # rec is a sequence
        for f in rec.features: # f is a feature
            # Missing qualifiers fall back to '' so that a feature matching on
            # only one criterion cannot raise KeyError when the row is built.
            gene = f.qualifiers.get('gene', [''])[0]
            product = f.qualifiers.get('product', [''])[0]
            if 'manganese' in product.lower() or 'mnt' in gene.lower():
                res.append([
                    'GCA_001404095.1', # genome ID
                    rec.id, # sequence ID
                    f.id, # feature ID
                    int(f.location.start), # start
                    int(f.location.end),   # end
                    f.location.strand, # strand
                    f.qualifiers.get('source', [''])[0], # source of this annotation
                    gene, # gene name ('' when absent)
                    product, # product ('' when absent)
                    f.qualifiers.get('locus_tag', [''])[0] # locus tag ('' when absent)
                ])
df_res_genbank_ncbi = pd.DataFrame(res, columns=['GenomeID','SequenceID','FeatureID','StartPosition','EndPosition','Strand','Source','Name','Product','LocusTag'])

In [12]:
# Display the manganese-related CDS hits found in the GenBank GFF from NCBI.
df_res_genbank_ncbi

Unnamed: 0,GenomeID,SequenceID,FeatureID,StartPosition,EndPosition,Strand,Source,Name,Product,LocusTag
0,GCA_001404095.1,CXPD01000008.1,cds-CTQ27225.1,222051,222177,-1,EMBL,mntS,Small protein MntS,CH1034_160213
1,GCA_001404095.1,CXPD01000008.1,cds-CTQ27226.1,222362,222836,1,EMBL,mntR,DNA-binding transcriptional regulator of mntH,CH1034_160214
2,GCA_001404095.1,CXPD01000018.1,cds-CTQ29230.1,407809,409051,-1,EMBL,mntH,manganese/divalent cation transporter,CH1034_250358
3,GCA_001404095.1,CXPD01000032.1,cds-CTQ30648.1,1,739,-1,EMBL,mntB,Manganese transport system ATP-binding protein...,CH1034_380001


## RefSeq, GFF downloaded from NCBI

In [13]:
# Collect manganese-related CDS features from the RefSeq (GCF) GFF file.
# A feature is kept when its product mentions "manganese" or its gene name
# contains "mnt" (both comparisons are case-insensitive).
in_file = "data/GCF_001404095.1_CH1034_genomic.gff.gz" # file path
limit_info = dict(gff_type=["CDS"]) # select only CDS as feature type

res = [] # storage of results
# The context manager closes the gzip handle even if parsing raises.
with gzip.open(in_file, mode='rt') as in_handle: # read the file
    for rec in GFF.parse(in_handle, limit_info=limit_info): # rec is a sequence
        # Drop only a literal leading 'NZ_'. str.lstrip('NZ_') would strip any
        # leading run of the characters 'N', 'Z' and '_', which could also eat
        # the start of the accession itself.
        seq_id = rec.id[len('NZ_'):] if rec.id.startswith('NZ_') else rec.id
        for f in rec.features: # f is a feature
            # Missing qualifiers fall back to '' so that a feature matching on
            # only one criterion cannot raise KeyError when the row is built.
            gene = f.qualifiers.get('gene', [''])[0]
            product = f.qualifiers.get('product', [''])[0]
            if 'manganese' in product.lower() or 'mnt' in gene.lower():
                res.append([
                    'GCF_001404095.1', # genome ID
                    seq_id, # sequence ID without the 'NZ_' prefix
                    f.id, # feature ID
                    int(f.location.start), # start
                    int(f.location.end),   # end
                    f.location.strand, # strand
                    f.qualifiers.get('source', [''])[0], # source of this annotation
                    gene, # gene name ('' when absent)
                    product, # product ('' when absent)
                    f.qualifiers.get('locus_tag', [''])[0] # locus tag ('' when absent)
                ])
df_res_refseq_ncbi = pd.DataFrame(res, columns=['GenomeID','SequenceID','FeatureID','StartPosition','EndPosition','Strand','Source','Name','Product','LocusTag'])

In [14]:
# Display the manganese-related CDS hits found in the RefSeq GFF from NCBI.
df_res_refseq_ncbi

Unnamed: 0,GenomeID,SequenceID,FeatureID,StartPosition,EndPosition,Strand,Source,Name,Product,LocusTag
0,GCF_001404095.1,CXPD01000008.1,cds-WP_002895851.1,222051,222177,-1,Protein Homology,mntS,manganase accumulation protein MntS,CH1034_RS05790
1,GCF_001404095.1,CXPD01000008.1,cds-WP_004176765.1,222362,222836,1,Protein Homology,mntR,manganese-binding transcriptional regulator MntR,CH1034_RS05795
2,GCF_001404095.1,CXPD01000011.1,cds-WP_004179357.1,25540,26428,1,Protein Homology,,manganese catalase family protein,CH1034_RS07395
3,GCF_001404095.1,CXPD01000017.1,cds-WP_002910921.1,282182,282749,1,Protein Homology,mntP,manganese efflux pump MntP,CH1034_RS13540
4,GCF_001404095.1,CXPD01000021.1,cds-WP_032420502.1,144173,144995,1,Protein Homology,,manganese/iron ABC transporter ATP-binding pro...,CH1034_RS17460
5,GCF_001404095.1,CXPD01000021.1,cds-WP_032420503.1,144991,145840,1,Protein Homology,sitC,iron/manganese ABC transporter permease subuni...,CH1034_RS17465
6,GCF_001404095.1,CXPD01000032.1,cds-WP_040167306.1,20461,21322,-1,Protein Homology,sitC,iron/manganese ABC transporter permease subuni...,CH1034_RS23135
7,GCF_001404095.1,CXPD01000032.1,cds-WP_000075719.1,21318,22140,-1,Protein Homology,,manganese/iron ABC transporter ATP-binding pro...,CH1034_RS23140


## GenBank, GFF annotated by Prokka

In [17]:
# Collect manganese-related CDS features from the Prokka annotation of the
# GenBank (GCA) assembly. A feature is kept when its product mentions
# "manganese" or its gene name contains "mnt" (both case-insensitive).
in_file = "prokka_output/GCA_001404095.1_CH1034_genomic/PROKKA_05302023.gff" # file path
limit_info = dict(gff_type=["CDS"]) # select only CDS as feature type

res = [] # storage of results
# Open the file in this cell and let the context manager close it, instead of
# closing whatever `in_handle` an earlier cell happened to leave behind.
with open(in_file) as in_handle:
    for rec in GFF.parse(in_handle, limit_info=limit_info): # rec is a sequence
        for f in rec.features: # f is a feature
            # Missing qualifiers fall back to '' so that a feature matching on
            # only one criterion cannot raise KeyError when the row is built.
            gene = f.qualifiers.get('gene', [''])[0]
            product = f.qualifiers.get('product', [''])[0]
            if 'manganese' in product.lower() or 'mnt' in gene.lower():
                res.append([
                    'GCA_001404095.1', # genome ID
                    rec.id, # sequence ID
                    f.id, # feature ID
                    int(f.location.start), # start
                    int(f.location.end),   # end
                    f.location.strand, # strand
                    f.qualifiers.get('source', [''])[0], # source of this annotation
                    gene, # gene name ('' when absent)
                    product, # product ('' when absent)
                    f.qualifiers.get('locus_tag', [''])[0] # locus tag ('' when absent)
                ])
df_res_genbank_prokka = pd.DataFrame(res, columns=['GenomeID','SequenceID','FeatureID','StartPosition','EndPosition','Strand','Source','Name','Product','LocusTag'])

In [18]:
# Display the manganese-related CDS hits from the Prokka annotation of the GenBank assembly.
df_res_genbank_prokka

Unnamed: 0,GenomeID,SequenceID,FeatureID,StartPosition,EndPosition,Strand,Source,Name,Product,LocusTag
0,GCA_001404095.1,CXPD01000008.1,IENKBCOO_01103,222051,222177,-1,Prodigal:002006,mntS,Small protein MntS,IENKBCOO_01103
1,GCA_001404095.1,CXPD01000008.1,IENKBCOO_01104,222362,222836,1,Prodigal:002006,mntR,Transcriptional regulator MntR,IENKBCOO_01104
2,GCA_001404095.1,CXPD01000017.1,IENKBCOO_02569,282182,282749,1,Prodigal:002006,mntP,putative manganese efflux pump MntP,IENKBCOO_02569
3,GCA_001404095.1,CXPD01000018.1,IENKBCOO_03010,407809,409051,-1,Prodigal:002006,mntH,Divalent metal cation transporter MntH,IENKBCOO_03010
4,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03279,96290,97169,-1,Prodigal:002006,mntA_1,Manganese-binding lipoprotein MntA,IENKBCOO_03279
5,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03280,97176,98040,-1,Prodigal:002006,mntB_1,Manganese transport system membrane protein MntB,IENKBCOO_03280
6,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03281,98036,98714,-1,Prodigal:002006,mntB_2,Manganese transport system ATP-binding protein...,IENKBCOO_03281
7,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03326,144173,144995,1,Prodigal:002006,mntB_3,Manganese transport system ATP-binding protein...,IENKBCOO_03326
8,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03327,144991,145840,1,Prodigal:002006,mntB_4,Manganese transport system membrane protein MntB,IENKBCOO_03327
9,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03328,145833,146688,1,Prodigal:002006,mntB_5,Manganese transport system membrane protein MntB,IENKBCOO_03328


## RefSeq, GFF annotated by Prokka

In [20]:
# Collect manganese-related CDS features from the Prokka annotation of the
# RefSeq (GCF) assembly. A feature is kept when its product mentions
# "manganese" or its gene name contains "mnt" (both case-insensitive).
in_file = "prokka_output/GCF_001404095.1_CH1034_genomic/PROKKA_05302023.gff" # file path
limit_info = dict(gff_type=["CDS"]) # select only CDS as feature type

res = [] # storage of results
# Open the file in this cell and let the context manager close it, instead of
# closing whatever `in_handle` an earlier cell happened to leave behind.
with open(in_file) as in_handle:
    for rec in GFF.parse(in_handle, limit_info=limit_info): # rec is a sequence
        # Drop only a literal leading 'NZ_'. str.lstrip('NZ_') would strip any
        # leading run of the characters 'N', 'Z' and '_', which could also eat
        # the start of the accession itself.
        seq_id = rec.id[len('NZ_'):] if rec.id.startswith('NZ_') else rec.id
        for f in rec.features: # f is a feature
            # Missing qualifiers fall back to '' so that a feature matching on
            # only one criterion cannot raise KeyError when the row is built.
            gene = f.qualifiers.get('gene', [''])[0]
            product = f.qualifiers.get('product', [''])[0]
            if 'manganese' in product.lower() or 'mnt' in gene.lower():
                res.append([
                    'GCF_001404095.1', # genome ID
                    seq_id, # sequence ID without the 'NZ_' prefix
                    f.id, # feature ID
                    int(f.location.start), # start
                    int(f.location.end),   # end
                    f.location.strand, # strand
                    f.qualifiers.get('source', [''])[0], # source of this annotation
                    gene, # gene name ('' when absent)
                    product, # product ('' when absent)
                    f.qualifiers.get('locus_tag', [''])[0] # locus tag ('' when absent)
                ])
df_res_refseq_prokka = pd.DataFrame(res, columns=['GenomeID','SequenceID','FeatureID','StartPosition','EndPosition','Strand','Source','Name','Product','LocusTag'])

In [21]:
# Display the manganese-related CDS hits from the Prokka annotation of the RefSeq assembly.
df_res_refseq_prokka

Unnamed: 0,GenomeID,SequenceID,FeatureID,StartPosition,EndPosition,Strand,Source,Name,Product,LocusTag
0,GCF_001404095.1,CXPD01000008.1,BCDAKLAD_01103,222051,222177,-1,Prodigal:002006,mntS,Small protein MntS,BCDAKLAD_01103
1,GCF_001404095.1,CXPD01000008.1,BCDAKLAD_01104,222362,222836,1,Prodigal:002006,mntR,Transcriptional regulator MntR,BCDAKLAD_01104
2,GCF_001404095.1,CXPD01000017.1,BCDAKLAD_02569,282182,282749,1,Prodigal:002006,mntP,putative manganese efflux pump MntP,BCDAKLAD_02569
3,GCF_001404095.1,CXPD01000018.1,BCDAKLAD_03010,407809,409051,-1,Prodigal:002006,mntH,Divalent metal cation transporter MntH,BCDAKLAD_03010
4,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03279,96290,97169,-1,Prodigal:002006,mntA_1,Manganese-binding lipoprotein MntA,BCDAKLAD_03279
5,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03280,97176,98040,-1,Prodigal:002006,mntB_1,Manganese transport system membrane protein MntB,BCDAKLAD_03280
6,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03281,98036,98714,-1,Prodigal:002006,mntB_2,Manganese transport system ATP-binding protein...,BCDAKLAD_03281
7,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03326,144173,144995,1,Prodigal:002006,mntB_3,Manganese transport system ATP-binding protein...,BCDAKLAD_03326
8,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03327,144991,145840,1,Prodigal:002006,mntB_4,Manganese transport system membrane protein MntB,BCDAKLAD_03327
9,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03328,145833,146688,1,Prodigal:002006,mntB_5,Manganese transport system membrane protein MntB,BCDAKLAD_03328


## Compare GFF between GenBank and RefSeq annotated by Prokka (the results are the same)

In [24]:
# Show the Prokka hits for the GenBank assembly again, for comparison with the RefSeq table in the next cell.
df_res_genbank_prokka

Unnamed: 0,GenomeID,SequenceID,FeatureID,StartPosition,EndPosition,Strand,Source,Name,Product,LocusTag
0,GCA_001404095.1,CXPD01000008.1,IENKBCOO_01103,222051,222177,-1,Prodigal:002006,mntS,Small protein MntS,IENKBCOO_01103
1,GCA_001404095.1,CXPD01000008.1,IENKBCOO_01104,222362,222836,1,Prodigal:002006,mntR,Transcriptional regulator MntR,IENKBCOO_01104
2,GCA_001404095.1,CXPD01000017.1,IENKBCOO_02569,282182,282749,1,Prodigal:002006,mntP,putative manganese efflux pump MntP,IENKBCOO_02569
3,GCA_001404095.1,CXPD01000018.1,IENKBCOO_03010,407809,409051,-1,Prodigal:002006,mntH,Divalent metal cation transporter MntH,IENKBCOO_03010
4,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03279,96290,97169,-1,Prodigal:002006,mntA_1,Manganese-binding lipoprotein MntA,IENKBCOO_03279
5,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03280,97176,98040,-1,Prodigal:002006,mntB_1,Manganese transport system membrane protein MntB,IENKBCOO_03280
6,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03281,98036,98714,-1,Prodigal:002006,mntB_2,Manganese transport system ATP-binding protein...,IENKBCOO_03281
7,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03326,144173,144995,1,Prodigal:002006,mntB_3,Manganese transport system ATP-binding protein...,IENKBCOO_03326
8,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03327,144991,145840,1,Prodigal:002006,mntB_4,Manganese transport system membrane protein MntB,IENKBCOO_03327
9,GCA_001404095.1,CXPD01000021.1,IENKBCOO_03328,145833,146688,1,Prodigal:002006,mntB_5,Manganese transport system membrane protein MntB,IENKBCOO_03328


In [25]:
# Show the Prokka hits for the RefSeq assembly again, for comparison with the GenBank table in the previous cell.
df_res_refseq_prokka

Unnamed: 0,GenomeID,SequenceID,FeatureID,StartPosition,EndPosition,Strand,Source,Name,Product,LocusTag
0,GCF_001404095.1,CXPD01000008.1,BCDAKLAD_01103,222051,222177,-1,Prodigal:002006,mntS,Small protein MntS,BCDAKLAD_01103
1,GCF_001404095.1,CXPD01000008.1,BCDAKLAD_01104,222362,222836,1,Prodigal:002006,mntR,Transcriptional regulator MntR,BCDAKLAD_01104
2,GCF_001404095.1,CXPD01000017.1,BCDAKLAD_02569,282182,282749,1,Prodigal:002006,mntP,putative manganese efflux pump MntP,BCDAKLAD_02569
3,GCF_001404095.1,CXPD01000018.1,BCDAKLAD_03010,407809,409051,-1,Prodigal:002006,mntH,Divalent metal cation transporter MntH,BCDAKLAD_03010
4,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03279,96290,97169,-1,Prodigal:002006,mntA_1,Manganese-binding lipoprotein MntA,BCDAKLAD_03279
5,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03280,97176,98040,-1,Prodigal:002006,mntB_1,Manganese transport system membrane protein MntB,BCDAKLAD_03280
6,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03281,98036,98714,-1,Prodigal:002006,mntB_2,Manganese transport system ATP-binding protein...,BCDAKLAD_03281
7,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03326,144173,144995,1,Prodigal:002006,mntB_3,Manganese transport system ATP-binding protein...,BCDAKLAD_03326
8,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03327,144991,145840,1,Prodigal:002006,mntB_4,Manganese transport system membrane protein MntB,BCDAKLAD_03327
9,GCF_001404095.1,CXPD01000021.1,BCDAKLAD_03328,145833,146688,1,Prodigal:002006,mntB_5,Manganese transport system membrane protein MntB,BCDAKLAD_03328


## Combine all files

In [27]:
# Build one wide table that lines up the GenBank, RefSeq and Prokka annotations
# of each CDS. Rows are matched on sequence, coordinates and strand; the outer
# joins keep features that are reported by only some of the sources.
key_cols = ['SequenceID','StartPosition','EndPosition','Strand']
value_cols = ['Source','Name','Product']

# (frame, label) pairs in merge order; the label becomes the column suffix.
labelled_frames = [
    (df_res_genbank_ncbi, 'GenBank'),
    (df_res_refseq_ncbi, 'RefSeq'),
    (df_res_genbank_prokka, 'Prokka'),
]
renamed_frames = [
    frame[key_cols + value_cols].rename(
        columns={col: f'{col}_{label}' for col in value_cols}
    )
    for frame, label in labelled_frames
]

genbank_part, refseq_part, prokka_part = renamed_frames
df_combined = genbank_part.merge(refseq_part, on=key_cols, how='outer')
df_combined = df_combined.merge(prokka_part, on=key_cols, how='outer')

# Group the columns by attribute (names, then products, then sources).
ordered_cols = key_cols + [
    'Name_GenBank','Name_RefSeq','Name_Prokka',
    'Product_GenBank','Product_RefSeq','Product_Prokka',
    'Source_GenBank','Source_RefSeq','Source_Prokka',
]
df_combined = df_combined[ordered_cols]
df_combined.to_csv("GenBank_RefSeq_comparison.csv", index=False)
df_combined

Unnamed: 0,SequenceID,StartPosition,EndPosition,Strand,Name_GenBank,Name_RefSeq,Name_Prokka,Product_GenBank,Product_RefSeq,Product_Prokka,Source_GenBank,Source_RefSeq,Source_Prokka
0,CXPD01000008.1,222051,222177,-1,mntS,mntS,mntS,Small protein MntS,manganase accumulation protein MntS,Small protein MntS,EMBL,Protein Homology,Prodigal:002006
1,CXPD01000008.1,222362,222836,1,mntR,mntR,mntR,DNA-binding transcriptional regulator of mntH,manganese-binding transcriptional regulator MntR,Transcriptional regulator MntR,EMBL,Protein Homology,Prodigal:002006
2,CXPD01000018.1,407809,409051,-1,mntH,,mntH,manganese/divalent cation transporter,,Divalent metal cation transporter MntH,EMBL,,Prodigal:002006
3,CXPD01000032.1,1,739,-1,mntB,,scaC,Manganese transport system ATP-binding protein...,,Manganese import ATP-binding protein ScaC,EMBL,,Prodigal:002006
4,CXPD01000011.1,25540,26428,1,,,,,manganese catalase family protein,,,Protein Homology,
5,CXPD01000017.1,282182,282749,1,,mntP,mntP,,manganese efflux pump MntP,putative manganese efflux pump MntP,,Protein Homology,Prodigal:002006
6,CXPD01000021.1,144173,144995,1,,,mntB_3,,manganese/iron ABC transporter ATP-binding pro...,Manganese transport system ATP-binding protein...,,Protein Homology,Prodigal:002006
7,CXPD01000021.1,144991,145840,1,,sitC,mntB_4,,iron/manganese ABC transporter permease subuni...,Manganese transport system membrane protein MntB,,Protein Homology,Prodigal:002006
8,CXPD01000032.1,20461,21322,-1,,sitC,mntB_7,,iron/manganese ABC transporter permease subuni...,Manganese transport system membrane protein MntB,,Protein Homology,Prodigal:002006
9,CXPD01000032.1,21318,22140,-1,,,mntB_8,,manganese/iron ABC transporter ATP-binding pro...,Manganese transport system ATP-binding protein...,,Protein Homology,Prodigal:002006


In [29]:
# sitC
# ATGAGCTGGCTGCTGGAGCCGTTTTGCTATTACTATATGCTCAATGCGATGTGGGTGTCGGCGCTGGTGGGCGGCGTCTG
# TGCGTTTCTCTCCTGCTACCTGATGCTCAAAGGCTGGTCGCTGATTGGCGACGCCCTCTCCCACTCGATTGTGCCCGGCG
# TCGCCGGGGCCTATATGCTCGGCCTGCCCTTCGCGCTCGGCGCGTTTCTCTCCGGCGGCCTGGCGGCGGGCAGCATGCTG
# TTTTTGCAACAGCGCTCGCGGCTAAAAGAGGATGCCATTATCGGGCTGATCTTCTCCTCCTTCTTCGGGATCGGGCTGTT
# TATGGTGTCGCTGAATCCGACGTCGGTGAATATTCAGACCATCATCCTCGGCAATATTCTGGCCATCGCCCCGGAGGATA
# TTATCCAGCTGGCGGCGATCGGCTTTATCTCAATGGCGATTCTGCTGCTGAAGTGGAAAGACCTGATGGTGACCTTCTTT
# GATGAACACCACGCCCGCTCGATTGGCCTGAACACCCGCGGCCTGAAGCTGCTGTTCTTTACCCTGCTGGCCGCCTGCAC
# CGTGGCGGCGCTGCAGACCGTCGGCGCCTTTCTGGTCATCTGCCTGGTGGTCACTCCCGGGGCCACCGCGTGGCTGTTAA
# CCGATCGCTTCCCGCGCCTGCTGGCCATCGCCGTGGCTATCGGCAGCCTGACCAGTTTCTTCGGCGCCTGGCTCAGCTAC
# TATCTCGACGGCGCCACCGGCGGCATTATCGTGGTCGCGCAAACGCTGCTGTTCCTCATCACCTTTATCTTCGCGCCGAA
# GCACGGCCTGCTGGCCAGCCGCCGCCGCGCCAGGGAGGCCGCATGCTGA

# mntB
# ATGCACGACTACTCTTTGCGTTTTGCAAATCTGGCGCTGGGTTATGAGGGTTTACCTGCGATACAAAATATTACCGGGAC
# GATACAAAAAGGGTCCCTTACAGCCATCATCGGCCCTAACGGTTCTGGTAAGTCAACACTGCTTAAAGGTATCGCAGGGA
# TTCTGGCTCCGTTAAGTGGCTCCTGTACCGTAGAACCAAAGGCGCGCATTGCCTATCTGCCACAAATATCGGAATTGGAT
# CGCACTTTCCCTGCAACTGTTTCAGATTTAGTTTCCCTAGGTTTATGGCCTGAAAGAGGGTTATTCCGTCATCACAGAAT
# TGAGGATCGTAAACGGCTTACTGATGCATTGGGATCAGTAGGATTAGCAGGGTTTGAAAAAAGACAGCTTAGTGCATTAT
# CCGGGGGGCAGCTTCAACGCGCGCTCTTCGCACGGGTCATTCTTCAACAGGCAAACATCATCTTGCTTGATGAGCCTTTT
# AATGCCATTGACGCCACTACTATTGATGACTTGCTTGTGCTGATAAACCGCTGGCATGCTCAGAAACGTACTGTCTTAGC
# TGTCATGCATGATATCGGTCTGGTTAGAAATCATTTTCCACAGGCCATTTTGTTAAACGGTAAATTAGTGGCATGGGACG
# AAACAGAACAAGTTCTGCGCCATACCTCACTTCTACCGGCACAGAATATGGGCACAGCACGTGCTTTACCTGGGCAACAT
# GAGGCTACACATTCATGA