# A: Make a SeqRecord

In [1]:
# 1. Build a Seq object from a plain string of nucleotide letters

from Bio.Seq import Seq
dna_letters = "GATCGATCGATCGATCGATCGATCGATCGATC"
my_sequence = Seq(dna_letters)

In [2]:
# 2. Wrap the sequence in a SeqRecord, which is the object that carries
#    features and can be written out in GenBank format

from Bio.SeqRecord import SeqRecord
my_sequence_record = SeqRecord(seq=my_sequence)

In [3]:
# 3. Mark the sequence as DNA so the GenBank writer knows the molecule type.
#    Older Biopython releases do this through an alphabet object on the Seq;
#    Bio.Alphabet was removed in Biopython 1.78, where the record's
#    "molecule_type" annotation is used instead.

try:
    from Bio.Alphabet import generic_dna
except ImportError:
    # Newer Biopython: there are no alphabets, so annotate the record instead.
    my_sequence_record.annotations["molecule_type"] = "DNA"
else:
    # Older Biopython: attach the generic DNA alphabet to the sequence.
    my_sequence_record.seq.alphabet = generic_dna

This is the minimum required info for Biopython to be able to output the SeqRecord in GenBank format. You would probably want to add other info (e.g. locus, organism, date, etc.).

In [4]:
# Optional: print the SeqRecord to STDOUT in GenBank format.
# Note that there are no features on it yet.

# Use the function-call form of print, matching the call on the next line,
# so the cell runs under both Python 2 and Python 3.
print("\nThis bit is the SeqRecord, printed out in genbank format, with no features added.\n")
print(my_sequence_record.format("gb"))



This bit is the SeqRecord, printed out in genbank format, with no features added.

LOCUS       .                         32 bp    DNA              UNK 01-JAN-1980
DEFINITION  .
ACCESSION   <unknown id>
VERSION     <unknown id>
KEYWORDS    .
SOURCE      .
  ORGANISM  .
            .
FEATURES             Location/Qualifiers
ORIGIN
        1 gatcgatcga tcgatcgatc gatcgatcga tc
//



# B: Make a SeqFeature

In [5]:
# 1. Create a start location and end location for the feature.
#    Other position classes (AfterPosition, BeforePosition, etc.) can be
#    used instead to handle ambiguous or unknown positions.

# Import the position class directly rather than the Bio.SeqFeature module:
# a later cell binds the name "SeqFeature" to the SeqFeature class, so using
# the same name for the module here is easy to misread.
from Bio.SeqFeature import ExactPosition
# Start 2 / end 6 is rendered as 3..6 in the GenBank output.
my_start_pos = ExactPosition(2)
my_end_pos = ExactPosition(6)

In [6]:
# 2. Combine the start and end positions into a FeatureLocation

from Bio.SeqFeature import FeatureLocation
my_feature_location = FeatureLocation(
    start=my_start_pos,
    end=my_end_pos,
)

In [7]:
# 3. Define the feature type as a text string.
#    (You can also pass the type directly when creating the SeqFeature.)
#    This string is what appears as the feature key in the GenBank
#    FEATURES table.

my_feature_type = "CDS"

In [8]:
# 4. Build the SeqFeature from the location and the feature type

from Bio.SeqFeature import SeqFeature
my_feature = SeqFeature(
    location=my_feature_location,
    type=my_feature_type,
)

In [9]:
# 5. Append your newly created SeqFeature to your SeqRecord

# Guard the append so that re-running this cell does not add a duplicate
# of the same feature to the record.
if my_feature not in my_sequence_record.features:
    my_sequence_record.features.append(my_feature)

In [10]:
# Optional: print the SeqRecord to STDOUT in GenBank format, with your new
# feature added.

# Use the function-call form of print, matching the call on the next line,
# so the cell runs under both Python 2 and Python 3.
print("\nThis bit is the SeqRecord, printed out in genbank format, with a feature added.\n")
print(my_sequence_record.format("gb"))


This bit is the SeqRecord, printed out in genbank format, with a feature added.

LOCUS       .                         32 bp    DNA              UNK 01-JAN-1980
DEFINITION  .
ACCESSION   <unknown id>
VERSION     <unknown id>
KEYWORDS    .
SOURCE      .
  ORGANISM  .
            .
FEATURES             Location/Qualifiers
     CDS             3..6
ORIGIN
        1 gatcgatcga tcgatcgatc gatcgatcga tc
//



# C: Overwrite an existing SeqFeature

In [11]:
# 1. Create a start location and end location for the replacement feature.
#    This bit is obviously a repeat of "B: Make a SeqFeature" above;
#    normally I'd pull it out to a function, but I'm trying to be explicit here.

# Import the position class directly rather than the Bio.SeqFeature module,
# so the name "SeqFeature" is not switched back and forth between the module
# and the SeqFeature class from one cell to the next.
from Bio.SeqFeature import ExactPosition
# Start 3 / end 7 is rendered as 4..7 in the GenBank output.
my_start_pos = ExactPosition(3)
my_end_pos = ExactPosition(7)

In [12]:
# 2. Combine the new start and end positions into a FeatureLocation

from Bio.SeqFeature import FeatureLocation
my_feature_location2 = FeatureLocation(
    start=my_start_pos,
    end=my_end_pos,
)

In [13]:
# 3. Define the feature type as a text string.
#    (You can also pass the type directly when creating the SeqFeature.)
#    "ABC" is used here so the replaced feature is easy to tell apart from
#    the original "CDS" one in the printed output.

my_feature_type2 = "ABC"

In [14]:
# 4. Build the replacement SeqFeature, then overwrite the existing one

from Bio.SeqFeature import SeqFeature
my_feature2 = SeqFeature(
    location=my_feature_location2,
    type=my_feature_type2,
)
# Replace the first feature on the record (the one appended in section B);
# index assignment requires that the features list is not empty.
my_sequence_record.features[0] = my_feature2

In [15]:
# Optional: print the SeqRecord to STDOUT in GenBank format, with your
# feature changed.

# Use the function-call form of print, matching the call on the next line,
# so the cell runs under both Python 2 and Python 3.
print("\nThis bit is the SeqRecord, printed out in genbank format, with a feature changed.\n")
print(my_sequence_record.format("gb"))


This bit is the SeqRecord, printed out in genbank format, with a feature changed.

LOCUS       .                         32 bp    DNA              UNK 01-JAN-1980
DEFINITION  .
ACCESSION   <unknown id>
VERSION     <unknown id>
KEYWORDS    .
SOURCE      .
  ORGANISM  .
            .
FEATURES             Location/Qualifiers
     ABC             4..7
ORIGIN
        1 gatcgatcga tcgatcgatc gatcgatcga tc
//

