# Sequence Objects

In [120]:
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqUtils import GC

In [121]:
# Peek at the first record of the FASTA file only: show its identifier, the
# repr of its sequence and its length, then stop iterating.
for sequence in SeqIO.parse("ls_orchid.fasta", "fasta"):
    print(sequence.id, repr(sequence.seq), len(sequence), sep="\n")
    break

gi|2765658|emb|Z78533.1|CIZ78533
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG...CGC')
740


In [122]:
# Hand-built DNA sequence; the literal matches the leading part of the first
# record's sequence repr printed by the preview cell.
dna = Seq("CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG")

# A cell echoes only its final expression, so the two counts below are
# evaluated and discarded; the recorded output is the value of GC(dna).
dna.count("AA")
dna.count_overlap("AA")
# NOTE(review): GC appears to be superseded by gc_fraction in newer Biopython
# releases -- confirm against the installed version before upgrading.
GC(dna)

51.851851851851855

In [123]:
# Only the final expression is echoed: the complement() result is discarded
# and the recorded output is the reverse complement of `dna`.
dna.complement()
dna.reverse_complement()

Seq('CCACGGTCTCATCAATGATCCTTCCGCAGGTTCACCTACGGAAACCTTGTTACG')

In [124]:
# Transcribe `dna` into `mrna`, then rebind `dna` to the back-transcribed
# result. Both names are read again by the translation cell that follows.
mrna = dna.transcribe()
dna = mrna.back_transcribe()

In [125]:
# Translate both the mRNA and the DNA form. Only the final expression is
# echoed, so the recorded protein sequence is the result of dna.translate();
# the mrna.translate() result is discarded.
mrna.translate()
dna.translate()

Seq('RNKVSVGEPAEGSLMRPW')

# Sequence Annotation Objects

In [126]:
from Bio.SeqRecord import SeqRecord
from Bio.SeqFeature import SeqFeature, FeatureLocation

In [127]:
# Short hand-written sequence used to build a SeqRecord manually.
sequence = Seq("GTGACCCCAGGTCAGGCGGGGGCACCCGCTGAGTTTACGC")

# First a record is built by hand from `sequence` with an explicit id ...
record = SeqRecord(sequence, id="gi|2765658|emb|Z78533.1|CIZ78533")
# ... then `record` is immediately rebound to the record read from the FASTA
# file, so the hand-built SeqRecord above is no longer reachable by name.
record = SeqIO.read("NC_005816.fna", "fasta")

In [128]:
# Inspect the record and a few of its attributes. Only the final expression
# is echoed, so the recorded output (an empty list) is record.dbxrefs; the
# three expressions before it are evaluated and discarded.
record
record.name
record.description
record.dbxrefs

[]

In [132]:
# Hand-built example feature: positions 5..18 with strand=-1, typed "gene".
# NOTE: the loop below reuses the name `feature`, so this object is
# overwritten as soon as iteration starts.
feature = SeqFeature(FeatureLocation(5, 18, strand=-1), type="gene")

# Position of interest; every GenBank feature containing it is reported.
SNP = 4350
for feature in SeqIO.read("NC_005816.gb", "genbank").features:
    # Skip features that do not contain the SNP position.
    if SNP not in feature:
        continue
    print(
        feature.location,
        feature.strand,
        feature.type,
        feature.qualifiers.get("db_xref"),
    )

[0:9609](+) 1 source ['taxon:229193']
[4342:4780](+) 1 gene ['GeneID:2767712']
[4342:4780](+) 1 CDS ['GI:45478716', 'GeneID:2767712']
