### 0. Import required packages, create handle and get sequence 'fasta'

In [3]:
from Bio import Entrez, Seq, SeqIO
# from Bio.Alphabet import IUPAC

In [4]:
Entrez.email = "tquangbk@gmail.com" 
hdl = Entrez.efetch(db='nucleotide', id=['NM_002299'], rettype='fasta')  # Lactase gene
#for l in hdl:
#    print l
seq = SeqIO.read(hdl, 'fasta')

### 1. Get sequence of interest
The SeqIO.write function takes a list of sequences to write (not just a single one). Be careful with this idiom. If you want to write many sequences (and you could easily write millions with NGS), do not use a list (as shown in the preceding code), because this will allocate massive amounts of memory. Either use an iterator, or use the SeqIO.write function several times with a subset of the sequence on each write.

In [6]:
w_seq = seq[11:5795]
w_seq

SeqRecord(seq=Seq('GAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATG...ATT'), id='NM_002299.4', name='NM_002299.4', description='NM_002299.4 Homo sapiens lactase (LCT), mRNA', dbxrefs=[])

In [7]:
w_hdl = open('example.fasta', 'w')
SeqIO.write([w_seq], w_hdl, 'fasta')
w_hdl.close()

### 2. Reading saved sequence
In most situations, you will actually have the sequence on the disk, so you will be interested in reading it:

In [35]:
recs = SeqIO.parse('example.fasta', 'fasta')
for rec in recs:
    seq = rec.seq
    print(rec.description)
    print(seq[:10])
#     print(seq.alphabet)

NM_002299.4 Homo sapiens lactase (LCT), mRNA
GAAAATGGAG


### 3. Get sequence with more informative alphabet

In [36]:
seq = Seq.Seq(str(seq), Seq.IUPACData.ambiguous_dna_complement)
seq

Seq('GAAAATGGAGCTGTCTTGGCATGTAGTCTTTATTGCCCTGCTAAGTTTTTCATG...ATT')

In [37]:
# seq = Seq.Seq(str(seq), IUPAC.unambiguous_dna)
# seq

###  4. Transcribe unambiguous DNA

In [38]:
print((seq[:12], seq[-12:]))
rna = seq.transcribe()
rna

(Seq('GAAAATGGAGCT'), Seq('GGTGTCTTCATT'))


Seq('GAAAAUGGAGCUGUCUUGGCAUGUAGUCUUUAUUGCCCUGCUAAGUUUUUCAUG...AUU')

### 5. Translate our gene into a protein

In [39]:
prot = seq.translate()
prot

Seq('ENGAVLACSLYCPAKFFMLGVRLGV**KFHFHRWSSNQ*LAAQPEWSPGRPEF*...VFI')