In [1]:
import scipy

import numpy

import reportlab

from reportlab.graphics import renderPDF

from Bio.Seq import Seq

from Bio.Alphabet.IUPAC import unambiguous_dna

In [2]:
# Build a short DNA sequence tagged with the unambiguous-DNA alphabet.
# NOTE(review): Bio.Alphabet was removed in Biopython 1.78 — confirm the
# installed version still provides `unambiguous_dna`.
new_seq = Seq("GATCAGAAG", unambiguous_dna)

In [3]:
# Slice off the first two residues (start of the sequence up to, not including, index 2).
new_seq[:2]

Seq('GA', IUPACUnambiguousDNA())

In [4]:
# Translate new_seq; the recorded output below shows the result is a protein Seq ('DQK').
new_seq.translate()

Seq('DQK', IUPACProtein())

In [5]:
import Bio

You're reading a FASTQ file, right? If so, you're most probably reinventing the wheel: you could just use Biopython, which has tools for dealing with common biology file formats. For instance, see this tutorial on working with FASTQ files. The basic pattern looks like this:

from Bio import SeqIO

# Iterate over every record in the FASTQ file.
for record in SeqIO.parse("SRR020192.fastq", "fastq"):
    # do something with record, using record.seq, record.id etc
    pass  # placeholder so the snippet is valid Python when pasted as-is
More on Biopython SeqRecord objects here.

Here is another Biopython FASTQ-processing tutorial, including a faster variant that uses a lower-level iterator, like this:

from Bio.SeqIO.QualityIO import FastqGeneralIterator

# Open the file in a context manager so the handle is closed when the loop
# finishes (or fails) instead of being left open.
with open("untrimmed.fastq") as handle:
    for title, seq, qual in FastqGeneralIterator(handle):
        # do things with title,seq,qual values
        pass  # placeholder so the snippet is valid Python when pasted as-is
There's also the HTSeq package, with more deep-sequencing-specific tools, which I actually use more often.

By the way, if you don't know about Biostar already, you could take a look - it's a StackExchange-format site specifically for bioinformatics.

In [6]:
from Bio.Align.Applications import ClustalwCommandline

In [None]:
# First, let's count the reads.

from Bio import SeqIO

# enumerate(..., start=1) leaves `count` holding the number of records seen;
# the initial 0 covers a file with no records, where the loop never assigns.
count = 0
for count, rec in enumerate(
    SeqIO.parse("../../Desktop/OS_FastQ_operations/run1152_lane12_read2_indexD711-D507-MM-188-1.fastq", "fastq"),
    start=1,
):
    pass
print("%i reads" % count)

In [None]:
# Now let's do a simple filtering for a minimum PHRED quality of 20.

from Bio import SeqIO

MIN_PHRED = 20  # a read is kept only if every base scores at least this


def meets_min_quality(rec, threshold=MIN_PHRED):
    """Return True when every PHRED score of ``rec`` is >= ``threshold``.

    Zero-length reads are rejected explicitly: they carry no scores, and
    calling ``min()`` on an empty sequence would raise ``ValueError`` and
    abort the whole filtering run.
    """
    scores = rec.letter_annotations["phred_quality"]
    return len(scores) > 0 and min(scores) >= threshold


# Lazy generator expression: each record is tested only as SeqIO.write pulls it.
good_reads = (
    rec
    for rec in SeqIO.parse("../../Desktop/OS_FastQ_operations/run1152_lane12_read2_indexD711-D507-MM-188-1.fastq", "fastq")
    if meets_min_quality(rec)
)
# SeqIO.write's return value is reported below as the number of reads saved.
count = SeqIO.write(good_reads, "good_quality.fastq", "fastq")
print("Saved %i reads" % count)

In [None]:
# Keep only the reads whose sequence begins with the primer GATGACGGTGT.
from Bio import SeqIO

fastq_records = SeqIO.parse("../../Desktop/OS_FastQ_operations/run1152_lane12_read2_indexD711-D507-MM-188-1.fastq", "fastq")
# Lazy generator expression: each record is tested only as SeqIO.write pulls it.
primer_reads = (rec for rec in fastq_records if rec.seq.startswith("GATGACGGTGT"))
count = SeqIO.write(primer_reads, "with_primer.fastq", "fastq")
print("Saved %i reads" % count)