In [12]:
########################################
### Biopython tutorial
########################################

In [13]:
########################################
### Chapter 2: Quick start
########################################

In [14]:
## Seq: stores a sequence 
from Bio.Seq import Seq

# Save sequence
my_seq = Seq("AGTACACTGGT")

# Print sequence
print(my_seq)

# Print alphabet
my_seq.alphabet

# Print sequence
my_seq

# Print complementary sequence
my_seq.complement()

# Print reverse complementary sequence
my_seq.reverse_complement()

AGTACACTGGT


Seq('ACCAGTGTACT')

In [15]:
## SeqRecord: stores a sequence with additional annotation

# First save NCBI queries as
# FASTA formatted text files (.fasta) and
# GenBank formatted text files (.gbk)

In [16]:
########################################
### Tutorial with orchid data
########################################

In [17]:
## Load fasta data
from Bio import SeqIO

for seq_record in SeqIO.parse("ls_orchid.fasta", "fasta"):
    print(seq_record.id)
    print(repr(seq_record.seq))
    print(len(seq_record))

gi|2765658|emb|Z78533.1|CIZ78533
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG...CGC', SingleLetterAlphabet())
740
gi|2765657|emb|Z78532.1|CCZ78532
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAG...GGC', SingleLetterAlphabet())
753
gi|2765656|emb|Z78531.1|CFZ78531
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGCAG...TAA', SingleLetterAlphabet())
748
gi|2765655|emb|Z78530.1|CMZ78530
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAAACAACAT...CAT', SingleLetterAlphabet())
744
gi|2765654|emb|Z78529.1|CLZ78529
Seq('ACGGCGAGCTGCCGAAGGACATTGTTGAGACAGCAGAATATACGATTGAGTGAA...AAA', SingleLetterAlphabet())
733
gi|2765652|emb|Z78527.1|CYZ78527
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGTAG...CCC', SingleLetterAlphabet())
718
gi|2765651|emb|Z78526.1|CGZ78526
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGTAG...TGT', SingleLetterAlphabet())
730
gi|2765650|emb|Z78525.1|CAZ78525
Seq('TGTTGAGATAGCAGAATATACATCGAGTGAATCCGGAGGACCTGTGGTTATTCG...GC

In [19]:
## Load GenBank data
from Bio import SeqIO

for seq_record in SeqIO.parse("ls_orchid.gbk", "genbank"):
    print(seq_record.id)
    print(repr(seq_record.seq))
    print(len(seq_record))

Z78533.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG...CGC', IUPACAmbiguousDNA())
740
Z78532.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAG...GGC', IUPACAmbiguousDNA())
753
Z78531.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGCAG...TAA', IUPACAmbiguousDNA())
748
Z78530.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAAACAACAT...CAT', IUPACAmbiguousDNA())
744
Z78529.1
Seq('ACGGCGAGCTGCCGAAGGACATTGTTGAGACAGCAGAATATACGATTGAGTGAA...AAA', IUPACAmbiguousDNA())
733
Z78527.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGTAG...CCC', IUPACAmbiguousDNA())
718
Z78526.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGTAG...TGT', IUPACAmbiguousDNA())
730
Z78525.1
Seq('TGTTGAGATAGCAGAATATACATCGAGTGAATCCGGAGGACCTGTGGTTATTCG...GCA', IUPACAmbiguousDNA())
704
Z78524.1
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGATAGTAG...AGC', IUPACAmbiguousDNA())
740
Z78523.1
Seq('CGTAACCAGGTTTCCGTAGGTGAACCTGCGGCAGGATCATTGTTGAGACAGCAG...AAG', IUPAC

In [None]:
########################################
### Chapter 3: Sequence objects
########################################

In [None]:
## 3.1: Sequences and alphabets

## Availlable alphabets are stored in the Bio.Alphabet module
# IUPAC alphabets fo DNA, RNA, and proteins

## Proteins
# IUPACProtein
# ExtendedIUPACProtein

## DNA
# IUPACUnambiguousDNA
# IUPACAmbiguousDNA
# ExtendedIUPACDNA

## RNA
# IUPACAmbiguousRNA
# IUPACUnambiguousRNA

In [21]:
## Stating alphabet
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

my_seq = Seq("AGTACACTGGT", IUPAC.unambiguous_dna)
my_seq
my_seq.alphabet

IUPACUnambiguousDNA()

In [26]:
## 3.2: Sequences act like strings
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

my_seq = Seq("GATCG", IUPAC.unambiguous_dna)

# Print index of all letters in sequence
for index, letter in enumerate(my_seq):
    print("%i %s" % (index, letter))

# Print length of sequence
print(len(my_seq))

# Print first letter of sequence
print(my_seq[0])

# Count letters in a sequence
Seq("AAAA").count("AA")

# Calculate percent of GC in a sequence
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqUtils import GC

my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna)

GC(my_seq)

0 G
1 A
2 T
3 C
4 G
5
G


46.875

In [28]:
## 3.3: Slicing a sequence
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Get a slice from a sequence
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna) 
my_seq[4:12]

####### Start, stop, and stride
my_seq[0::3]
my_seq[1::3]
my_seq[2::3]

# Reverse string
my_seq[::-1]

Seq('CGCTAAAAGCTAGGATATATCCGGGTAGCTAG', IUPACUnambiguousDNA())

In [31]:
## 3.4 Turning Seq objects into strings

# Convert sequence into string
str(my_seq)

# Print string
print(my_seq)

# Convert sequence into string with the
# a placeholder (%s) and
# interpolation operator (%)
fasta_format_string = ">Name\n%s\n" % my_seq 
print(fasta_format_string)


GATCGATGGGCCTATATAGGATCGAAAATCGC
>Name
GATCGATGGGCCTATATAGGATCGAAAATCGC



In [39]:
## 3.5: Concatenating or adding sequences
from Bio.Alphabet import IUPAC
from Bio.Seq import Seq

# Save protein sequence
protein_seq = Seq("EVRNAK", IUPAC.protein)

# Save DNA sequence
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)

# Combining protein and DNA sequences requires generic alphabet
from Bio.Alphabet import generic_alphabet

# Assign alphabets
protein_seq.alphabet = generic_alphabet 
dna_seq.alphabet = generic_alphabet

# Combine protein and DNA sequences 
protein_seq + dna_seq

## Combining sequences using a for loop
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

# Create list of sequences
list_of_seqs = [Seq("ACGT", generic_dna), Seq("AACC", generic_dna), Seq("GGTT", generic_dna)]

# Concatenate sequences
concatenated = Seq("", generic_dna)

for s in list_of_seqs:
    concatenated += s

# Print concatenated sequences
concatenated

## Combining sequences using the sum function
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

# Create list of sequences
list_of_seqs = [Seq("ACGT", generic_dna), Seq("AACC", generic_dna), Seq("GGTT", generic_dna)] 

# Print concatenated sequences
sum(list_of_seqs, Seq("", generic_dna))

Seq('ACGTAACCGGTT', DNAAlphabet())

In [40]:
## 3.6: Changing case
from Bio.Seq import Seq
from Bio.Alphabet import generic_dna

# Create sequence
dna_seq = Seq("acgtACGT", generic_dna) 

# Change to uppercase
dna_seq.upper()

# Change to lower case
dna_seq.lower()

Seq('acgtacgt', DNAAlphabet())

In [41]:
## 3.7: Nucleotide sequences and (reverse) complements
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Create sequence
my_seq = Seq("GATCGATGGGCCTATATAGGATCGAAAATCGC", IUPAC.unambiguous_dna) 

# Create complementary sequence
my_seq.complement()

# Create reserve complementary sequence
my_seq.reverse_complement()

# Create a reverse sequence
my_seq[::-1]

Seq('CGCTAAAAGCTAGGATATATCCGGGTAGCTAG', IUPACUnambiguousDNA())

In [None]:
## 3.8: Transcription

# Transcription works form the template strand (3' to 5')
# then does a reverse complement to give a mRNA (TCAG to CUGA)

# Bioinformatics works with the coding strand because we can
# get the mRNA by switching T to U

from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Create sequence
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna) 
coding_dna

# Create template strand
template_dna = coding_dna.reverse_complement()
template_dna

# Create mRNA sequence from DNA
messenger_rna = coding_dna.transcribe()

# Create mRNA sequence frorm DNA
template_dna.reverse_complement().transcribe()

# Create DNA sequence from mRNA
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna) 
messenger_rna
messenger_rna.back_transcribe()

In [42]:
## 3.9: Translation

from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

# Translate mRNA into protein sequence
messenger_rna = Seq("AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG", IUPAC.unambiguous_rna)
messenger_rna
messenger_rna.translate()

# Translate DNA into protein sequence
coding_dna = Seq("ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG", IUPAC.unambiguous_dna) 
coding_dna
coding_dna.translate()

# Define translation tables
coding_dna.translate(table="Vertebrate Mitochondrial")
coding_dna.translate(table=2)

# Translate to first stop coden
coding_dna.translate()
coding_dna.translate(to_stop=True)
coding_dna.translate(table=2)
coding_dna.translate(table=2, to_stop=True)

Seq('MAIVMGR*KGAR*', HasStopCodon(IUPACProtein(), '*'))

In [44]:
## 3.10: Translation tables

# Skip for now

In [46]:
## 3.11: Comparing Seq objects

# Skip for now

In [48]:
## 3.12: MutableSeq objects

# Skip for now

In [50]:
## 3.13: UnknownSeq objects

# Skip for now

In [53]:
## 3.14: Working with strings directly

# Skip for now

In [None]:
########################################
### Chapter 4: Sequence annotation objects
########################################

In [54]:
## 4.1: The SeqRecord object

## Attributes (see page 34 for description)
# .seq
# .id
# .name
# .description
# .letter_annotations
# .annotations
# .features
# .dbxrefs

In [57]:
## 4.2: Creating a SeqRecord

# Bio.SeqIO.parse = for multiple records
# Bio.SeqIO.read = for single records

# Skip for now

In [59]:
## 4.3: Feature, location, and position objects

## SeqFeature

# SeqRecord = a gene sequence between two points
# .type
# .location
    # .ref
    # .ref_db
    # .strand
# .qualifiers
# .sub_features

## Positions vs. locations
# Position = single position of a sequence
# Location = region of a sequence

### Might need to revisit this to get particular SNPs for the COI and COII genes
### Page 41

In [61]:
## 4.4: Comparison

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

# Create sequences
record1 = SeqRecord(Seq("ACGT"), id="test") 
record2 = SeqRecord(Seq("ACGT"), id="test")

# Compare sequences
record1.id == record2.id
record1.seq == record2.seq

True

In [63]:
## 4.5: References

# Bio.SeqFeature.Reference stores the relevant information about
# a reference as attributes of an object

In [64]:
## 4.6: The format method

from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import generic_protein

# Create sequence record
record = SeqRecord(
    Seq(
        "MMYQQGCFAGGTVLRLAKDLAENNRGARVLVVCSEITAVTFRGPSETHLDSMVGQALFGD"
        "GAGAVIVGSDPDLSVERPLYELVWTGATLLPDSEGAIDGHLREVGLTFHLLKDVPGLISK"
        "NIEKSLKEAFTPLGISDWNSTFWIAHPGGPAILDQVEAKLGLKEEKMRATREVLSEYGNM"
        "SSAC",
generic_protein, ),
id="gi|14150838|gb|AAK54648.1|AF376133_1",
    description="chalcone synthase [Cucumis sativus]",
)

# Print record
print(record.format("fasta"))

>gi|14150838|gb|AAK54648.1|AF376133_1 chalcone synthase [Cucumis sativus]
MMYQQGCFAGGTVLRLAKDLAENNRGARVLVVCSEITAVTFRGPSETHLDSMVGQALFGD
GAGAVIVGSDPDLSVERPLYELVWTGATLLPDSEGAIDGHLREVGLTFHLLKDVPGLISK
NIEKSLKEAFTPLGISDWNSTFWIAHPGGPAILDQVEAKLGLKEEKMRATREVLSEYGNM
SSAC



In [65]:
## 4.7 to 4.9

### Revisit this when splicing the genes

In [67]:
########################################
### Chapter 5: Sequence input/output
########################################

In [69]:
### Background

# Load help
from Bio import SeqIO 
help(SeqIO)

# SeqRecord = small files
# SimpleFastaParser = large files
# FastqGeneralIterator = large files

Help on package Bio.SeqIO in Bio:

NAME
    Bio.SeqIO - Sequence input/output as SeqRecord objects.

DESCRIPTION
    Bio.SeqIO is also documented at SeqIO_ and by a whole chapter in our tutorial:
    
      - `HTML Tutorial`_
      - `PDF Tutorial`_
    
    .. _SeqIO: http://biopython.org/wiki/SeqIO
    .. _`HTML Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.html
    .. _`PDF Tutorial`: http://biopython.org/DIST/docs/tutorial/Tutorial.pdf
    
    Input
    -----
    The main function is Bio.SeqIO.parse(...) which takes an input file handle
    (or in recent versions of Biopython alternatively a filename as a string),
    and format string.  This returns an iterator giving SeqRecord objects:
    
    >>> from Bio import SeqIO
    >>> for record in SeqIO.parse("Fasta/f002", "fasta"):
    ...     print("%s %i" % (record.id, len(record)))
    gi|1348912|gb|G26680|G26680 633
    gi|1348917|gb|G26685|G26685 413
    gi|1592936|gb|G29385|G29385 471
    
    Note that the parse(

In [74]:
## 5.1: Parsing or reading sequences

# Bio.SeqIO.parse() = reads sequence data as SeqRecord objects
# ("file", "format", "alphabet") = https://biopython.org/wiki/SeqIO

from Bio import SeqIO

# Fasta file
for seq_record in SeqIO.parse("ls_orchid.fasta", "fasta"):
    print(seq_record.id)
    print(repr(seq_record.seq))
    print(len(seq_record))

# GenBank file
for seq_record in SeqIO.parse("ls_orchid.gbk", "genbank"):
    print(seq_record.id)
    print(repr(seq_record.seq))
    print(len(seq_record))

gi|2765658|emb|Z78533.1|CIZ78533
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG...CGC', SingleLetterAlphabet())
740
gi|2765657|emb|Z78532.1|CCZ78532
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAACAG...GGC', SingleLetterAlphabet())
753
gi|2765656|emb|Z78531.1|CFZ78531
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGCAG...TAA', SingleLetterAlphabet())
748
gi|2765655|emb|Z78530.1|CMZ78530
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAAACAACAT...CAT', SingleLetterAlphabet())
744
gi|2765654|emb|Z78529.1|CLZ78529
Seq('ACGGCGAGCTGCCGAAGGACATTGTTGAGACAGCAGAATATACGATTGAGTGAA...AAA', SingleLetterAlphabet())
733
gi|2765652|emb|Z78527.1|CYZ78527
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGTAG...CCC', SingleLetterAlphabet())
718
gi|2765651|emb|Z78526.1|CGZ78526
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGTTGAGACAGTAG...TGT', SingleLetterAlphabet())
730
gi|2765650|emb|Z78525.1|CAZ78525
Seq('TGTTGAGATAGCAGAATATACATCGAGTGAATCCGGAGGACCTGTGGTTATTCG...GC

In [76]:
# Extract unique identifers
identifiers = [seq_record.id for seq_record in SeqIO.parse("ls_orchid.gbk", "genbank")] 
identifiers

['Z78533.1',
 'Z78532.1',
 'Z78531.1',
 'Z78530.1',
 'Z78529.1',
 'Z78527.1',
 'Z78526.1',
 'Z78525.1',
 'Z78524.1',
 'Z78523.1',
 'Z78522.1',
 'Z78521.1',
 'Z78520.1',
 'Z78519.1',
 'Z78518.1',
 'Z78517.1',
 'Z78516.1',
 'Z78515.1',
 'Z78514.1',
 'Z78513.1',
 'Z78512.1',
 'Z78511.1',
 'Z78510.1',
 'Z78509.1',
 'Z78508.1',
 'Z78507.1',
 'Z78506.1',
 'Z78505.1',
 'Z78504.1',
 'Z78503.1',
 'Z78502.1',
 'Z78501.1',
 'Z78500.1',
 'Z78499.1',
 'Z78498.1',
 'Z78497.1',
 'Z78496.1',
 'Z78495.1',
 'Z78494.1',
 'Z78493.1',
 'Z78492.1',
 'Z78491.1',
 'Z78490.1',
 'Z78489.1',
 'Z78488.1',
 'Z78487.1',
 'Z78486.1',
 'Z78485.1',
 'Z78484.1',
 'Z78483.1',
 'Z78482.1',
 'Z78481.1',
 'Z78480.1',
 'Z78479.1',
 'Z78478.1',
 'Z78477.1',
 'Z78476.1',
 'Z78475.1',
 'Z78474.1',
 'Z78473.1',
 'Z78472.1',
 'Z78471.1',
 'Z78470.1',
 'Z78469.1',
 'Z78468.1',
 'Z78467.1',
 'Z78466.1',
 'Z78465.1',
 'Z78464.1',
 'Z78463.1',
 'Z78462.1',
 'Z78461.1',
 'Z78460.1',
 'Z78459.1',
 'Z78458.1',
 'Z78457.1',
 'Z78456.1',

In [78]:
from Bio import SeqIO
records = list(SeqIO.parse("ls_orchid.gbk", "genbank"))
print("Found %i records" % len(records))

Found 94 records


In [85]:
# Print first record
record_iterator = SeqIO.parse("ls_orchid.gbk", "genbank")
first_record = next(record_iterator)
print(first_record)

# Print common name
print(first_record.annotations["source"])

# Print scientific name
print(first_record.annotations["organism"])

ID: Z78533.1
Name: Z78533
Description: C.irapeanum 5.8S rRNA gene and ITS1 and ITS2 DNA
Number of features: 5
/molecule_type=DNA
/topology=linear
/data_file_division=PLN
/date=30-NOV-2006
/accessions=['Z78533']
/sequence_version=1
/gi=2765658
/keywords=['5.8S ribosomal RNA', '5.8S rRNA gene', 'internal transcribed spacer', 'ITS1', 'ITS2']
/source=Cypripedium irapeanum
/organism=Cypripedium irapeanum
/taxonomy=['Eukaryota', 'Viridiplantae', 'Streptophyta', 'Embryophyta', 'Tracheophyta', 'Spermatophyta', 'Magnoliophyta', 'Liliopsida', 'Asparagales', 'Orchidaceae', 'Cypripedioideae', 'Cypripedium']
/references=[Reference(title='Phylogenetics of the slipper orchids (Cypripedioideae: Orchidaceae): nuclear rDNA ITS sequences', ...), Reference(title='Direct Submission', ...)]
Seq('CGTAACAAGGTTTCCGTAGGTGAACCTGCGGAAGGATCATTGATGAGACCGTGG...CGC', IUPACAmbiguousDNA())
Cypripedium irapeanum
Cypripedium irapeanum
[SeqFeature(FeatureLocation(ExactPosition(0), ExactPosition(740), strand=1), type='sour

In [82]:
# Create empty list
all_species = []

# Print all species names with full genus
for seq_record in SeqIO.parse("ls_orchid.gbk", "genbank"):
    all_species.append(seq_record.annotations["organism"])
    print(all_species)

# Print all species names with abbreviated genus
for seq_record in SeqIO.parse("ls_orchid.fasta", "fasta"):
    all_species.append(seq_record.description.split()[1])
    print(all_species)

['Cypripedium irapeanum']
['Cypripedium irapeanum', 'Cypripedium californicum']
['Cypripedium irapeanum', 'Cypripedium californicum', 'Cypripedium fasciculatum']
['Cypripedium irapeanum', 'Cypripedium californicum', 'Cypripedium fasciculatum', 'Cypripedium margaritaceum']
['Cypripedium irapeanum', 'Cypripedium californicum', 'Cypripedium fasciculatum', 'Cypripedium margaritaceum', 'Cypripedium lichiangense']
['Cypripedium irapeanum', 'Cypripedium californicum', 'Cypripedium fasciculatum', 'Cypripedium margaritaceum', 'Cypripedium lichiangense', 'Cypripedium yatabeanum']
['Cypripedium irapeanum', 'Cypripedium californicum', 'Cypripedium fasciculatum', 'Cypripedium margaritaceum', 'Cypripedium lichiangense', 'Cypripedium yatabeanum', 'Cypripedium guttatum']
['Cypripedium irapeanum', 'Cypripedium californicum', 'Cypripedium fasciculatum', 'Cypripedium margaritaceum', 'Cypripedium lichiangense', 'Cypripedium yatabeanum', 'Cypripedium guttatum', 'Cypripedium acaule']
['Cypripedium irapeanum

In [87]:
########################################
### Chapter 6: Multiple sequence alignment objects
########################################

In [None]:
## 6.1: Parsing or reading sequence alignments

# I think I need to use Bio.AlignIO.read() for "single alignments"
# if only doing the COI gene

## Bio.AlignIO.read()
# First argument = handle; an open file or filename
# Second argument = lower case string specifying the alignment format (http://biopython.org/wiki/AlignIO)
# Third argument = seq_count (see Section 6.1.3)
# Fourth argument = alphabet

In [None]:
## 6.4: Alignment tools

## Steps

# Bio.SeqIO = prepare input file
# Muscle = align file
# Bio.AlignIO = read output file

## Definitions 
#import Bio.Align.Applications
#dir(Bio.Align.Applications)

In [None]:
### Muscle

## Help
#from Bio.Align.Applications import MuscleCommandline 
#help(MuscleCommandline)


In [None]:
########################################
### Chapter 9: Accessing NCBI's Entrez databases
########################################