# An introduction to solving biological problems with Python

## Session 2.4: BioPython

## Using a third-party library: BioPython

### Sequence manipulation

In [None]:
# Build a Seq object and exercise the string-like operations it supports.
from Bio.Seq import Seq

my_seq = Seq("AGTACACTGGT")

# Each entry is printed on its own line, in this order.
for shown in (
    my_seq,             # the whole sequence
    my_seq[10],         # single letter at index 10 (the last of the 11 letters)
    my_seq[1:5],        # slice covering indices 1 to 4
    len(my_seq),        # number of letters
    my_seq.count("A"),  # occurrences of "A"
):
    print(shown)

In [None]:
# Report the GC content and the molecular weight of my_seq
# (my_seq is created in the sequence-creation cell above).
from Bio.SeqUtils import GC, molecular_weight

# Apply each helper to the same sequence and print its result in turn.
for measure in (GC, molecular_weight):
    print(measure(my_seq))

In [None]:
# Spell out my_seq with three-letter codes instead of single letters.
from Bio.SeqUtils import seq3

three_letter = seq3(my_seq)
print(three_letter)

In [None]:
# Attach an explicit alphabet to a sequence and inspect it.
# NOTE(review): Bio.Alphabet is not available in recent Biopython releases
# (removed in 1.78, as far as I know) - confirm the version this course pins.
from Bio.Alphabet import IUPAC

dna_alphabet = IUPAC.unambiguous_dna
my_dna = Seq("AGTACATGACTGGTTTAG", dna_alphabet)

print(my_dna)           # the sequence itself
print(my_dna.alphabet)  # the alphabet object stored on the sequence

In [None]:
# Complement of my_dna; as the cell's last expression, the notebook displays the result.
my_dna.complement()

In [None]:
# Reverse complement of my_dna; the notebook displays the returned value.
my_dna.reverse_complement()

In [None]:
# Translate my_dna; no arguments are passed, so Biopython's defaults apply.
# The notebook displays the returned value.
my_dna.translate()

### FASTA files

In [None]:
# Show the raw text of the example FASTA file before parsing it.
with open("data/glpa.fa") as fileObj:
    fasta_text = fileObj.read()

print(fasta_text)

In [None]:
# Reading FASTA files
from Bio import SeqIO

# Open in the default text mode: Python 3 already applies universal newlines,
# and the legacy "rU" flag raises ValueError on Python 3.11 and later.
# The with-block closes the file once every record has been printed.
with open("data/glpa.fa") as fileObj:
    # SeqIO.parse yields one record per FASTA entry in the file.
    for protein in SeqIO.parse(fileObj, 'fasta'):
        print(protein.id)
        print(protein.seq)

In [None]:
# Writing FASTA files
from Bio import SeqIO  # imported here so the cell does not depend on an earlier one
from Bio.SeqRecord import SeqRecord
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC

sequence = 'MYGKIIFVLLLSEIVSISASSTTGVAMHTSTSSSVTKSYISSQTNDTHKRDTYAATPRAHEVSEISVRTVYPPEEETGERVQLAHHFSEPEITLIIFG'

# Wrap the raw string in a Seq, then in a SeqRecord that carries the
# identifier and description written to the FASTA header line.
seqObj = Seq(sequence, IUPAC.protein)
proteinObjs = [SeqRecord(seqObj, id="MYID", description='my description')]

# The with-block closes (and flushes) the output file even if writing fails,
# so the read-back below always sees the complete file.
with open("biopython.fa", "w") as fileObj:
    SeqIO.write(proteinObjs, fileObj, 'fasta')

# Read the file back to show what was written.
with open("biopython.fa") as fileObj:
    print(fileObj.read())

In [None]:
# Read FASTA file from NCBI GenBank
from Bio import Entrez
from Bio import SeqIO  # imported here so the cell does not depend on an earlier one

# Placeholder contact address sent along with the request.
Entrez.email = 'A.N.Other@example.com'

socketObj = Entrez.efetch(db="protein", rettype="fasta", id="71066805")
try:
    # NOTE: the record comes from the "protein" database, despite the name dnaObj.
    dnaObj = SeqIO.read(socketObj, "fasta")
finally:
    # Release the network handle even when parsing the response fails.
    socketObj.close()

print(dnaObj.description)
print(dnaObj.seq)

In [None]:
# Read SWISSPROT record
from Bio import ExPASy
from Bio import SeqIO  # imported here so the cell does not depend on an earlier one

socketObj = ExPASy.get_sprot_raw('HBB_HUMAN')
try:
    # Parse the single record in the response using the "swiss" format.
    proteinObj = SeqIO.read(socketObj, "swiss")
finally:
    # Release the network handle even when parsing the response fails.
    socketObj.close()

print(proteinObj.description)
print(proteinObj.seq)

## Exercises

In [1]:
# TODO: add a BioPython exercise for the reverse complement function:
# write a second function that does the same thing using BioPython