# Pydna / Biopython behaviour

Here are some code snippets that illustrate important behaviours of Biopython and pydna.

## How to know if a sequence is circular or linear

This matters, and it is not always straightforward to tell from certain file formats.

In [40]:
from pydna.parsers import parse as pydna_parse
from Bio.SeqIO import read as seqio_read
from pydna.dseqrecord import Dseqrecord
# pydna's own parser picks up the topology from the GenBank file.
dseqrecords = pydna_parse('../plasmids/addgene-plasmid-39296-sequence-49545.gbk')
first_parsed = dseqrecords[0]
print('pydna with gb:', first_parsed.circular)

# Same plasmid, this time read from the SnapGene file through Biopython.
seqrecord = seqio_read(
    '../plasmids/addgene-plasmid-39296-sequence-49545.dna',
    'snapgene',
)

# Wrapping the SeqRecord directly does not carry the circular topology over ...
dseqrecord = Dseqrecord(seqrecord)
print('seqio with snapgene, then convert to dseqrecord:', dseqrecord.circular)

# ... so look the topology up in the record's annotations and pass it explicitly.
iscircular = seqrecord.annotations.get('topology') == 'circular'
dseqrecord = Dseqrecord(seqrecord, circular=iscircular)
print('seqio with snapgene, then convert to dseqrecord:', dseqrecord.circular)


pydna with gb: True
seqio with snapgene, then convert to dseqrecord: False
seqio with snapgene, then convert to dseqrecord: True


## Restriction enzymes

In [2]:
# List of commercial enzymes
from Bio.Restriction.Restriction import CommOnly
from pydna.dseqrecord import Dseqrecord

enzyme = CommOnly.format('EcoRI')

# To learn where the enzyme cuts in the original sequence, call cut() on the
# underlying Dseq (dseqrecord.seq.cut) instead of Dseqrecord.cut(): each
# fragment it returns has a `pos` attribute giving the position of that
# fragment in the original sequence.
# The same steps are run once for a linear and once for a circular molecule.
for label, seq, is_circular in (
    ('linear', 'AAAAAAGAATTCTTTTTTTT', False),
    ('circular', 'AAAAAAGAATTCTTTTTTAAAGAATTCCCCCCC', True),
):
    dseq = Dseqrecord(seq, circular=is_circular)
    cutted_list = dseq.seq.cut([enzyme])
    print(label)
    for i in cutted_list:
        print(i.pos)


linear
0
7
circular
22
40


## Requesting genes from GenBank

What happens when Genbank is down?

In [22]:
from pydna.genbank import Genbank
from pydna.dseqrecord import Dseqrecord
# HTTPError and URLError (raised when a request fails) come from the standard library's urllib.error
from urllib.error import HTTPError, URLError


gb = Genbank("example@gmail.com")

# Request a record while GenBank cannot serve it, and show what is raised.
try:
    genbank_record = gb.nucleotide('NM_001018957.2')
    seq = Dseqrecord(genbank_record)
    print(seq)
except HTTPError as http_err:
    # The server answered with an error status
    # (e.g. 'HTTP Error 500: Internal Server Error' when genbank is down).
    print(http_err)
    print(repr(http_err))
except URLError as url_err:
    # No HTTP response at all: `reason` holds the underlying error
    # (a socket.gaierror in the recorded run).
    print(url_err.reason, type(url_err.reason))



    

[Errno 8] nodename nor servname provided, or not known <class 'socket.gaierror'>


In [15]:
# A request when the id does not exist (GenBank answers 'HTTP Error 400: Bad Request').
# NOTE: this cell relies on `gb`, `Dseqrecord`, `HTTPError` and `URLError`
# from the previous cell, so that cell has to be run first.
try:
    seq = Dseqrecord(gb.nucleotide('doesntexist'))
except HTTPError as exception:
    print(exception)
    print(repr(exception))
except URLError as exception:
    # Network-level failure (no HTTP response): handled like in the previous
    # cell so that the notebook does not stop with an uncaught exception.
    print(exception.reason, type(exception.reason))
    

HTTP Error 400: Bad Request
<HTTPError 400: 'Bad Request'>


## Ligation with sticky ends


In [24]:
from pydna.dseqrecord import Dseqrecord
from Bio.Restriction.Restriction import CommOnly

# Case 1: only one orientation is possible

enzyme = CommOnly.format('EcoRI')
seq = 'AAAAAAGAATTCTTTTTTTT'
dseq = Dseqrecord(seq, circular=False)
cutted_list = dseq.seq.cut([enzyme])

# Adding the fragments in their original order joins them via the sticky ends.
print('Joining sticky ends----')
sticky_join = cutted_list[0] + cutted_list[1]
print(repr(sticky_join))

# The order of the operands matters: swapped, the sum is a blunt-end join.
print('Joining blunt ends----')
blunt_join = cutted_list[1] + cutted_list[0]
print(repr(blunt_join))

# Case 2: two orientations are possible
seq = 'AAAAAAGAATTCTAAACCCTGAATTC'
dseq = Dseqrecord(seq, circular=True)
cutted_list = dseq.seq.cut([enzyme])

print()
first_fragment = cutted_list[0]
print(repr(first_fragment))
second_fragment = cutted_list[1]
print(repr(second_fragment))

# When a fragment can be inserted in both directions, the order of the sum
# decides which of the two assemblies is produced.
pre_assemble1 = first_fragment + second_fragment
pre_assemble2 = second_fragment + first_fragment

print()
print('Assembly of the same fragment in both directions ----')
print(pre_assemble1.looped())
print(pre_assemble2.looped())





Joining sticky ends----
Dseq(-20)
AAAAAAGAATTCTTTTTTTT
TTTTTTCTTAAGAAAAAAAA
Joining blunt ends----
Dseq(-24)
AATTCTTTTTTTTAAAAAAG
    GAAAAAAAATTTTTTCTTAA

Dseq(-16)
AATTCAAAAAAG
    GTTTTTTCTTAA
Dseq(-18)
AATTCTAAACCCTG
    GATTTGGGACTTAA

Assembly of the same fragment in both directions ----
AATTCAAAAAAGAATTCTAAACCCTG
AATTCTAAACCCTGAATTCAAAAAAG


In [8]:
# Removing duplicates by hand: values are taken from the END of `inp`, so
# `out` lists the unique values in reverse order of last appearance and
# `inp` is left empty afterwards.

inp = [0, 1, 2, 3, 1, 1, 3, 4, 5]
out = []
while inp:
    candidate = inp.pop()
    if candidate in out:
        continue
    out.append(candidate)
print(out)

# Custom set unique function

class Hello:
    """Small value wrapper that is compared and hashed by its ``val`` attribute.

    A class that defines ``__eq__`` without ``__hash__`` gets ``__hash__``
    set to ``None``, so its instances cannot be put in a ``set`` or used as
    ``dict`` keys. ``__hash__`` is therefore defined here on the same
    attribute that ``__eq__`` compares, which lets ``set()`` collapse equal
    instances.
    """

    def __init__(self,i):
        # `i` is stored as-is; it has to be hashable for hash()/set() to work.
        self.val = i

    def __eq__(self,other):
        # Decline the comparison for unrelated types instead of failing with
        # AttributeError on a missing `.val`; Python then falls back to its
        # default handling, so `Hello(1) == 1` is simply False.
        if not isinstance(other, Hello):
            return NotImplemented
        return self.val == other.val

    def __hash__(self):
        # Must agree with __eq__: equal instances have equal hashes.
        return hash(self.val)

    def __str__(self):
        return str(self.val)

# print() on a list shows each element's repr(), so Hello.__str__ is not
# used for this output.
the_list = [Hello(val) for val in (1, 2, 2)]
print(the_list)



[5, 4, 3, 1, 2, 0]
[<__main__.Hello object at 0x10683faf0>, <__main__.Hello object at 0x10683f2b0>, <__main__.Hello object at 0x10683f9a0>]


In [22]:
from itertools import chain
# Flatten one level of nesting with itertools, materialised as a list.
list(chain.from_iterable([[1, -1], [2, -2], [3, -3]]))
# chain() given a single iterable simply iterates over that iterable.
chain([1, 2, 3, 4, 5])

# sum() normally adds numbers (e.g. sum([1,2,3,4])); with a list as the
# start value it concatenates the inner lists instead.
sum([[1, -1], [2, -2], [3, -3]], [])

a = [1]
a[1:]  # slicing past the end gives an empty list rather than an IndexError

# Concatenate the (empty) tail onto the one-element head; only this last
# expression is displayed as the cell output.
sum(a[1:], a[:1])

[1]