# Pydna / Biopython behaviour

Here are some code chunks illustrating important points about the behaviour of Biopython / pydna.

## How to know if a sequence is circular or linear

This is important, and might not be straightforward from certain formats.

In [40]:
from pydna.parsers import parse as pydna_parse
from Bio.SeqIO import read as seqio_read
from pydna.dseqrecord import Dseqrecord
# The genbank file records the topology, and the pydna parser picks it up:
dseqrecords = pydna_parse('../plasmids/addgene-plasmid-39296-sequence-49545.gbk')
print('pydna with gb:', dseqrecords[0].circular)

seqrecord = seqio_read('../plasmids/addgene-plasmid-39296-sequence-49545.dna', 'snapgene')

# Converting the SeqRecord directly loses the information that the plasmid is circular
dseqrecord = Dseqrecord(seqrecord)
print('seqio with snapgene, then convert to dseqrecord:', dseqrecord.circular)

# The topology is stored in the record annotations, so read it from there
# and pass it explicitly when building the Dseqrecord
iscircular = seqrecord.annotations.get('topology') == 'circular'
dseqrecord = Dseqrecord(seqrecord, circular=iscircular)
print('seqio with snapgene, then convert to dseqrecord:', dseqrecord.circular)


pydna with gb: True
seqio with snapgene, then convert to dseqrecord: False
seqio with snapgene, then convert to dseqrecord: True


## Restriction enzymes

In [2]:
# List of commercial enzymes
from Bio.Restriction.Restriction import CommOnly
from pydna.dseqrecord import Dseqrecord

enzyme = CommOnly.format('EcoRI')

# To find out where the enzyme cuts in the original sequence, cut the
# underlying Dseq (dseqrecord.seq.cut) rather than the Dseqrecord itself:
# each resulting fragment then has a `pos` attribute indicating the
# position of that fragment in the original sequence.
examples = (
    ('linear', 'AAAAAAGAATTCTTTTTTTT', False),
    ('circular', 'AAAAAAGAATTCTTTTTTAAAGAATTCCCCCCC', True),
)

for label, seq, is_circular in examples:
    dseq = Dseqrecord(seq, circular=is_circular)
    cutted_list = dseq.seq.cut([enzyme])
    print(label)
    for i in cutted_list:
        print(i.pos)


linear
0
7
circular
22
40


## Overhangs

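`pydna` manages overhangs using the `Dseq` parameter `ovhg`, an integer that describes how the two strands are staggered at the 5' end of the watson strand. As the example below shows, a negative value means that the watson strand protrudes by that many bases at its 5' end; a positive value means that the crick strand protrudes there instead, and `0` means that the end is blunt.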

In [39]:
from pydna.dseq import Dseq

# Example molecule: the watson strand sticks out by 3 bases on the left (ovhg=-3)
dseq = Dseq(watson='GGGAA', crick='GGTT', ovhg=-3)
print('dseq', dseq)

print('dseq actual shape:\n', repr(dseq))

# In the Sequence object of the API we store the 5' and 3' overhangs instead.
# The pieces needed to rebuild this same molecule from them:
full_sequence = dseq
full_sequence_rev = dseq.reverse_complement()

def dseqFromBothOverhangs(full_sequence,overhang_watson,overhang_crick):
    full_sequence_rev = str(Dseq(full_sequence).reverse_complement())
    if overhang_watson<=0:
        if overhang_crick>=0:
            watson = full_sequence
            crick = full_sequence_rev[overhang_crick:overhang_watson]
        else:
            watson = full_sequence[:overhang_crick]
            crick = full_sequence_rev[:overhang_watson]
    else:
        if overhang_crick>=0:
            watson = full_sequence[overhang_watson:]
            crick = full_sequence_rev[overhang_crick:]
        else:
            watson = full_sequence[overhang_watson:overhang_crick]
            crick = full_sequence_rev

    return Dseq(watson,crick=crick,ovhg=overhang_watson)

def bothOverhangsFromDseq(dseq):
    """Return the pair of overhang values describing both ends of ``dseq``.

    The first value is ``dseq.ovhg`` itself; the second is the difference in
    length between the watson and crick strands, offset by ``dseq.ovhg``.
    """
    left_overhang = dseq.ovhg
    length_difference = len(dseq.watson) - len(dseq.crick)
    return left_overhang, length_difference + left_overhang

# Round trip: rebuild the molecule from its full sequence and both overhangs,
# then recover the overhangs from the rebuilt molecule
new_dseq = dseqFromBothOverhangs(
    str(full_sequence),
    overhang_watson=-3,
    overhang_crick=-2,
)
print(repr(new_dseq))
bothOverhangsFromDseq(new_dseq)

dseq GGGAACC
dseq actual shape:
 Dseq(-7)
GGGAA
   TTGG
Dseq(-7)
GGGAA
   TTGG


(-3, -2)