# Pydna / Biopython behaviour

Here are some code snippets illustrating important points about the behaviour of Biopython / pydna.

## How to know if a sequence is circular or linear

This is important, and might not be straightforward from certain formats.

In [12]:
from pydna.parsers import parse as pydna_parse
from Bio.SeqIO import read as seqio_read
from pydna.dseqrecord import Dseqrecord
# pydna's own parser picks up the topology directly from the GenBank file:
dseqrecords = pydna_parse('../sequences/addgene-plasmid-39296-sequence-49545.gbk')
print('pydna with gb:', dseqrecords[0].circular)

# Same plasmid, this time read from the SnapGene file through Biopython's SeqIO
seqrecord = seqio_read('../sequences/addgene-plasmid-39296-sequence-49545.dna', 'snapgene')
snapgene_label = 'seqio with snapgene, then convert to dseqrecord:'

# A plain SeqRecord -> Dseqrecord conversion does not carry the topology over
dseqrecord = Dseqrecord(seqrecord)
print(snapgene_label, dseqrecord.circular)

# Look the topology up in the SeqRecord annotations (the key may be absent)
# and hand it explicitly to the Dseqrecord constructor
iscircular = seqrecord.annotations.get('topology') == 'circular'
dseqrecord = Dseqrecord(seqrecord, circular=iscircular)
print(snapgene_label, dseqrecord.circular)


pydna with gb: True
seqio with snapgene, then convert to dseqrecord: False
seqio with snapgene, then convert to dseqrecord: True


## Restriction enzymes

In [3]:
# List of commercial enzymes
from Bio.Restriction.Restriction import CommOnly
from pydna.dseqrecord import Dseqrecord
enzyme = CommOnly.format('EcoRI')

# To learn where the enzyme cuts in the original sequence, cut the underlying
# Dseq (dseqrecord.seq.cut) instead of calling cut() on the Dseqrecord itself:
# every resulting fragment then exposes a `pos` attribute giving the position
# of that fragment in the original sequence.

# Same procedure for a linear and a circular molecule: (label, sequence, circular)
for label, seq, is_circular in (
    ('linear', 'AAAAAAGAATTCTTTTTTTT', False),
    ('circular', 'AAAAAAGAATTCTTTTTTAAAGAATTCCCCCCC', True),
):
    dseq = Dseqrecord(seq, circular=is_circular)
    cutted_list = dseq.seq.cut([enzyme])
    print(label)
    for fragment in cutted_list:
        print(fragment.pos)


linear
0
7
circular
22
40


## Requesting genes from Genbank

What happens when Genbank is down?

In [4]:
from pydna.genbank import Genbank
from pydna.dseqrecord import Dseqrecord
# The HTTPError is taken from this library
from urllib.error import HTTPError, URLError


gb = Genbank("example@gmail.com")

# Fetch a record and show what each kind of failure looks like.
# While Genbank is down the request fails with
# 'HTTP Error 500: Internal Server Error'.
try:
    record = gb.nucleotide('NM_001018957.2')
    seq = Dseqrecord(record)
    print(seq)
except HTTPError as http_error:
    # HTTPError is a subclass of URLError, so it has to be handled first
    print(http_error)
    print(repr(http_error))
except URLError as url_error:
    # Failure without an HTTP response: show the reason and its type
    print(url_error.reason, type(url_error.reason))



    

Dseqrecord
circular: False
size: 2671
ID: NM_001018957.2
Name: NM_001018957
Description: Schizosaccharomyces pombe antiparallel microtubule cross-linking factor Ase1 (ase1), mRNA
Database cross-references: BioProject:PRJNA127, BioSample:SAMEA3138176
Number of features: 3
/molecule_type=DNA
/topology=linear
/data_file_division=PLN
/date=03-APR-2018
/accessions=['NM_001018957']
/sequence_version=2
/keywords=['RefSeq']
/source=Schizosaccharomyces pombe (fission yeast)
/organism=Schizosaccharomyces pombe
/taxonomy=['Eukaryota', 'Fungi', 'Dikarya', 'Ascomycota', 'Taphrinomycotina', 'Schizosaccharomycetes', 'Schizosaccharomycetales', 'Schizosaccharomycetaceae', 'Schizosaccharomyces']
/references=[Reference(title='The genome sequence of Schizosaccharomyces pombe', ...), Reference(title='Direct Submission', ...), Reference(title='Direct Submission', ...)]
/comment=PROVISIONAL REFSEQ: This record has not yet been subject to final
NCBI review. This record is derived from an annotated genomic
seq

In [5]:
# A request when the id does not exist: Genbank answers with an HTTP error
# status, which is caught and displayed instead of crashing the cell.
try:
    seq = Dseqrecord(gb.nucleotide('doesntexist'))
except HTTPError as exception:
    # The server replied, but with an error status (HTTPError is a subclass
    # of URLError, so it must be handled first)
    print(exception)
    print(repr(exception))
except URLError as exception:
    # No HTTP response at all; handled like in the previous request so that
    # a connection problem does not abort a full notebook run
    print(exception.reason, type(exception.reason))
    

HTTP Error 400: Bad Request
<HTTPError 400: 'Bad Request'>


## Ligation with sticky ends


In [6]:
from pydna.dseqrecord import Dseqrecord
from Bio.Restriction.Restriction import CommOnly

# Case 1: a linear molecule cut once -- only one orientation is possible

enzyme = CommOnly.format('EcoRI')
seq = 'AAAAAAGAATTCTTTTTTTT'
dseq = Dseqrecord(seq, circular=False)
cutted_list = dseq.seq.cut([enzyme])

print('Joining sticky ends----')
sticky_join = cutted_list[0] + cutted_list[1]
print(repr(sticky_join))

# Order matters: with the operands swapped, the outer (blunt) ends of the
# two fragments are the ones being joined
print('Joining blunt ends----')
swapped_join = cutted_list[1] + cutted_list[0]
print(repr(swapped_join))

# Case 2: a circular molecule cut twice -- two orientations are possible
seq = 'AAAAAAGAATTCTAAACCCTGAATTC'
dseq = Dseqrecord(seq, circular=True)
cutted_list = dseq.seq.cut([enzyme])

# Show the first two fragments
print()
for index in (0, 1):
    print(repr(cutted_list[index]))

# The order of the operands decides in which direction the fragment ends up
# when it can be inserted both ways
pre_assemble1 = cutted_list[0] + cutted_list[1]
pre_assemble2 = cutted_list[1] + cutted_list[0]

print()
print('Assembly of the same fragment in both directions ----')
for pre_assembly in (pre_assemble1, pre_assemble2):
    print(pre_assembly.looped())





Joining sticky ends----
Dseq(-20)
AAAAAAGAATTCTTTTTTTT
TTTTTTCTTAAGAAAAAAAA
Joining blunt ends----
Dseq(-24)
AATTCTTTTTTTTAAAAAAG
    GAAAAAAAATTTTTTCTTAA

Dseq(-16)
AATTCAAAAAAG
    GTTTTTTCTTAA
Dseq(-18)
AATTCTAAACCCTG
    GATTTGGGACTTAA

Assembly of the same fragment in both directions ----
AATTCAAAAAAGAATTCTAAACCCTG
AATTCTAAACCCTGAATTCAAAAAAG


## Understanding PCRs

In [1]:
# PCR
from pydna.parsers import parse
from pydna.dseqrecord import Dseqrecord
from pydna.primer import Primer
from pydna.amplify import Anneal
from pydna.amplicon import Amplicon

# Load the template plasmid (first record parsed from the GenBank file)
template = parse('../sequences/pFA6a-hphMX6.gb')[0]

# Optional experiment: uncomment to clear the template features beforehand
# and check whether the primer annealing gets added as new features.
# template.features = list()

# Primer sequences; these carry extensions meant for a gene deletion
FORWARD_PRIMER_SEQ = 'AGTTTTCATATCTTCCTTTATATTCTATTAATTGAATTTCAAACATCGTTTTATTGAGCTCATTTACATCAACCGGTTCACGGATCCCCGGGTTAATTAA'
REVERSE_PRIMER_SEQ = 'CTTTTATGAATTATCTATATGCTGTATTCATATGCAAAAATATGTATATTTAAATTTGATCGATTAGGTAAATAAGAAGCGAATTCGAGCTCGTTTAAAC'

primer1 = Primer(
    record=FORWARD_PRIMER_SEQ,
    id='P1',
    name='ase1_forward',
)

primer2 = Primer(
    record=REVERSE_PRIMER_SEQ,
    id='P2',
    name='ase1_reverse',
)

# Anneal both primers to the template and keep the first product
# (limit=13 is presumably the minimum annealing length -- confirm in the pydna docs)
anneal = Anneal([primer1, primer2], template, limit=13)
amplicon: Amplicon = anneal.products[0]

# The last two features added to the template are the primer alignment sites.
# TODO write a test because we rely on this behaviour
# print(amplicon.template.features[-2:])

# Placeholder, not filled in within this cell
forward_alignment = []





AAAAAA 21
