In [None]:
import pandas as pd
from Bio import Restriction
from tqdm.auto import tqdm, trange

from paulssonlab.cloning.design import random_bases
from paulssonlab.cloning.enzyme import re_digest
from paulssonlab.cloning.primers import Primer, PrimerPair
from paulssonlab.cloning.sequence import reverse_complement

In [None]:
# Fixed junction sequences that the random primer sequences are joined to when
# the part sequences are printed further down in this notebook.
# seq_1a: the first primer's sequence is placed immediately to its left.
seq_1a = "GAAGACCGACGAGGAGAGAGACCGATAGC"  # primer on left
# seq_2e: the reverse complement of the second primer's sequence is placed
# immediately to its right.
seq_2e = "AGCTTAGGTCTCTGCTTTGAATGGTCTTC"  # primer on right

In [None]:
# Digest the 1A junction sequence with AarI and display the result.
# (The name was previously misspelled as `seq_a1`, which is never defined.)
re_digest(seq_1a, Restriction.AarI)

In [None]:
%%time
# Restriction enzymes that both primers of every candidate pair are screened against.
enzymes = [Restriction.BsaI, Restriction.BsmBI, Restriction.BbsI, Restriction.AarI]
N_TRIALS = 10000  # number of random primer pairs to draw
N_RANDOM_BASES = 30  # random bases per primer, followed by one more base drawn from "gc"


def _digest_yields_single_result(seq, enzymes):
    """Return True if `re_digest(seq, enz)` has length 1 for every enzyme.

    A digest whose length is not 1 (presumably meaning the enzyme cut the
    sequence -- confirm against `re_digest`) or any error raised while
    digesting counts as a failure.  Only `Exception` is caught, so
    KeyboardInterrupt/SystemExit still stop the surrounding loop instead of
    being silently turned into a rejection.
    """
    try:
        return all(len(re_digest(seq, enz)) == 1 for enz in enzymes)
    except Exception:
        return False


candidates = []
for i in trange(N_TRIALS):
    # Each primer is N_RANDOM_BASES random bases plus one final base drawn
    # from "gc".
    primer1 = Primer(binding=random_bases(N_RANDOM_BASES) + random_bases(1, "gc"))
    primer2 = Primer(binding=random_bases(N_RANDOM_BASES) + random_bases(1, "gc"))
    # Discard the pair as soon as either primer fails the enzyme screen.
    if not (
        _digest_yields_single_result(primer1.seq, enzymes)
        and _digest_yields_single_result(primer2.seq, enzymes)
    ):
        continue
    pair = PrimerPair(primer1, primer2)
    # Touch the derived attributes now so they are computed inside this timed
    # loop (presumably they are cached on the objects -- the values themselves
    # are discarded here).
    (
        primer1.seq,
        primer2.seq,
        pair.ta,
        primer1.mfe_monomer,
        primer2.mfe_monomer,
        primer1.mfe_homodimer,
        primer2.mfe_homodimer,
        pair.mfe_heterodimer,
    )
    candidates.append(pair)

In [None]:
%%time
def _lowest_mfe(pair):
    """Return the smallest of the pair's five MFE values.

    Covers both monomer values, both homodimer values and the heterodimer value.
    """
    return min(
        pair.primer1.mfe_monomer,
        pair.primer2.mfe_monomer,
        pair.primer1.mfe_homodimer,
        pair.primer2.mfe_homodimer,
        pair.mfe_heterodimer,
    )


def _within_limits(pair):
    """Return whether the pair passes the tm and gc filters.

    The two `tm` values must differ by less than 3 and both `gc` values must
    lie in the closed range 40..60.
    """
    first, second = pair.primer1, pair.primer2
    return (
        abs(first.tm - second.tm) < 3
        and 40 <= first.gc <= 60
        and 40 <= second.gc <= 60
    )


# Rank the surviving pairs so that those whose lowest MFE value is highest come
# first, then keep the top four.
ranked_candidates = sorted(
    filter(_within_limits, candidates), key=_lowest_mfe, reverse=True
)
selected_candidates = ranked_candidates[:4]

In [None]:
# Tabulate the annealing temperature, melting temperatures, MFE values and GC
# values of each selected pair, one row per pair.
summary_rows = []
for pair in selected_candidates:
    first, second = pair.primer1, pair.primer2
    summary_rows.append(
        dict(
            ta=pair.ta,
            tm1=first.tm,
            tm2=second.tm,
            monomer1=first.mfe_monomer,
            monomer2=second.mfe_monomer,
            homodimer1=first.mfe_homodimer,
            homodimer2=second.mfe_homodimer,
            heterodimer=pair.mfe_heterodimer,
            gc1=first.gc,
            gc2=second.gc,
        )
    )
pd.DataFrame(summary_rows)

In [None]:
# Display the homodimer MFE value of the second primer in the fourth selected
# pair (index 3; raises IndexError if fewer than four pairs were selected).
selected_candidates[3].primer2.mfe_homodimer

In [None]:
# Display the sequence of the first primer in the top-ranked selected pair.
selected_candidates[0].primer1.seq

In [None]:
# Display the sequence of the second primer in the top-ranked selected pair.
selected_candidates[0].primer2.seq

In [None]:
# Print tab-separated name/sequence lines for the top and bottom strands of
# both junction parts of every selected pair.  Numbering starts at 2, so the
# names run RAND2, RAND3, ...
for number, pair in enumerate(selected_candidates, start=2):
    # 1A part: first primer sequence followed by the fixed 1A junction.
    top_1a = f"{pair.primer1.seq}{seq_1a}"
    # 2E part: fixed 2E junction followed by the reverse complement of the
    # second primer sequence.
    top_2e = f"{seq_2e}{reverse_complement(pair.primer2.seq)}"
    for part, top_strand in (("1A", top_1a), ("2E", top_2e)):
        print(f"RAND{number}_JUNCT_{part}_top\t{top_strand}")
        print(f"RAND{number}_JUNCT_{part}_bottom\t{reverse_complement(top_strand)}")

In [None]:
# Print tab-separated name/sequence lines for the two primers of every
# selected pair, using the same RAND<n> numbering (starting at 2) as the
# junction oligos.
for number, pair in enumerate(selected_candidates, start=2):
    for part, primer in (("1A", pair.primer1), ("2E", pair.primer2)):
        print(f"RAND{number}_JUNCT_{part}_primer\t{primer.seq}")