In [None]:
import re
from collections.abc import Iterable, Mapping
from numbers import Integral

import Bio.Restriction as Restriction
import pandas as pd
import pygsheets
import requests
import toml
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import paulssonlab.api.geneious as geneious
import paulssonlab.cloning.enzyme as enzyme
import paulssonlab.cloning.registry as registry
import paulssonlab.cloning.sequence as sequence
import paulssonlab.cloning.workflow as workflow

# Setup

In [None]:
config = toml.load("config.toml")

In [None]:
gc = pygsheets.authorize(service_account_file="credentials.json")

In [None]:
reg = registry.Registry(gc, config["registry"]["folder"])

# Config

In [None]:
olib_oligos = reg[("oLIB", "oligos")]
olt_oligos = reg[("oLT", "oligos")]
plib_plasmids = reg[("pLIB", "plasmids")]
plib_maps = reg[("pLIB", "maps")]
flib_fragments = reg[("fLIB", "fragments")]
part_types = reg[("fLIB", "fragments", "Part types")]

# Digest test

In [None]:
len(reg.get("fLIB255")["_seq"])

In [None]:
s = reg.get("mKate2_nocut")
s

In [None]:
len(s["_seq"])

In [None]:
s["_seq"]

In [None]:
s = reg.get("pLIB112")["_seq"]

In [None]:
reg.eval_expr("pLIB112/AarI/BbsI")

# Golden Gate (GG) assembly test

In [None]:
import random

import networkx as nx

In [None]:
import paulssonlab.cloning.design as design

In [None]:
?design.golden_gate_placeholder

In [None]:
def _make_frag(overhang1, overhang2):
    """Build a Golden Gate placeholder and return one product of its BbsI digest.

    The placeholder is created by ``design.golden_gate_placeholder`` from the
    BsaI and BbsI enzymes plus the two overhangs, and is then digested with
    BbsI through ``enzyme.re_digest``.

    Args:
        overhang1: first overhang handed to the placeholder builder.
        overhang2: second overhang handed to the placeholder builder.

    Returns:
        The element at index 1 of the digest result (presumably the fragment
        carrying the two overhangs -- TODO confirm against ``re_digest``).
    """
    placeholder = design.golden_gate_placeholder(
        Restriction.BsaI, Restriction.BbsI, overhang1, overhang2
    )
    digest_products = enzyme.re_digest(placeholder, Restriction.BbsI)
    return digest_products[1]

In [None]:
# Placeholder fragments, each identified by its (overhang1, overhang2) pair.
test_set = [
    _make_frag("aaaa", "gggg"),
    _make_frag("gggg", "aatt"),
    _make_frag("aatt", "ggcc"),
    _make_frag("ggcc", "gggg"),
    _make_frag("aaaa", "cccc"),
    # _make_frag("gggg", "cccc"),
]
# Shuffle so the assembly calls below cannot rely on input order (presumably
# the intent). A dedicated, seeded generator keeps the order identical across
# kernel restarts and leaves the global ``random`` state untouched.
random.Random(0).shuffle(test_set)

In [None]:
print(_make_frag("aaaa", "gggg"))

In [None]:
sequence.assemble_circular(test_set)

In [None]:
g = sequence.assembly_graph(test_set, method="goldengate")
g.edges

In [None]:
nx.draw(g, with_labels=True)

# Barcode GFA

In [None]:
" + ".join(
    [
        "barcode_bit{num}_on | barcode_bit{num}_on".format(num=num)
        for num in range(1, 31)
    ]
)

In [None]:
x = reg.get("barcode_bit1_on")

In [None]:
x["_seq"]

In [None]:
test_gfa = """H	VN:Z:1.0
S	lt631_fixed1	AAAAAAAAAAAAAAAAAAAAAAAAAA
S	plac_rbs	NNNNNNNN
S	lt631_fixed2	AAAAAAAAAAAAAAAAAAAAAAAAAA
S	ptet_rbs	NNNNNNNN
S	lt631_fixed3	AAAAAAAAAAAAAAAAAAAAAAAAAA
S	pl_rbs	NNNNNNNN
S	lt631_fixed4	AAAAAAAAAAAAAAAAAAAAAAAAAA
S	barcode_bit0_off	AGAGGAAAGGAGAAAGGTGAC
S	barcode_bit0_on	ATAGGAAATGGTGGTAGTGTC
S	barcode_bit1_off	ATAAGATGGAGAGTAGAGGGC
S	barcode_bit1_on	GGGTGGTTTAGTGTGTGTTTC
S	barcode_bit2_off	TATGATGGAGAGAGGAGGTTC
S	barcode_bit2_on	GGGATGTATTGAAGGAGGATC
S	barcode_bit3_off	AGGAGGAGGAATTATGGTGAC
S	barcode_bit3_on	AGTGTGGGATTGATGAGATAC
S	barcode_bit4_off	AGTGATGTGTGGAAGTTGGTC
S	barcode_bit4_on	AGAGTGAGTAGTAGTGGAGTC
S	barcode_bit5_off	TGGGTAGATAAGTAAGAGGGC
S	barcode_bit5_on	TGGGAGGATTGAGATGAGTTC
S	barcode_bit6_off	GAAGGTTGGAGAGAAGATGTC
S	barcode_bit6_on	TGAAAGGAATGGGTTGTGGTC
S	barcode_bit7_off	GTGAGAGAAGGAGGATGATAC
S	barcode_bit7_on	TGTGGTTTGGAGATGATAGAC
S	barcode_bit8_off	AGAGGGTGTAAGAGAAGAAGC
S	barcode_bit8_on	TGTGATGGAAGTTAGAGGGTC
S	barcode_bit9_off	ATAGTGAAGTGAAGGTGGGAC
S	barcode_bit9_on	AGTTGAGGTGGGAGAGTATTC
S	barcode_bit10_off	GAGAGTTGTATGTTAGGGTGC
S	barcode_bit10_on	GGGTTGATTAGTGGTAGAAAC
S	barcode_bit11_off	GGAGGAGAGTGATGAAGTTTC
S	barcode_bit11_on	GAGATTAGAGATGAGTTGGAC
S	barcode_bit12_off	GTTGAGGAATGGTGGATTGAC
S	barcode_bit12_on	AGGTTAGGTTGAGAATAGGAC
S	barcode_bit13_off	AGGATGTGGGATGGGTTTTAC
S	barcode_bit13_on	GGGTAGTGGGAATGATTTATC
S	barcode_bit14_off	TGGTGGGATGGGTTGTTTAAC
S	barcode_bit14_on	AGGGTGTGTTTGTAAAGGGTC
S	barcode_spacer1	TAGCAATTACATAACAGATAC
S	barcode_bit15_off	GTGTTGTGTGTAGGGTTATGC
S	barcode_bit15_on	GGGATGTGATTTGTTAGGAAC
S	barcode_bit16_off	TTATTGGTGGGTGTAGAGAGC
S	barcode_bit16_on	TGTGGAGGGATTGAAGGATAC
S	barcode_bit17_off	GAAGAGAGTGGGTATGGAATC
S	barcode_bit17_on	GGATGTTATGAGTGTTGGGTC
S	barcode_bit18_off	AGGATTGTTGTTGGTAAGGGC
S	barcode_bit18_on	TTGGAGGTGTAGGGAGTAAAC
S	barcode_bit19_off	GGTTGTGGGTAATGAGTTGAC
S	barcode_bit19_on	TGGGATAGTATGTGGAAAGTC
S	barcode_bit20_off	TGAGGTTGTGGTTGGATATGC
S	barcode_bit20_on	GGGAGAATGAGGTGTAATGTC
S	barcode_bit21_off	ATGGTAGTTGAGTGTGGTTGC
S	barcode_bit21_on	GGGATTATGGGTTTGTAGTAC
S	barcode_bit22_off	GAATGTTGGGAGTAGAAGGTC
S	barcode_bit22_on	TAGAGTTGATAGAGGGAGAAC
S	barcode_bit23_off	GTTGAAAGAGGATGAAGAGGC
S	barcode_bit23_on	GATGAAGATTGAGGGAAGAAC
S	barcode_bit24_off	AGGTGAGGTGTTTGTGAGTTC
S	barcode_bit24_on	GGAGTAGTTGGTTGTTAGGAC
S	barcode_bit25_off	AATTGGTGTGTGGTTTTGGGC
S	barcode_bit25_on	AGTTGGGTATGGAGAAAGGTC
S	barcode_bit26_off	AGGGTGAGAAGGATATGGATC
S	barcode_bit26_on	GATGATGTAGTAGTAAGGGTC
S	barcode_bit27_off	GGAATGAGGGTAGTTAAGAGC
S	barcode_bit27_on	GTGAAGTGGAAGGTGAGATTC
S	barcode_bit28_off	AGTTGGTGGGAAGAAGGAAAC
S	barcode_bit28_on	AGGAGGAGGGTAATGATAGAC
S	barcode_bit29_off	GAGATTGAAGTTGGTGGTTGC
S	barcode_bit29_on	TAGAGGGAGTAAGATGAGGAC
S	barcode_spacer2	ATCACATTGCCATCAGTAAT
L	lt631_fixed1	+	plac_rbs	+	0M
L	plac_rbs	+	lt631_fixed2	+	0M
L	lt631_fixed2	+	ptet_rbs	+	0M
L	ptet_rbs	+	lt631_fixed3	+	0M
L	lt631_fixed3	+	pl_rbs	+	0M
L	pl_rbs	+	lt631_fixed4	+	0M
L	lt631_fixed4	+	barcode_bit0_off	+	0M
L	lt631_fixed4	+	barcode_bit0_on	+	0M
L	barcode_bit0_off	+	barcode_bit1_off	+	0M
L	barcode_bit0_off	+	barcode_bit1_on	+	0M
L	barcode_bit0_on	+	barcode_bit1_off	+	0M
L	barcode_bit0_on	+	barcode_bit1_on	+	0M
L	barcode_bit1_off	+	barcode_bit2_off	+	0M
L	barcode_bit1_off	+	barcode_bit2_on	+	0M
L	barcode_bit1_on	+	barcode_bit2_off	+	0M
L	barcode_bit1_on	+	barcode_bit2_on	+	0M
L	barcode_bit2_off	+	barcode_bit3_off	+	0M
L	barcode_bit2_off	+	barcode_bit3_on	+	0M
L	barcode_bit2_on	+	barcode_bit3_off	+	0M
L	barcode_bit2_on	+	barcode_bit3_on	+	0M
L	barcode_bit3_off	+	barcode_bit4_off	+	0M
L	barcode_bit3_off	+	barcode_bit4_on	+	0M
L	barcode_bit3_on	+	barcode_bit4_off	+	0M
L	barcode_bit3_on	+	barcode_bit4_on	+	0M
L	barcode_bit4_off	+	barcode_bit5_off	+	0M
L	barcode_bit4_off	+	barcode_bit5_on	+	0M
L	barcode_bit4_on	+	barcode_bit5_off	+	0M
L	barcode_bit4_on	+	barcode_bit5_on	+	0M
L	barcode_bit5_off	+	barcode_bit6_off	+	0M
L	barcode_bit5_off	+	barcode_bit6_on	+	0M
L	barcode_bit5_on	+	barcode_bit6_off	+	0M
L	barcode_bit5_on	+	barcode_bit6_on	+	0M
L	barcode_bit6_off	+	barcode_bit7_off	+	0M
L	barcode_bit6_off	+	barcode_bit7_on	+	0M
L	barcode_bit6_on	+	barcode_bit7_off	+	0M
L	barcode_bit6_on	+	barcode_bit7_on	+	0M
L	barcode_bit7_off	+	barcode_bit8_off	+	0M
L	barcode_bit7_off	+	barcode_bit8_on	+	0M
L	barcode_bit7_on	+	barcode_bit8_off	+	0M
L	barcode_bit7_on	+	barcode_bit8_on	+	0M
L	barcode_bit8_off	+	barcode_bit9_off	+	0M
L	barcode_bit8_off	+	barcode_bit9_on	+	0M
L	barcode_bit8_on	+	barcode_bit9_off	+	0M
L	barcode_bit8_on	+	barcode_bit9_on	+	0M
L	barcode_bit9_off	+	barcode_bit10_off	+	0M
L	barcode_bit9_off	+	barcode_bit10_on	+	0M
L	barcode_bit9_on	+	barcode_bit10_off	+	0M
L	barcode_bit9_on	+	barcode_bit10_on	+	0M
L	barcode_bit10_off	+	barcode_bit11_off	+	0M
L	barcode_bit10_off	+	barcode_bit11_on	+	0M
L	barcode_bit10_on	+	barcode_bit11_off	+	0M
L	barcode_bit10_on	+	barcode_bit11_on	+	0M
L	barcode_bit11_off	+	barcode_bit12_off	+	0M
L	barcode_bit11_off	+	barcode_bit12_on	+	0M
L	barcode_bit11_on	+	barcode_bit12_off	+	0M
L	barcode_bit11_on	+	barcode_bit12_on	+	0M
L	barcode_bit12_off	+	barcode_bit13_off	+	0M
L	barcode_bit12_off	+	barcode_bit13_on	+	0M
L	barcode_bit12_on	+	barcode_bit13_off	+	0M
L	barcode_bit12_on	+	barcode_bit13_on	+	0M
L	barcode_bit13_off	+	barcode_bit14_off	+	0M
L	barcode_bit13_off	+	barcode_bit14_on	+	0M
L	barcode_bit13_on	+	barcode_bit14_off	+	0M
L	barcode_bit13_on	+	barcode_bit14_on	+	0M
L	barcode_bit14_off	+	barcode_spacer1	+	0M
L	barcode_bit14_on	+	barcode_spacer1	+	0M
L	barcode_spacer1	+	barcode_bit15_off	+	0M
L	barcode_spacer1	+	barcode_bit15_on	+	0M
L	barcode_bit15_off	+	barcode_bit16_off	+	0M
L	barcode_bit15_off	+	barcode_bit16_on	+	0M
L	barcode_bit15_on	+	barcode_bit16_off	+	0M
L	barcode_bit15_on	+	barcode_bit16_on	+	0M
L	barcode_bit16_off	+	barcode_bit17_off	+	0M
L	barcode_bit16_off	+	barcode_bit17_on	+	0M
L	barcode_bit16_on	+	barcode_bit17_off	+	0M
L	barcode_bit16_on	+	barcode_bit17_on	+	0M
L	barcode_bit17_off	+	barcode_bit18_off	+	0M
L	barcode_bit17_off	+	barcode_bit18_on	+	0M
L	barcode_bit17_on	+	barcode_bit18_off	+	0M
L	barcode_bit17_on	+	barcode_bit18_on	+	0M
L	barcode_bit18_off	+	barcode_bit19_off	+	0M
L	barcode_bit18_off	+	barcode_bit19_on	+	0M
L	barcode_bit18_on	+	barcode_bit19_off	+	0M
L	barcode_bit18_on	+	barcode_bit19_on	+	0M
L	barcode_bit19_off	+	barcode_bit20_off	+	0M
L	barcode_bit19_off	+	barcode_bit20_on	+	0M
L	barcode_bit19_on	+	barcode_bit20_off	+	0M
L	barcode_bit19_on	+	barcode_bit20_on	+	0M
L	barcode_bit20_off	+	barcode_bit21_off	+	0M
L	barcode_bit20_off	+	barcode_bit21_on	+	0M
L	barcode_bit20_on	+	barcode_bit21_off	+	0M
L	barcode_bit20_on	+	barcode_bit21_on	+	0M
L	barcode_bit21_off	+	barcode_bit22_off	+	0M
L	barcode_bit21_off	+	barcode_bit22_on	+	0M
L	barcode_bit21_on	+	barcode_bit22_off	+	0M
L	barcode_bit21_on	+	barcode_bit22_on	+	0M
L	barcode_bit22_off	+	barcode_bit23_off	+	0M
L	barcode_bit22_off	+	barcode_bit23_on	+	0M
L	barcode_bit22_on	+	barcode_bit23_off	+	0M
L	barcode_bit22_on	+	barcode_bit23_on	+	0M
L	barcode_bit23_off	+	barcode_bit24_off	+	0M
L	barcode_bit23_off	+	barcode_bit24_on	+	0M
L	barcode_bit23_on	+	barcode_bit24_off	+	0M
L	barcode_bit23_on	+	barcode_bit24_on	+	0M
L	barcode_bit24_off	+	barcode_bit25_off	+	0M
L	barcode_bit24_off	+	barcode_bit25_on	+	0M
L	barcode_bit24_on	+	barcode_bit25_off	+	0M
L	barcode_bit24_on	+	barcode_bit25_on	+	0M
L	barcode_bit25_off	+	barcode_bit26_off	+	0M
L	barcode_bit25_off	+	barcode_bit26_on	+	0M
L	barcode_bit25_on	+	barcode_bit26_off	+	0M
L	barcode_bit25_on	+	barcode_bit26_on	+	0M
L	barcode_bit26_off	+	barcode_bit27_off	+	0M
L	barcode_bit26_off	+	barcode_bit27_on	+	0M
L	barcode_bit26_on	+	barcode_bit27_off	+	0M
L	barcode_bit26_on	+	barcode_bit27_on	+	0M
L	barcode_bit27_off	+	barcode_bit28_off	+	0M
L	barcode_bit27_off	+	barcode_bit28_on	+	0M
L	barcode_bit27_on	+	barcode_bit28_off	+	0M
L	barcode_bit27_on	+	barcode_bit28_on	+	0M
L	barcode_bit28_off	+	barcode_bit29_off	+	0M
L	barcode_bit28_off	+	barcode_bit29_on	+	0M
L	barcode_bit28_on	+	barcode_bit29_off	+	0M
L	barcode_bit28_on	+	barcode_bit29_on	+	0M
L	barcode_bit29_off	+	barcode_spacer2	+	0M
L	barcode_bit29_on	+	barcode_spacer2	+	0M"""

In [None]:
import gfapy

In [None]:
g = gfapy.Gfa(test_gfa)

In [None]:
s = g.segments[0]

In [None]:
??s.dovetails

In [None]:
Seq

In [None]:
"abc"[-4]

In [None]:
x = slice(-1, -3, -1)

In [None]:
x.indices(3)

In [None]:
?x.indices

In [None]:
# gibson (good)
# re_digest/re.match (special case?)
# parasail, etc.


class VariableSeq:
    """A set of named sequence variants with optional flanking consensus.

    Work-in-progress sketch: ``from_seqs``, ``squeeze`` and slice indexing are
    not implemented yet.

    NOTE(review): a later cell in this notebook defines another class that is
    also named ``VariableSeq``; running the notebook top to bottom rebinds the
    name to that class.

    Attributes:
        seqs: dict mapping variant name to variant sequence.
        name: the ``name`` argument given to the constructor (may be None).
        upstream: consensus sequence counted before the variable part
            ("" when not given).
        downstream: consensus sequence counted after the variable part
            ("" when not given).
    """

    def __init__(self, seqs, name=None, upstream=None, downstream=None):
        """Store the variants and the flanking consensus sequences.

        Args:
            seqs: a mapping from variant name to sequence, or an iterable
                (e.g. a list) of sequences.
            name: prefix used to name the variants ``{name}.{idx}`` when
                ``seqs`` is an iterable of sequences; required in that case.
            upstream: consensus sequence preceding the variants, or None.
            downstream: consensus sequence following the variants, or None.

        Raises:
            ValueError: if ``seqs`` is a single ``str``/``Seq`` or is not
                iterable, or if an iterable is given without ``name``.
        """
        if isinstance(seqs, Mapping):
            named_seqs = dict(seqs)
        elif isinstance(seqs, (str, Seq)) or not isinstance(seqs, Iterable):
            # str and Seq are themselves iterable, so they must be rejected
            # before the generic iterable case; enumerating one would turn
            # every base into its own "variant".
            raise ValueError(
                "seqs must be a mapping from names to sequences or a list of sequences"
            )
        else:
            if not name:
                raise ValueError("name is required if list of sequences is given")
            named_seqs = {f"{name}.{idx}": seq for idx, seq in enumerate(seqs)}
        self.seqs = named_seqs
        self.name = name
        # Empty flanks keep len() and indexing well-defined when none is given.
        self.upstream = "" if upstream is None else upstream
        self.downstream = "" if downstream is None else downstream

    @classmethod
    def from_seqs(cls, seqs):
        """Placeholder alternate constructor; not implemented (returns None)."""
        pass

    def squeeze(self):
        """Placeholder; not implemented (returns None)."""
        pass

    def __getitem__(self, slice_):
        """Index into the flanking consensus sequences.

        A non-negative integer indexes ``upstream`` from its start; a negative
        integer indexes ``downstream`` from its end.

        Args:
            slice_: an integer index (slices are not supported yet).

        Returns:
            The element of ``upstream`` or ``downstream`` at that index.

        Raises:
            IndexError: if the index falls outside the addressed flank.
            NotImplementedError: if ``slice_`` is a ``slice``.
            TypeError: if ``slice_`` is neither an integer nor a slice.
        """
        if isinstance(slice_, Integral):
            if slice_ >= 0:
                if slice_ < len(self.upstream):
                    return self.upstream[slice_]
                raise IndexError(
                    f"index {slice_} is out of range for upstream consensus (length {len(self.upstream)})"
                )
            # -1 is the last downstream element, so -k is valid iff
            # k <= len(downstream).
            if -slice_ <= len(self.downstream):
                return self.downstream[slice_]
            raise IndexError(
                f"index {slice_} is out of range for downstream consensus (length {len(self.downstream)})"
            )
        if isinstance(slice_, slice):
            raise NotImplementedError("slicing a VariableSeq is not implemented yet")
        raise TypeError(
            f"VariableSeq indices must be integers or slices, not {type(slice_).__name__}"
        )

    def min_variable_length(self):
        """Return the length of the shortest variant.

        Raises:
            ValueError: if there are no variants.
        """
        return min(len(seq) for seq in self.seqs.values())

    def min_length(self):
        """Return upstream length + shortest variant length + downstream length."""
        return len(self.upstream) + self.min_variable_length() + len(self.downstream)

    def max_variable_length(self):
        """Return the length of the longest variant.

        Raises:
            ValueError: if there are no variants.
        """
        return max(len(seq) for seq in self.seqs.values())

    def max_length(self):
        """Return upstream length + longest variant length + downstream length."""
        return len(self.upstream) + self.max_variable_length() + len(self.downstream)

    # def __len__(self):
    #     return self.max_length()

In [None]:
# DsSeqRecord methods depending on self.seq
# ds_length: len(self.seq)
# upstream_overhang_seq: self.seq_lower()[: abs(self.upstream_overhang)]
# seq_lower: str(self.seq).lower()
# __getitem__: self.seq[index: int]
# __add__: self.can_ligate(other) THEN self.__class__(self.seq + other.seq, ...)
# __str__: str(self.seq), str(self.seq.complement())

# upstream_overhang_seq: self.seq_lower()[: abs(self.upstream_overhang)]
# downstream_overhang_seq: self.seq_lower()[len(self) - abs(self.downstream_overhang) : len(self)]

### LOOK AT GIBSON/GG

# circular/noncircular
# desiderata:
# drop-in replacement for SeqRecord/DsSeqRecord (?)
# fasta/gb export
# for GG/gibson: hhh

# from_gfa
# to_gfa

# __or__ (|) [also implement for DsSeqRecord]
# max_length, min_length
# __len__ raises error or returns None if lengths not equal?
# slicing
# __add__, __radd__
# ligation check
# overhangs


class VariableSeqRecord(sequence.DsSeqRecord):
    """Sketch of a ``DsSeqRecord`` subclass; every override is a placeholder.

    NOTE(review): each method below replaces the inherited one and returns
    ``None``; in particular ``str()`` on an instance raises ``TypeError``
    because ``__str__`` does not return a string.
    """

    def __getitem__(self, index):
        """Placeholder for indexing; not implemented (returns None)."""
        return None

    def __add__(self, other):
        """Placeholder for concatenation; not implemented (returns None)."""
        return None

    def __str__(self):
        """Placeholder for string rendering; not implemented (returns None)."""
        return None


class VariableSeq:
    """Skeleton of the planned ``VariableSeq`` interface (all stubs).

    NOTE(review): this redefines ``VariableSeq``. A class of the same name
    with a fuller implementation is defined in an earlier cell of this
    notebook and is shadowed once this cell runs.
    """

    def __init__(self, seqs):
        """Placeholder constructor; ``seqs`` is accepted but not stored yet."""
        pass

    @classmethod
    def from_gfa(cls, gfa=None):
        """Placeholder alternate constructor; not implemented (returns None).

        Args:
            gfa: GFA input to build from; currently ignored. Optional so that
                the previous zero-argument call ``VariableSeq.from_gfa()``
                keeps working.
        """
        pass

    def to_gfa(self):
        """Placeholder for GFA export; not implemented (returns None)."""
        pass

    def squeeze(self):
        """Placeholder; not implemented (returns None)."""
        pass


class GraphSeq:
    """Skeleton container holding a ``seqs`` dict (work in progress).

    Attributes:
        seqs: dict, empty on construction.
    """

    def __init__(self):
        """Create an instance with an empty ``seqs`` dict."""
        self.seqs = {}

    @classmethod
    def from_gfa(cls, gfa):
        """Return a new instance for the given GFA input.

        GFA parsing is not implemented yet: ``gfa`` is currently ignored and
        the returned instance has an empty ``seqs`` dict.

        Args:
            gfa: GFA input to build from (unused for now).

        Returns:
            A new instance of ``cls``.
        """
        return cls()

    def to_gfa(self):
        """Placeholder for GFA export; not implemented (returns None)."""
        pass