In [None]:
import numpy as np
import matplotlib.pyplot as plt
import re

# Oligo costs

In [None]:
def idt_cost(max_length, num_oligos):
    """Estimate the price of an oligo pool from its total base count.

    The price starts at a flat 99.0, which covers the first 3,300 bases, and
    then grows per additional base: 0.03 up to 50,000 total bases, 0.02 up to
    100,000, and 0.01 beyond that.

    Parameters
    ----------
    max_length : scalar or 1-D array-like
        Length of the longest oligo in the pool; every oligo is billed at
        this length.
    num_oligos : scalar or 1-D array-like
        Number of oligos in the pool.

    Each argument may be a scalar or a 1-D array; if both are arrays they
    must have the same shape.

    Returns
    -------
    numpy.ndarray
        1-D float array of prices. Entries whose ``max_length`` falls outside
        40..350 are NaN, matching how ``cost_lookup`` reports lengths that a
        vendor's table does not cover.

    Raises
    ------
    ValueError
        If both arguments are arrays of different shapes.
    """
    # atleast_1d promotes scalars to shape (1,) and also accepts plain lists.
    max_length = np.atleast_1d(max_length)
    num_oligos = np.atleast_1d(num_oligos)
    if (
        max_length.shape != num_oligos.shape
        and max_length.size != 1
        and num_oligos.size != 1
    ):
        raise ValueError(
            "each of max_length and num_oligos can be a scalar or a 1d array of length equal to the other"
        )
    total_bp = max_length * num_oligos
    bad_lengths = (max_length < 40) | (max_length > 350)
    cost = np.full(total_bp.shape, 99.0)
    # Each term bills only the bases that fall inside its own pricing tier.
    cost += 0.03 * np.maximum(np.minimum(total_bp - 3300, 50000 - 3300), 0)
    cost += 0.02 * np.maximum(np.minimum(total_bp - 50000, 100000 - 50000), 0)
    cost += 0.01 * np.maximum(total_bp - 100000, 0)
    # Previously bad_lengths was computed but never applied, so unsupported
    # lengths silently received a price. np.where broadcasts the mask when
    # max_length is a scalar and num_oligos is an array.
    return np.where(bad_lengths, np.nan, cost)


# GenScript price table, in the layout cost_lookup expects:
#   GENSCRIPT_SIZES   - upper bound on the number of oligos for each pool tier
#   GENSCRIPT_LENGTHS - inclusive (min, max) oligo-length brackets
#   GENSCRIPT_COSTS   - one row per pool tier, one column per length bracket
GENSCRIPT_SIZES = [12472, 92918]
GENSCRIPT_LENGTHS = [(10, 79), (80, 109), (110, 130), (131, 150), (151, 170)]
# The prices are written one tuple per length bracket (one entry per pool
# tier); zip transposes them into one row per pool tier.
GENSCRIPT_COSTS = list(
    zip(
        (1600, 4000),
        (1800, 4500),
        (2000, 5000),
        (2200, 5500),
        (2400, 6000),
    )
)


def cost_lookup(sizes, lengths, costs, max_length, num_oligos):
    """Look up a pool price in a tiered (pool size x oligo length) table.

    Parameters
    ----------
    sizes : sequence of numbers
        Upper bound on the number of oligos for each pool-size tier. The
        first tier whose bound is >= ``num_oligos`` is used, so the bounds
        are expected in ascending order.
    lengths : sequence of (min, max) pairs
        Inclusive oligo-length range of each length bracket.
    costs : 2-D array-like
        ``costs[i][j]`` is the price for size tier ``i`` and length bracket
        ``j``.
    max_length : scalar or 1-D array-like
        Length of the longest oligo in the pool.
    num_oligos : scalar or 1-D array-like
        Number of oligos in the pool.

    ``max_length`` and ``num_oligos`` may each be a scalar or a 1-D array;
    if both are arrays they must have the same shape.

    Returns
    -------
    numpy.ndarray
        1-D float array of prices, NaN wherever the length matches no bracket
        or the pool is larger than the largest tier.

    Raises
    ------
    ValueError
        If both ``max_length`` and ``num_oligos`` are arrays of different
        shapes.
    """
    sizes = np.asarray(sizes)
    lengths = np.asarray(lengths)
    costs = np.asarray(costs)
    # atleast_1d promotes scalars to shape (1,) and also accepts plain lists.
    max_length = np.atleast_1d(max_length)
    num_oligos = np.atleast_1d(num_oligos)
    if (
        max_length.shape != num_oligos.shape
        and max_length.size != 1
        and num_oligos.size != 1
    ):
        raise ValueError(
            "each of max_length and num_oligos can be a scalar or a 1d array of length equal to the other"
        )
    # Column vectors, so comparing against the table rows broadcasts to
    # (n_queries, n_brackets) and (n_queries, n_tiers).
    max_length = max_length[:, np.newaxis]
    num_oligos = num_oligos[:, np.newaxis]
    length_matches = (lengths[:, 0] <= max_length) & (max_length <= lengths[:, 1])
    size_matches = num_oligos <= sizes
    # argmax returns the first True per row. It also returns 0 for an all-False
    # row, so rows without any match are masked out below.
    length_idxs = np.argmax(length_matches, axis=1)
    size_idxs = np.argmax(size_matches, axis=1)
    unavailable = ~np.any(size_matches, axis=1) | ~np.any(length_matches, axis=1)
    # Builtin float rather than np.float_, which NumPy 2.0 removed.
    c = costs[size_idxs, length_idxs].astype(float)
    c[unavailable] = np.nan
    return c


def genscript_cost(max_length, num_oligos):
    """Price a pool from the GenScript table via ``cost_lookup``.

    ``max_length`` and ``num_oligos`` are passed through unchanged; the result
    is whatever ``cost_lookup`` returns for the GENSCRIPT_* tables.
    """
    return cost_lookup(
        sizes=GENSCRIPT_SIZES,
        lengths=GENSCRIPT_LENGTHS,
        costs=GENSCRIPT_COSTS,
        max_length=max_length,
        num_oligos=num_oligos,
    )


# Agilent price table, in the layout cost_lookup expects:
#   AGILENT_SIZES   - upper bound on the number of oligos for each pool tier
#   AGILENT_LENGTHS - inclusive (min, max) oligo-length brackets
#   AGILENT_COSTS   - one row per pool tier, one column per length bracket
AGILENT_SIZES = [7500, 15000, 60000, 100000, 244000]
AGILENT_LENGTHS = [
    (30, 90),
    (91, 110),
    (111, 130),
    (131, 150),
    (151, 170),
    (171, 190),
    (191, 210),
    (211, 230),
]
# The prices are written one tuple per length bracket (one entry per pool
# tier); zip transposes them into one row per pool tier.
AGILENT_COSTS = list(
    zip(
        (1647, 3296, 4865, 6261, 13722),
        (1757, 3515, 5189, 6678, 14691),
        (1868, 3734, 5513, 7096, 15610),
        (2005, 4009, 5918, 7617, 16758),
        (2186, 4371, 6454, 8306, 18272),
        (2473, 4942, 7297, 9391, 20659),
        (2801, 5602, 8270, 10643, 23415),
        (3186, 6370, 9406, 12105, 26628),
    )
)


def agilent_cost(max_length, num_oligos):
    """Price a pool from the Agilent table via ``cost_lookup``.

    ``max_length`` and ``num_oligos`` are passed through unchanged; the result
    is whatever ``cost_lookup`` returns for the AGILENT_* tables.
    """
    return cost_lookup(
        sizes=AGILENT_SIZES,
        lengths=AGILENT_LENGTHS,
        costs=AGILENT_COSTS,
        max_length=max_length,
        num_oligos=num_oligos,
    )


# Inclusive (min, max) oligo-length brackets; there is one bracket per price
# line listed under each tier in TWIST_RAW.
TWIST_LENGTHS = [(20, 120), (121, 150), (151, 200), (201, 250), (251, 300)]
# Raw Twist price table as text. Each tier is a "Tier N" header, an
# "(lo-hi Oligos)" range line, then five prices (one per TWIST_LENGTHS
# bracket). The cell's parsing loop strips "$" and "," and splits on the
# "Tier N" headers, so keep this layout when updating prices.
TWIST_RAW = """
Tier 1
(1-2,000 Oligos)
$1,040.00
$1,248.00
$1,352.00
$2,640.00
$3,946.00
Tier 2
(2,001-6,000 Oligos)
$1,560.00
$1,820.00
$2,028.00
$4,136.00
$6,181.00
Tier 3
(6,001-12,000 Oligos)
$2,028.00
$2,433.60
$2,636.40
$5,148.00
$7,694.00
Tier 4
(12,001-18,000 Oligos)
$2,636.40
$3,163.55
$3,427.45
$6,694.00
$10,004.00
Tier 5
(18,001-24,000 Oligos)
$3,427.45
$4,112.55
$4,455.75
$8,702.00
$13,006.00
Tier 6
(24,001-30,000 Oligos)
$4,455.75
$5,346.25
$5,792.80
$11,315.00
$16,910.00
Tier 7
(30,001-36,000 Oligos)
$4,574.05
$5,488.60
$5,946.85
$11,615.00
$17,359.00
Tier 8
(36,001-42,000 Oligos)
$5,031.65
$6,037.85
$6,541.60
$12,775.00
$19,093.00
Tier 9
(42,001-48,000 Oligos)
$5,534.75
$6,641.70
$7,195.50
$14,051.00
$21,000.00
Tier 10
(48,001-54,000 Oligos)
$6,088.55
$7,306.00
$7,915.05
$15,456.00
$23,100.00
Tier 11
(54,001-60,000 Oligos)
$6,697.60
$8,036.60
$8,706.75
$17,003.00
$25,411.00
Tier 12
(60,001-72,000 Oligos)
$7,032.35
$8,438.30
$9,142.25
$17,854.00
$26,684.00
Tier 13
(72,001-84,000 Oligos)
$7,278.05
$8,734.05
$9,462.05
$18,477.00
$27,614.00
Tier 14
(84,001-96,000 Oligos)
$7,476.30
$8,971.30
$9,719.45
$18,981.00
$28,367.00
Tier 15
(96,001-120,000 Oligos)
$7,558.85
$9,071.40
$9,827.35
$19,190.00
$28,680.00
Tier 16
(120,001-150,000 Oligos)
$8,634.60
$10,362.30
$11,225.50
$21,919.00
$32,758.00
Tier 17
(150,001-180,000 Oligos)
$9,498.45
$11,398.40
$12,348.05
$24,111.00
$36,035.00
Tier 18
(180,001-210,000 Oligos)
$10,061.35
$12,073.75
$13,079.95
$25,541.00
$38,172.00
Tier 19
(210,001-240,000 Oligos)
$11,067.55
$13,280.80
$14,387.75
$28,095.00
$41,989.00
Tier 20
(240,001-300,000 Oligos)
$13,650.00
$16,575.00
$17,550.00
$33,000.00
$49,320.00
Tier 21
(300,001-360,000 Oligos)
$16,380.00
$19,890.00
$21,060.00
$39,600.00
$59,184.00
Tier 22
(360,001-420,000 Oligos)
$19,110.00
$23,205.00
$24,570.00
$46,200.00
$69,048.00
Tier 23
(420,001-480,000 Oligos)
$21,840.00
$26,520.00
$28,080.00
$52,800.00
$78,912.00
Tier 24
(480,001-600,000 Oligos)
$26,754.00
$32,487.00
$34,398.00
$64,680.00
$96,667.00
Tier 25
(600,001-696,000 Oligos)
$31,668.00
$38,454.00
$40,716.00
$76,560.00
$114,422.00"""

# Parse TWIST_RAW into the layout cost_lookup expects: TWIST_SIZES holds each
# tier's upper oligo count, TWIST_COSTS one row of prices per tier.
TWIST_SIZES, TWIST_COSTS = [], []
# Drop "$" and thousands separators so counts and prices parse as numbers.
_twist_plain = re.sub(r"\$|,", "", TWIST_RAW)
for tier_text in re.split(r"Tier \d+", _twist_plain):
    tier_lines = [line.strip() for line in tier_text.split("\n")]
    if any(tier_lines):
        # tier_lines[0] is the empty remainder of the "Tier N" header line;
        # tier_lines[1] is the "(lo-hi Oligos)" range and the rest are prices.
        upper_bound = re.match(r"\((\d+)-(\d+) Oligos\)", tier_lines[1]).group(2)
        TWIST_SIZES.append(int(upper_bound))
        TWIST_COSTS.append([float(price) for price in tier_lines[2:] if price])


def twist_cost(max_length, num_oligos):
    """Price a pool from the Twist table via ``cost_lookup``.

    ``max_length`` and ``num_oligos`` are passed through unchanged; the result
    is whatever ``cost_lookup`` returns for the TWIST_* tables.
    """
    return cost_lookup(
        sizes=TWIST_SIZES,
        lengths=TWIST_LENGTHS,
        costs=TWIST_COSTS,
        max_length=max_length,
        num_oligos=num_oligos,
    )

In [None]:
# Compare vendor prices for a fixed oligo length across a range of pool sizes.
length = 150
# NOTE(review): pool sizes here are built from 239, while protein_residues is
# 234 further down - confirm which residue count is intended.
sizes = np.linspace(239 * 19, 239 * 63, 30)

# One (legend label, price curve) pair per vendor, in plotting order.
# IDT is currently excluded:
# plt.plot(sizes, [idt_cost(length, size) for size in sizes], label='IDT')
curves = [
    ("GenScript", genscript_cost(length, sizes)),
    ("Agilent 50% off", 0.5 * agilent_cost(length, sizes)),
    ("Twist", twist_cost(length, sizes)),
]

plt.figure(figsize=(12, 12))
for curve_label, curve_cost in curves:
    plt.plot(sizes, curve_cost, label=curve_label)
plt.legend()
plt.title("Oligo pool of length {}".format(length))
plt.xlabel("Number of oligos")
plt.ylabel("Cost ($)")

In [None]:
# matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
# pyplot still exports get_cmap, so import it from there.
from matplotlib.pyplot import get_cmap

In [None]:
# Compare vendor prices as a function of oligo length for two pool sizes
# (one line style per pool size, one colour per vendor).
lss = ["-", "--"]
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was removed in
# Matplotlib 3.9; this also keeps the cell independent of a mid-notebook import.
cs = plt.get_cmap("Set1").colors
sizes = [239 * 19, 239 * 63]
lengths = np.arange(100, 350)
plt.figure(figsize=(12, 12))

# Lengths that a vendor's table does not cover come back as NaN from
# cost_lookup, so each curve simply stops where that vendor's brackets end.
for size, ls in zip(sizes, lss):
    # plt.plot(lengths, [idt_cost(length, size) for length in lengths], label='IDT', ls=ls, c=cs[0])
    plt.plot(
        lengths,
        genscript_cost(lengths, size),
        label="GenScript size={}".format(size),
        ls=ls,
        c=cs[1],
    )
    plt.plot(
        lengths,
        agilent_cost(lengths, size) * 0.5,
        label="Agilent 50% off size={}".format(size),
        ls=ls,
        c=cs[2],
    )
    plt.plot(
        lengths,
        twist_cost(lengths, size),
        label="Twist size={}".format(size),
        ls=ls,
        c=cs[4],
    )
plt.legend()
plt.title("Oligo pool of sizes {}".format(sizes))
plt.xlabel("Length of oligo")
plt.ylabel("Cost ($)")

In [None]:
# TODO: correct the pool size for error-correction?

# Length constraints

In [None]:
# Design parameters for the oligo layout (lengths in nt).
primer_length = 18
cutsite_length = 4 + 1 + 6
spacer_length = 6
alphabet_size = 19  # 63
fish_probe_length = 20
protein_residues = 234
variable_length = np.arange(0, 200)

# Number of base-4 digits needed to index alphabet_size * (variable_length // 3)
# variants. For variable_length < 3 the count is zero, so log() yields -inf;
# those entries stay -inf (matplotlib skips them and cost_lookup returns NaN),
# and errstate only silences the divide-by-zero RuntimeWarning.
with np.errstate(divide="ignore"):
    num_fish_rounds = np.ceil(
        np.log(alphabet_size * (variable_length // 3)) / np.log(4)
    )

# Total oligo length; the per-round term uses fish_probe_length instead of a
# hard-coded 20 so that changing the constant above actually takes effect.
oligo_length = (
    2 * primer_length
    + 4 * cutsite_length
    + spacer_length
    + fish_probe_length * num_fish_rounds
    + variable_length
)
num_oligos = protein_residues * alphabet_size

In [None]:
# Oligo length, and how much of it comes from the FISH probes, as the variable
# region grows. The curves are labelled so they can be told apart.
plt.figure(figsize=(12, 12))
plt.plot(variable_length, oligo_length, label="Full oligo")
plt.plot(
    variable_length,
    fish_probe_length * num_fish_rounds,
    label="FISH probes",
)
plt.plot(
    variable_length,
    fish_probe_length * num_fish_rounds + variable_length,
    label="FISH probes + variable region",
)
plt.legend()
plt.xlabel("Variable region (nt)")
plt.ylabel("Oligo length (nt)")

In [None]:
# Vendor price as a function of variable-region length, for each alphabet size
# (one line style per alphabet size, one colour per vendor).
lss = ["-", "--"]
# plt.get_cmap is used instead of matplotlib.cm.get_cmap, which was removed in
# Matplotlib 3.9.
cs = plt.get_cmap("Set1").colors
alphabet_sizes = [19, 63]
plt.figure(figsize=(12, 12))

# NOTE: this loop rebinds alphabet_size, num_oligos, num_fish_rounds and
# oligo_length, so after this cell they hold the values for the last alphabet.
for alphabet_size, ls in zip(alphabet_sizes, lss):
    num_oligos = protein_residues * alphabet_size
    # variable_length < 3 gives log(0) = -inf; those points end up as NaN cost
    # and are not drawn. errstate only silences the divide-by-zero warning.
    with np.errstate(divide="ignore"):
        num_fish_rounds = np.ceil(
            np.log(alphabet_size * (variable_length // 3)) / np.log(4)
        )
    oligo_length = (
        2 * primer_length
        + 4 * cutsite_length
        + spacer_length
        + fish_probe_length * num_fish_rounds
        + variable_length
    )
    # plt.plot(variable_length, idt_cost(oligo_length, num_oligos), label='IDT', ls=ls, c=cs[0])
    plt.plot(
        variable_length,
        genscript_cost(oligo_length, num_oligos),
        label="GenScript num_oligos={}".format(num_oligos),
        ls=ls,
        c=cs[1],
    )
    plt.plot(
        variable_length,
        agilent_cost(oligo_length, num_oligos) * 0.5,
        label="Agilent 50% off num_oligos={}".format(num_oligos),
        ls=ls,
        c=cs[2],
    )
    plt.plot(
        variable_length,
        twist_cost(oligo_length, num_oligos),
        label="Twist num_oligos={}".format(num_oligos),
        ls=ls,
        c=cs[4],
    )
plt.legend()
# The title lists the pool sizes actually plotted; it used to print `sizes`,
# a leftover from an earlier cell that is built from a different residue count.
plt.title(
    "Oligo pool of sizes {}".format(
        [protein_residues * a for a in alphabet_sizes]
    )
)
plt.xlabel("Variable region (nt)")
plt.ylabel("Cost ($)")