In [None]:
import itertools
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import os
# Directory that receives the figures saved by the cells below; it is created
# if missing and left untouched if it already exists.
OUTPATH = "outs_final_nee"
os.makedirs(OUTPATH, exist_ok=True)

In [None]:
def number_to_base(n, b, L, invert=False):
    """Expand a non-negative integer into exactly ``L`` base-``b`` digits.

    Digits are returned least-significant first, i.e. ``digits[j]`` is the
    coefficient of ``b ** j``; this is the ordering ``base_to_number`` sums up.

    Parameters
    ----------
    n : int
        Number to convert. Must satisfy ``0 <= n < b ** L``.
    b : int
        Base of the expansion.
    L : int
        Number of digits to return (high-order positions are zero-padded).
    invert : bool, optional
        If True, every digit ``d`` is replaced by ``1 - d``. This is a bit
        flip, so it is only meaningful for ``b == 2``.

    Returns
    -------
    list of int
        The ``L`` digits, least-significant first.

    Raises
    ------
    ValueError
        If ``n`` is negative or needs more than ``L`` base-``b`` digits.
        (Previously such input silently produced an out-of-range digit.)
    """
    if not 0 <= n < b ** L:
        raise ValueError(
            f"n={n} cannot be represented with {L} base-{b} digits"
        )

    digits = []
    rem = n
    for _ in range(L):
        # Peel off the lowest remaining digit; the quotient carries the rest.
        rem, digit = divmod(rem, b)
        digits.append(digit)

    if invert:
        digits = [1 - el for el in digits]
    return digits


# for (int i = 0; i < L; i++) count += (int)pow(base, i) * String[i];
def base_to_number(els, b):
    """Rebuild an integer from its base-``b`` digits.

    ``els[i]`` is taken as the coefficient of ``b ** i`` (least-significant
    digit first). An empty digit sequence yields 0.
    """
    return sum(b ** i * el for i, el in enumerate(els))

In [None]:
# Check for README of geno index:
# expand (index - 1) into 12 base-4 digits, then spell the digits out under
# every possible assignment of the four nucleotide letters to the digits 0..3.
geno = 12557964 - 1
seq = number_to_base(geno, 4, 12)
print("".join(str(digit) for digit in seq))
alphabet = ["A", "U", "C", "G"]
# First pass: each line is prefixed with the letter ordering that produced it.
for alphabet_ in itertools.permutations(alphabet):
    print(alphabet_, "".join(alphabet_[el] for el in seq))
# Second pass: the bare sequences only.
for alphabet_ in itertools.permutations(alphabet):
    print("".join(alphabet_[el] for el in seq))

In [None]:
# Check number to base
# Round trip: expand 10001 into 20 binary digits (least-significant first),
# then rebuild the integer from those digits; the second print should show
# 10001 again.
base = number_to_base(10001, 2, 20)
print(base)
print(base_to_number(base, 2))

In [None]:
def plot_polyomino(phenotype, tiles=None, oris=None):
    """Draw a polyomino as rounded unit squares on a dotted lattice.

    Parameters
    ----------
    phenotype : array-like
        Lower-left corner of every cell, either flat ``[x0, y0, x1, y1, ...]``
        or nested ``[[x0, y0], ...]``; it is reshaped to ``(-1, 2)``.
    tiles : sequence of int, optional
        One index per cell into the three-colour palette defined below.
        When omitted, all cells are drawn light grey.
    oris : sequence of int, optional
        One orientation code per cell, drawn as an arrow centred in the cell:
        0 points up (+y), 1 right (+x), 2 down (-y), 3 left (-x).

    Returns
    -------
    matplotlib.axes.Axes
        The axes of the newly created 4x4 inch figure.

    Raises
    ------
    ValueError
        If an entry of ``oris`` is not one of 0, 1, 2, 3.
    """
    import matplotlib.patches as mpatches
    from matplotlib.collections import PatchCollection

    colors = ["#1b9e77", "#d95f02", "#7570b3"]
    # Unit direction (dx, dy) of the arrow for each orientation code.
    directions = {0: (0, 1), 1: (1, 0), 2: (0, -1), 3: (-1, 0)}

    pos = np.array(phenotype).reshape(-1, 2)
    fig, ax = plt.subplots(1, 1, figsize=(4, 4))

    maxs, mins = pos.max(axis=0).astype(int), pos.min(axis=0).astype(int)
    ax.set(xlim=(mins[0] - 1, maxs[0] + 2), ylim=(mins[1] - 1, maxs[1] + 2))
    ax.axis("off")

    # Dotted lattice spanning the bounding box of the shape (cells are 1x1,
    # hence the extra line at max + 1).
    grid_kw = dict(lw=0.5, ls=":", zorder=-10, color="lightgrey")
    for x in range(mins[0], maxs[0] + 2):
        ax.plot([x, x], [mins[1], maxs[1] + 1], **grid_kw)
    for y in range(mins[1], maxs[1] + 2):
        ax.plot([mins[0], maxs[0] + 1], [y, y], **grid_kw)

    patches = [
        mpatches.FancyBboxPatch(
            el,
            1.0,
            1.0,
            # Slightly negative pad leaves a hairline gap between neighbours.
            boxstyle=mpatches.BoxStyle("Round", pad=-0.01, rounding_size=0.15),
            alpha=1.0,
        )
        for el in pos
    ]
    if tiles is not None:
        color = [colors[tile] for tile in tiles]
    else:
        color = "lightgrey"

    collection = PatchCollection(patches, color=color, alpha=0.3, lw=0, zorder=10)
    ax.add_collection(collection)

    # Orientation arrows: total length 2 * arr_length, centred in the cell.
    arr_length = 0.25
    x_off, y_off = 0.5, 0.5

    if oris is not None:
        # Distinct loop names so the ``pos`` array is not overwritten.
        for cell, ori in zip(pos, oris):
            if ori not in directions:
                raise ValueError(
                    f"orientation must be one of 0, 1, 2, 3; got {ori!r}"
                )
            dir_x, dir_y = directions[ori]
            x_tail = cell[0] + x_off - arr_length * dir_x
            y_tail = cell[1] + y_off - arr_length * dir_y

            arrow = mpatches.FancyArrow(
                x_tail,
                y_tail,
                2 * arr_length * dir_x,
                2 * arr_length * dir_y,
                length_includes_head=True,
                width=0.03,
                head_width=0.2,
                head_length=0.2,
                color="k",
            )
            ax.add_patch(arrow)

    return ax

In [None]:
# Poly example: s_2_8
genotype = 17
phenotype = "0,0,1,0,0,1,1,1"
els = number_to_base(genotype, 8, 8)

# Print the genotype's base-8 digits and the integer rebuilt from them.
pheno = ",".join(str(digit) for digit in els)
print("S_2,8:")
print(f"Geno : {base_to_number(els, 8):>30}")
print(f"Pheno: {pheno:>30}")

# One tile index and one orientation per cell of the 2x2 square.
tiles = [0, 0, 0, 0]
oris = [0, 1, 3, 2]

phenotype_pos = [0, 0, 0, 1, 1, 0, 1, 1]
plot_polyomino(phenotype_pos, tiles=tiles, oris=oris)
plt.savefig(f"{OUTPATH}/s28_example.pdf", transparent=True, bbox_inches="tight")

# Poly example: s_3_8
genotype = 112641553
els = number_to_base(genotype, 8, 12)

pheno = ",".join(str(digit) for digit in els)
print("S_3,8:")
print(f"Geno : {base_to_number(els, 8):>30}")
print(f"Pheno: {pheno:>30}")

# Per-cell tile indices, listed in the same order as phenotype_pos below.
tiles = [
    2,
    2, 2, 1, 2,
    2, 1, 0, 0, 2,
    2, 0, 0, 1, 2,
    2, 1, 2, 2,
    2,
]

# Per-cell orientation codes, same order as tiles.
oris = [
    0,
    0, 1, 0, 3,
    1, 1, 0, 3, 0,
    2, 1, 2, 3, 3,
    1, 2, 3, 2,
    2,
]

# Cell coordinates as [x, y] pairs.
phenotype_pos = [
    [3, -2],
    [1, -1], [2, -1], [3, -1], [4, -1],
    [0, 0], [1, 0], [2, 0], [3, 0], [4, 0],
    [1, 1], [2, 1], [3, 1], [4, 1], [5, 1],
    [1, 2], [2, 2], [3, 2], [4, 2],
    [2, 3],
]

# Flatten the pairs into one list: x0, y0, x1, y1, ...
phenotype_pos = [coord for cell in phenotype_pos for coord in cell]
def get_coords(s="3-21-12-13-14-1001020304011213141511222324223"):
    """Parse a separator-free string of single-digit coordinates.

    A comma is inserted after every digit, so each value consists of exactly
    one digit, optionally preceded by ``-``. The parsed integers are returned
    as an array of shape ``(-1, 2)``, one ``(x, y)`` pair per row.
    """
    import re

    comma_after_each_digit = re.sub(r"(\d{1})", r"\1,", s)
    without_trailing_comma = re.sub(r",$", r"", comma_after_each_digit)
    values = [int(token) for token in without_trailing_comma.split(",")]
    return np.array(values).reshape(-1, 2)


# Remove negative numbers by translating, max must be less than 10 afterwards
# (the string encoding parsed by get_coords has one digit per coordinate).
coords = get_coords("".join(str(value) for value in phenotype_pos))
mins = coords.min(axis=0)
new_coords = coords - mins
new_coords_as_str = "".join(str(value) for value in new_coords.reshape(-1))
print(new_coords_as_str)

# The plot uses the original (untranslated) coordinates.
plot_polyomino(phenotype_pos, tiles=tiles, oris=oris)
plt.savefig(f"{OUTPATH}/s38_example.pdf", transparent=True, bbox_inches="tight")

In [None]:
# fRNA L = 30
# UGGAACGAGUUCUUGGAAUGAAUCCCAUGC
# L0: .(((....((((...))))...))).....
# L1: _[_[]_]_
# L3: [[]]

In [None]:
# Colour per symbol for H/P sequences; this is the default palette of
# plot_string and map_seq_to_colors.
HP_COLORS = {"H": "sandybrown", "P": "dodgerblue"}
# Colour per nucleotide letter. NOTE: a later cell rebinds RNA_COLORS to a
# different mapping before the RNA sequences are plotted.
RNA_COLORS = {"A": "#a6cee3", "C": "#1f78b4", "G": "#b2df8a", "U":"#33a02c"}
def plot_string(word, fname=None, width=0.6, fontsize=60, colors=HP_COLORS):
    """Render ``word`` as a row of coloured monospace characters.

    Each character is placed at integer x position ``i`` and coloured through
    ``map_seq_to_colors(word, colors=colors)``. If ``fname`` is given, the
    figure is saved as ``{OUTPATH}/{fname}.pdf`` before being shown.

    Note: this sets ``rcParams['font.family']`` globally. ``width`` is
    accepted but not used.
    """
    from matplotlib import rcParams

    rcParams['font.family'] = 'monospace'
    n_chars = len(word)
    _, axis = plt.subplots(1, 1, figsize=(n_chars, 2))
    char_colors = map_seq_to_colors(word, colors=colors)
    for idx, (char, char_color) in enumerate(zip(word, char_colors)):
        axis.text(idx, 0, char, fontsize=fontsize, color=char_color)
    axis.set_xlim(0, n_chars)
    axis.set_ylim(-10, 10)
    axis.axis("off")
    if fname is not None:
        plt.savefig(f"{OUTPATH}/{fname}.pdf", transparent=True, bbox_inches='tight')
    plt.show()

In [None]:
# HP5x5 example
def get_pos(phenotype):
    """Turn a sequence of lattice moves into the coordinates it visits.

    The walk starts at the origin. ``U``/``D`` step +1/-1 along axis 1,
    ``R``/``L`` step +1/-1 along axis 0 and ``F``/``B`` step +1/-1 along
    axis 2. Any other symbol repeats the previous position. The result has
    ``len(phenotype) + 1`` rows and 3 columns if ``F`` or ``B`` occurs,
    otherwise 2 columns.
    """
    # move symbol -> (axis index, signed step)
    steps = {
        "U": (1, 1),
        "D": (1, -1),
        "L": (0, -1),
        "R": (0, 1),
        "F": (2, 1),
        "B": (2, -1),
    }

    is_planar = "F" not in phenotype and "B" not in phenotype
    n_dims = 2 if is_planar else 3

    coords = np.zeros((len(phenotype) + 1, n_dims))
    for idx, move in enumerate(phenotype, start=1):
        coords[idx, :] = coords[idx - 1, :]
        if move in steps:
            axis, delta = steps[move]
            coords[idx, axis] = coords[idx - 1, axis] + delta
    return coords


def map_seq_to_colors(seq, colors=HP_COLORS):
    """Return the colour of each symbol in ``seq``, looked up in ``colors``.

    Raises ``KeyError`` for a symbol that is not a key of ``colors``.
    """
    palette = []
    for symbol in seq:
        palette.append(colors[symbol])
    return palette

def plot_hp_phenotype(phenotype, sequence=None):
    """Plot an HP lattice fold as a chain of beads on a dotted grid.

    Parameters
    ----------
    phenotype : str
        Move string understood by ``get_pos``. A 3D plot is produced when
        ``get_pos`` returns three coordinate columns, a 2D plot otherwise.
    sequence : sequence, optional
        One symbol per bead (``len(phenotype) + 1`` entries), coloured via
        ``map_seq_to_colors`` with its default palette. Beads are grey when
        omitted.

    Returns
    -------
    matplotlib.axes.Axes
        The axes of the newly created figure.
    """
    color = "grey" if sequence is None else map_seq_to_colors(sequence)
    pos = get_pos(phenotype)
    # get_pos returns one column per spatial dimension.
    dim = pos.shape[1]
    maxs, mins = pos.max(axis=0).astype(int), pos.min(axis=0).astype(int)
    grid_kw = dict(ls=":", zorder=-10, color="lightgrey", lw=0.75)

    if dim == 2:
        # A straight walk has zero extent along one axis and matplotlib
        # rejects a zero-sized figure, so use at least one inch per side.
        figsize = (max(maxs[0] - mins[0], 1), max(maxs[1] - mins[1], 1))
        fig, ax = plt.subplots(1, 1, figsize=figsize)
        ax.plot(pos[:, 0], pos[:, 1], lw=3, color="k", zorder=-1)
        ax.scatter(pos[:, 0], pos[:, 1], color=color, s=300, zorder=2)
        ax.axis("off")

        for x in range(mins[0], maxs[0] + 1):
            ax.plot([x, x], [mins[1], maxs[1]], **grid_kw)
        for y in range(mins[1], maxs[1] + 1):
            ax.plot([mins[0], maxs[0]], [y, y], **grid_kw)

        ax.set(xlim=(mins[0] - 1, maxs[0] + 1), ylim=(mins[1] - 1, maxs[1] + 1))
        return ax

    # Three-dimensional fold.
    fig = plt.figure()
    ax = fig.add_subplot(projection="3d")
    ax.view_init(elev=10.0, azim=290)
    ax.plot(pos[:, 0], pos[:, 1], pos[:, 2], lw=4, color="k", zorder=-1)
    ax.scatter(
        pos[:, 0], pos[:, 1], pos[:, 2], color=color, s=300, zorder=2, alpha=1.0
    )
    ax.axis("off")

    # Grid lines cover the full bounding box, starting at the minimum
    # coordinate (as in the 2D branch) so walks that go negative are covered.
    xs = range(mins[0], maxs[0] + 1)
    ys = range(mins[1], maxs[1] + 1)
    zs = range(mins[2], maxs[2] + 1)
    for y in ys:
        for x in xs:
            ax.plot([x, x], [y, y], [mins[2], maxs[2]], **grid_kw)
    for y in ys:
        for z in zs:
            ax.plot([mins[0], maxs[0]], [y, y], [z, z], **grid_kw)
    for z in zs:
        for x in xs:
            ax.plot([x, x], [mins[1], maxs[1]], [z, z], **grid_kw)

    return ax


def print_hp_genotype(genotype, stdout=True):
    """Translate digits into H/P letters (0 -> "H", anything else -> "P").

    With ``stdout=True`` the letters are printed as one string and ``None``
    is returned; otherwise the list of letters is returned without printing.
    """
    letters = ["H" if el == 0 else "P" for el in genotype]
    if not stdout:
        return letters
    print("".join(letters))


# HP3x3x3
genotype = 9867302
hp_genotype = number_to_base(genotype, 2, 27)
print(hp_genotype)
print_hp_genotype(hp_genotype)
phenotype = "UURDDRUUFDDLUULDDFUURDDRUU" # 1

# Convert coordinate system of phenotype for 3D to match picture
# "U" ->  "F", "D" -> "B", "F" -> "U", "B" -> "D"
# (the rotated string is only printed; the plot below uses `phenotype`)
subs = {"U":"F", "D":"B", "F":"U", "B":"D"}
rotated_phenotype = "".join(subs.get(el, el) for el in phenotype)
print(phenotype)
print(rotated_phenotype)

# The same H/P letter list drives both the text figure and the bead colours.
word = print_hp_genotype(hp_genotype, stdout=False)
plot_string(word, fontsize=100, fname="HP3x3x3_genotype")

plot_hp_phenotype(phenotype, sequence=word)
plt.savefig(f"{OUTPATH}/HP3x3x3_example.pdf", transparent=True, bbox_inches="tight")

In [None]:
# HP5x5

# 25 binary digits of (526016 - 1), least-significant first.
genotype = number_to_base(526016 - 1, 2, 25) # 1
print_hp_genotype(genotype)

phenotype = "UUUURDDDDRUUUURDDDDRUUUU" # 1

# The same H/P letter list drives both the text figure and the bead colours.
word = print_hp_genotype(genotype, stdout=False)
plot_string(word, fontsize=100, fname="HP5x5_genotype")
plot_hp_phenotype(phenotype, sequence=word)
plt.savefig(f"{OUTPATH}/HP5x5_example.pdf", transparent=True, bbox_inches="tight")

In [None]:
# HP25 example
# Get line - 1
# Exactly one genotype/phenotype pair is active; the rest are kept as options.
# genotype = 9743772 - 1 # 1
genotype = 29390751 - 1 # 2
# genotype = 14085596 - 1 # 3
# genotype = 14122912 - 1 # 4
# genotype = 13860768 - 1 # 4

# phenotype = "UUUUURDDDDDDLLULURUULURU" #1
phenotype = "UUUUURDDDDRUUUUULLLDDDDD" #2
# phenotype = "UUUUURDDDDRUUUURDDDDDLLD" #3
# phenotype = "UUUUURDDDDRUUURDDRDLDLLD" #4

# Digits are bit-flipped (invert=True) before being mapped to H/P.
genotype = number_to_base(genotype, 2, 25, invert=True)
# genotype = number_to_base(genotype, 2, 25, invert=False)
print_hp_genotype(genotype)

# The rotated string is only printed; the plot below uses `phenotype`.
subs = {"U":"L", "D":"R", "R":"U", "L":"D"}
rotated_phenotype = "".join(subs.get(el, el) for el in phenotype)
print(phenotype)
print(rotated_phenotype)

word = print_hp_genotype(genotype, stdout=False)
plot_string(word, fontsize=100, fname="HP25_genotype")

plot_hp_phenotype(phenotype, sequence=word)
plt.savefig(f"{OUTPATH}/HP25_example.pdf", transparent=True, bbox_inches="tight")

In [None]:
# HP20 example
# Exactly one genotype/phenotype pair is active; the rest are kept as options.
# genotype = 311759 - 1 # 1
# genotype = 803279 - 1 # 2
# genotype = 868815 - 1 # 2
genotype = 618448 - 1 # 3
# genotype = 821712 - 1 # 4
# genotype = 941008 - 1 # 5
# genotype = 949200 - 1 # 5
# genotype = 850896 - 1 # 6

# phenotype = "UUUURDDDDDLLUUULURU" # 1
# phenotype = "UUUURDDDDDLLUULUURD" # 2
phenotype = "UUUURDDDRUURDDDLDLU" # 3
# phenotype = "UUUURDDDRDLDLLULURU" # 4
# phenotype = "UUUURDDRURDDDLDLUUR" # 5
# phenotype = "UUUURDDRURDDLLDDRUR" # 6


# Digits are bit-flipped (invert=True) before being mapped to H/P.
genotype = number_to_base(genotype, 2, 20, invert=True)
print_hp_genotype(genotype)

# The rotated string is only printed; the plot below uses `phenotype`.
subs = {"U":"L", "D":"R", "R":"U", "L":"D"}
rotated_phenotype = "".join(subs.get(el, el) for el in phenotype)
print(phenotype)
print(rotated_phenotype)

word = print_hp_genotype(genotype, stdout=False)
plot_string(word, fontsize=100, fname="HP20_genotype")

plot_hp_phenotype(phenotype, sequence=word)
plt.savefig(f"{OUTPATH}/HP20_example.pdf", transparent=True, bbox_inches="tight")

In [None]:
# Examples for figure of fitness
# Three 30-character strings over '.', '(' and ')' -- presumably RNA secondary
# structures in dot-bracket notation (TODO confirm). They are not referenced
# anywhere else in the visible part of this notebook.
s = ".(((..(((.((....))..)))..))).."
o = ".....(((((...((....)).)))))..."
t = "..(((....))).(((..(....)..)))."

In [None]:
# Nucleotide palette used for the RNA genotype figures in this cell.
RNA_COLORS = {
    "A": "#1f78b4",
    "C": "#b2df8a",
    "U": "#a6cee3",
    "G": "#33a02c",
}

# RNA 12
rna_geno = "GGGGGAAAACCC"
rna_string = list(rna_geno)
plot_string(rna_string, colors=RNA_COLORS, fontsize=100, fname="RNA12_genotype")

# RNA 15
rna_geno = "GGGACCAAAGGUCCC"
rna_string = list(rna_geno)
plot_string(rna_string, colors=RNA_COLORS, fontsize=100, fname="RNA15_genotype")

# RNA 30
# OLD WRONG ONE
# rna_geno = "UGGCUUUCAACAAGGGUGUAGGAUGGCCAG"
rna_geno = "UGGAACGAGUUCUUGGAAUGAAUCCCAUGC"
rna_string = list(rna_geno)
plot_string(rna_string, colors=RNA_COLORS, fontsize=100, fname="fRNA30_genotype")