In [None]:
import re
from collections import defaultdict

import ipywidgets
import numpy as np
import pandas as pd
import qgrid

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from cloning import *

In [None]:
rcvenus_seq = "atgagtaaaggcgaagaattgttcactggcgtggtaccgatcctggtagaactggatggcgacgttaatggtcacaagttcagcgttagtggagagggtgaaggtgatgcgacctatggcaaactgaccctgaagctgatctgcacaaccggcaagctgcctgttccttggccgacactggttacaacgctgggctatggcgtacaatgtttcgcacggtacccggaccacatgaagcaacatgacttcttcaagagcgctatgcctgaaggctatgtccaagaaaggactatcttcttcaaagacgacggcaattacaagacacgggccgaagtcaaattcgaaggcgatacgctggtcaacagaatcgagctgaaaggcatcgacttcaaggaagatggcaacatcctgggccataaactggaatataattataacagtcataatgtgtatatcaccgctgacaaacaaaagaatggcatcaaggccaacttcaaaatcagacataacatcgaagatggaggtgttcaactggcagaccactaccaacaaaatactccgatcggcgatggcccggtgctgctgccggataaccattatctgagttatcaaagtaagctgagcaaggatccgaacgaaaaaagagatcatatggttctgctggaattcgtaacggccgcgggcatcacgcatggcatggacgagctgtataaataa"
mvenus_seq = "ATGAGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTGATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTTTGGGTTATGGTGTTCAATGCTTTGCGAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTATAACTCACACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCGAACTTCAAAATTAGACACAACATTGAAGATGGAGGTGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCTACCAATCTAAGCTTTCGAAAGATCCCAACGAAAAGAGAGACCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAA"
mscfp3_seq = "ATGAGTAAAGGAGAAGAACTTTTCACTGGAGTTGTCCCAATTCTTGTTGAATTAGATGGTGATGTTAATGGGCACAAATTTTCTGTCAGTGGAGAGGGTGAAGGTGATGCAACATACGGAAAACTTACCCTTAAATTTATTTGCACTACTGGAAAACTACCTGTTCCATGGCCAACACTTGTCACTACTCTCACTTGGGGTGTTCAATGCTTTGCAAGATACCCAGATCATATGAAACAGCATGACTTTTTCAAGAGTGCCATGCCCGAAGGTTATGTACAGGAAAGAACTATATTTTTCAAAGATGACGGGAACTACAAGACACGTGCTGAAGTCAAGTTTGAAGGTGATACCCTTGTTAATAGAATCGAGTTAAAAGGTATTGATTTTAAAGAAGATGGAAACATTCTTGGACACAAATTGGAATACAACTACATCTCAGACAATGTATACATCACGGCAGACAAACAAAAGAATGGAATCAAAGCTAACTTCAAAATTAGACACAACATTGAAGATGGAGGCGTTCAACTAGCAGACCATTATCAACAAAATACTCCAATTGGCGATGGCCCTGTCCTTTTACCAGACAACCATTACCTGTCCACACAATCTAAGCTTTCGAAAGATCCCAACGAAAAGAGAGACCACATGGTCCTTCTTGAGTTTGTAACAGCTGCTGGGATTACACATGGCATGGATGAACTATACAAATAA"
mscarlet_seq = "ATGAGTAAAGGAGAAGCTGTGATTAAAGAGTTCATGCGCTTCAAAGTTCACATGGAGGGTTCTATGAACGGTCACGAGTTCGAGATCGAAGGCGAAGGCGAGGGCCGTCCGTATGAAGGCACCCAGACCGCCAAACTGAAAGTGACTAAAGGCGGCCCGCTGCCTTTTTCCTGGGACATCCTGAGCCCGCAATTTATGTACGGTTCTAGGGCGTTCATCAAACACCCAGCGGATATCCCGGACTATTATAAGCAGTCTTTTCCGGAAGGTTTCAAGTGGGAACGCGTAATGAATTTTGAAGATGGTGGTGCCGTGACCGTCACTCAGGACACCTCCCTGGAGGATGGCACCCTGATCTATAAAGTTAAACTGCGTGGTACTAATTTTCCACCTGATGGCCCGGTGATGCAGAAAAAGACGATGGGTTGGGAGGCGTCTACCGAACGCTTGTATCCGGAAGATGGTGTGCTGAAAGGCGACATTAAAATGGCCCTGCGCCTGAAAGATGGCGGCCGCTATCTGGCTGACTTCAAAACCACGTACAAAGCCAAGAAACCTGTGCAGATGCCTGGCGCGTACAATGTGGACCGCAAACTGGACATCACCTCTCATAATGAAGATTATACGGTGGTAGAGCAATATGAGCGCTCCGAGGGTCGTCATTCTACCGGTGGCATGGATGAACTATACAAATAA"

In [None]:
# Lower-cased template sequence used by the analysis cells below.
# Alternative template (disabled):
# seq = mvenus_seq.lower()
seq = rcvenus_seq.lower()

# Kondrashov 2016

In [None]:
def parse_kondrashov_mutations(s):
    """Parse a ":"-separated string of mutation codes into a list of tuples.

    Parameters
    ----------
    s : str or missing value
        Raw cell value; each ":"-separated item is handed to
        ``parse_kondrashov_mutation``.

    Returns
    -------
    list
        One parsed tuple per code, or ``[]`` when ``s`` is missing
        (``pd.isna(s)`` is true).

    Raises
    ------
    Exception
        Whatever ``parse_kondrashov_mutation`` raises; the offending raw
        value is printed (prefixed with ``>>>``) before re-raising.
    """
    if pd.isna(s):
        return []
    try:
        return [parse_kondrashov_mutation(code) for code in s.split(":")]
    except Exception:
        # Only genuine errors are reported here; KeyboardInterrupt/SystemExit
        # pass straight through instead of being intercepted by a bare except.
        print(">>>", s)
        raise


def parse_kondrashov_mutation(s):
    """Parse a single mutation code such as ``"SA108G"``.

    The code is matched (from the start of ``s``) as: a type letter
    (``S``, ``I`` or ``D``), the original letter, a decimal position, and
    the new letter (or ``*``).

    Parameters
    ----------
    s : str
        One mutation code.

    Returns
    -------
    tuple
        ``(mut_type, mut_from, mut_res, mut_to)`` where ``mut_res`` is the
        position from the code plus 2 (see the comment below).

    Raises
    ------
    ValueError
        If ``s`` does not start with a recognizable mutation code.

    NOTE(review): the loading cell also applies this parser to the nucleotide
    mutation column; the +2 offset is described for residue numbering, so the
    shifted positions of nucleotide mutations should presumably not be relied
    on -- confirm.
    """
    match = re.match(r"(S|I|D)([A-Z])(\d+)([A-Z*])", s)
    if match is None:
        # Fail with the offending text rather than an AttributeError on None.
        raise ValueError("unrecognized mutation code: {!r}".format(s))
    mut_type, mut_from, mut_res, mut_to = match.groups()
    # Kondrashov uses MixCR 0-based indexing, also is missing initial methionine, so need to add 2 to index
    mut_res = int(mut_res) + 2
    return (mut_type, mut_from, mut_res, mut_to)

In [None]:
# Load the genotype -> brightness table and derive parsed mutation columns.
kondrashov = pd.read_csv(
    "2016kondrashov/nucleotide_genotypes_to_brightness.tsv", sep="\t"
)

# Parsed mutation lists (amino-acid and nucleotide level) ...
kondrashov["aa_mutations"] = kondrashov["aaMutations"].apply(parse_kondrashov_mutations)
kondrashov["nt_mutations"] = kondrashov["nMutations"].apply(parse_kondrashov_mutations)
# ... and how many mutations each genotype carries.
kondrashov["num_aa_mutations"] = kondrashov["aa_mutations"].apply(len)
kondrashov["num_nt_mutations"] = kondrashov["nt_mutations"].apply(len)

_is_single_aa = kondrashov["num_aa_mutations"] == 1
_is_single_nt = kondrashov["num_nt_mutations"] == 1

# "res" holds the mutated position for single-AA mutants; all other rows
# are left missing by index alignment.
kondrashov["res"] = kondrashov.loc[_is_single_aa, "aa_mutations"].apply(
    lambda muts: muts[0][2]
)

kondrashov_single = kondrashov.loc[_is_single_aa]
kondrashov_single_nt = kondrashov.loc[_is_single_nt]
# Optional interactive view: qgrid.show_grid(kondrashov)

In [None]:
# Per residue: mean brightness and number of single-AA mutants with
# medianBrightness <= 3, most frequently hit residues first.
(
    kondrashov_single.loc[lambda frame: frame["medianBrightness"] <= 3]
    .groupby(["res"])["medianBrightness"]
    .agg(["mean", "size"])
    .sort_values("size", ascending=False)
)

In [None]:
# Attach per-residue brightness statistics (mean and count over the
# single-AA mutants) to every single-AA mutant row.
# The aggregate is computed inline so this cell runs on a fresh kernel:
# `residue_brightness` is only defined in a later cell.
kondrashov_single.join(
    kondrashov_single.groupby(["res"])["medianBrightness"].agg(["mean", "size"]),
    on="res",
    rsuffix="_res_brightness",
)

In [None]:
# Per-residue statistics over the single-AA mutants:
#   "mean" -- mean of medianBrightness, "size" -- number of mutants.
residue_brightness = kondrashov_single.groupby(["res"])["medianBrightness"].agg(
    ["mean", "size"]
)
# The columns keep the names "mean" and "size". Overwriting
# `columns.values[:]` in place is not a supported way to rename (it mutates
# the Index's backing array and leaves label lookup inconsistent), and the
# filter below looks columns up by "mean"/"size".
mean_dim_residues = residue_brightness[
    (residue_brightness["size"] > 0) & (residue_brightness["mean"] <= 3)
]
mean_dim_residues.sort_values("mean")

In [None]:
# Rows whose single amino-acid mutation is at position 57.
kondrashov.loc[kondrashov["res"] == 57]

In [None]:
# Turn A's into G's (A -> G changes).
# Given a particular nucleotide change, list the amino-acid mutations that can be implemented using that change.

In [None]:
def count_transitions_for_mutation(seq, res, to_aa, nt_from, nt_to):
    """Count codons of ``to_aa`` one ``nt_from`` -> ``nt_to`` change away.

    The codon at 1-based position ``res`` of ``seq`` is compared (via
    ``site_diff``) against every codon listed in ``aa_to_codon`` for the
    lower-cased ``to_aa``. A candidate counts when the comparison reports
    exactly one difference whose second and third fields equal the
    lower-cased ``nt_from`` and ``nt_to``.

    Returns the number of such candidate codons.
    """
    target_aa = to_aa.lower()
    src_nt = nt_from.lower()
    dst_nt = nt_to.lower()

    def _is_requested_change(changes):
        if len(changes) != 1:
            return False
        only = changes[0]
        return only[1] == src_nt and only[2] == dst_nt

    candidates = aa_to_codon[target_aa]
    first, last = 3 * (res - 1), 3 * res
    return sum(
        1
        for candidate in candidates
        if _is_requested_change(site_diff(seq[first:last], candidate))
    )

In [None]:
# Noise level of synonymous mutations: standard deviation of brightness over
# genotypes with at least one nucleotide change but no amino-acid change.
kondrashov.loc[
    kondrashov["num_aa_mutations"].eq(0) & kondrashov["num_nt_mutations"].ne(0),
    "medianBrightness",
].std()

In [None]:
# Single-AA mutants whose substitution can be reached from the template `seq`
# by exactly one a -> g change (count of matching codons > 0).
kondrashov_single.loc[
    lambda frame: frame["aa_mutations"]
    .map(
        lambda muts: count_transitions_for_mutation(
            seq, muts[0][2], muts[0][3], "a", "g"
        )
    )
    .gt(0)
]

In [None]:
# Spot check: codons for "R" reachable at residue 57 by a single t -> a change.
count_transitions_for_mutation(seq=seq, res=57, to_aa="R", nt_from="t", nt_to="a")

In [None]:
# Single-nucleotide mutants with medianBrightness <= 3.
kondrashov_single_nt.loc[kondrashov_single_nt["medianBrightness"] <= 3]

# Chromophore mutation

In [None]:
# Candidate mutations, written as <from><position><to>.
candidate_chromophore_mutants = [
    "L13P",
    "W55R",
    "F25S",
    "L218P",
    "F128S",
    "Q92R",
    "V110E",
    "Y64C",
    "H215L",
    "H167R",
    "M86K",
    "Q181L",
    "K83E",
]

In [None]:
# Mutation to inspect, written as <from><position><to>; parsed in the next cell.
mut = "M86K"

In [None]:
# Split `mut` into its original letters, position and new letters.
# The pattern is a raw string so that "\d" reaches the regex engine as-is
# instead of being an invalid string escape sequence.
from_aa, res, to_aa = re.match(r"([A-Z]+)(\d+)([A-Z]+)", mut).groups()
res = int(res)

In [None]:
# Codon of `seq` at 1-based position `res`.
seq[3 * res - 3 : 3 * res]

In [None]:
# Look up the entry stored for codon "agc".
# NOTE(review): at this point `codon_to_aa` presumably comes from the `cloning`
# star import; a later cell rebinds the same name -- confirm which is intended.
codon_to_aa["agc"]

# New: stop-codon primer design

In [None]:
# Design stop-codon primers for the rcvenus template.
# NOTE(review): the cells below treat `ps` as a sequence of dict-like records,
# some of which lack a "forward_primer" key -- confirm against `cloning`.
ps = stop_codon_primers(rcvenus_seq)

In [None]:
# Keep only the records that actually carry a forward primer, then tabulate.
ps_nonnull = list(filter(lambda record: "forward_primer" in record, ps))
df = pd.DataFrame(ps_nonnull)

In [None]:
# Build the table shown in the interactive grid: a "selected" checkbox column
# plus the primer lengths, on top of a copy of `df`.
df_display = df.assign(
    selected=False,
    forward_len=df["forward_primer"].map(len),
    reverse_len=df["reverse_primer"].map(len),
)

# Column order: the fixed leading columns, then every key of the first primer
# record except "mutation"/"transition" and the *_dG / *_primer fields.
cols = ["selected", "mutation", "transition", "forward_len", "reverse_len"]
cols = cols + [
    key
    for key in list(ps_nonnull[0].keys())
    if key not in ("mutation", "transition")
    and not key.endswith(("_dG", "_primer"))
]
df_display = df_display[cols]

table = qgrid.show_grid(
    df_display,
    grid_options=dict(
        forceFitColumns=False,
        defaultColumnWidth=120,
        autoEdit=True,
    ),
    precision=1,
)
table

In [None]:
# Read the grid back (including edits to the "selected" column) and pick the
# corresponding rows of the full primer table `df`.
df2 = table.get_changed_df()
# The index of `df2` holds row labels, so select with label-based .loc rather
# than positional .iloc.
selected_primers = df.loc[df2[df2["selected"]].index]
selected_primers = selected_primers.sort_index()
selected_primers

In [None]:
# Persist the selected primers as JSON (path is relative to the working directory).
selected_primers.to_json("180222primers.json")

In [None]:
# First primer number to assign; names are formatted as
# JQS<number>_<prefix>_<mutation>_<suffix> by the cells below.
jqs_num = 93
# Label embedded in every primer name.
prefix = "rcvenus"

In [None]:
# Print one "name<TAB>sequence" line per forward/reverse primer.
# - The loop variable is `primer_seq`, not `seq`, so the module-level template
#   sequence `seq` used by other cells is not overwritten.
# - Numbering uses a local counter seeded from `jqs_num`, so re-running this
#   cell prints the same names instead of continuing where the last run ended.
primer_num = jqs_num
for p in selected_primers.itertuples():
    for suffix, primer_seq in (("f", p.forward_primer), ("r", p.reverse_primer)):
        name = "JQS{}_{}_{}_{}".format(primer_num, prefix, p.mutation, suffix)
        print("{}\t{}".format(name, primer_seq))
        primer_num += 1

In [None]:
# Print one "name<TAB>sequence" line per reversion forward primer, numbered
# from 107. The sequence is read straight from the row rather than bound to a
# variable named `seq`, so the module-level template `seq` is not overwritten.
jqs_num = 107
for p in selected_primers.itertuples():
    name = "JQS{}_{}_{}_{}".format(jqs_num, prefix, p.mutation, "rev_f")
    print("{}\t{}".format(name, p.reversion_forward_primer))
    jqs_num += 1

# Old: manual search for single-base stop-codon mutations

In [None]:
# For every codon of `seq` (note: `res` is a 0-based codon index here), list
# the stop codons that differ from it at exactly one position. Lines where the
# third field of that difference is "g" are flagged with "***".
for res, codon in enumerate(grouper(seq, 3)):
    for stop_codon in stop_codons:
        diff = site_diff(codon, stop_codon)
        if len(diff) != 1:
            continue
        flag = "***" if diff[0][2] == "g" else ""
        print(res, "".join(codon), "->", stop_codon, diff, flag)

In [None]:
# Three-letter amino-acid name (or "STOP") -> list of upper-case codons.
synonymous_codons = dict(
    CYS=["TGT", "TGC"],
    ASP=["GAT", "GAC"],
    SER=["TCT", "TCG", "TCA", "TCC", "AGC", "AGT"],
    GLN=["CAA", "CAG"],
    MET=["ATG"],
    ASN=["AAC", "AAT"],
    PRO=["CCT", "CCG", "CCA", "CCC"],
    LYS=["AAG", "AAA"],
    STOP=["TAG", "TGA", "TAA"],
    THR=["ACC", "ACA", "ACG", "ACT"],
    PHE=["TTT", "TTC"],
    ALA=["GCA", "GCC", "GCG", "GCT"],
    GLY=["GGT", "GGG", "GGA", "GGC"],
    ILE=["ATC", "ATA", "ATT"],
    LEU=["TTA", "TTG", "CTC", "CTT", "CTG", "CTA"],
    HIS=["CAT", "CAC"],
    ARG=["CGA", "CGC", "CGG", "CGT", "AGG", "AGA"],
    TRP=["TGG"],
    VAL=["GTA", "GTC", "GTG", "GTT"],
    GLU=["GAG", "GAA"],
    TYR=["TAT", "TAC"],
)

In [None]:
# Invert `synonymous_codons`: lower-case codon -> amino-acid name.
codon_to_aa = {
    codon.lower(): aa
    for aa, codons in synonymous_codons.items()
    for codon in codons
}

In [None]:
# Display the codon -> amino-acid mapping built in the previous cell.
codon_to_aa

In [None]:
bases = "atcg"
# (b1, b2) -> list of (res, sub_codon, stop_codon, diff) hits, where a
# synonymous codon of the residue is one change away from a stop codon and
# that change has b2 as its second field and b1 as its third field.
# NOTE(review): assuming site_diff reports (position, from, to), the key is
# therefore (to, from) -- confirm.
stops_for_mutation = defaultdict(list)

for b1, b2 in [(x, y) for x in bases for y in bases if x != y]:
    # seq[:-3] leaves out the last three characters of the template.
    for res, codon in enumerate(grouper(seq[:-3], 3)):
        sub_codons = synonymous_codons[codon_to_aa["".join(codon)]]
        for stop_codon in stop_codons:
            for sub_codon in map(str.lower, sub_codons):
                diff = site_diff(sub_codon, stop_codon)
                if len(diff) != 1:
                    continue
                if diff[0][1] == b2 and diff[0][2] == b1:
                    stops_for_mutation[(b1, b2)].append(
                        (res, sub_codon, stop_codon, diff)
                    )

In [None]:
# Number of recorded hits per (b1, b2) key.
num_stops_for_mutation = dict(
    zip(stops_for_mutation, map(len, stops_for_mutation.values()))
)

In [None]:
# Hits stored under key ("a", "g"), i.e. b1 == "a" and b2 == "g" in the loop
# that fills this dict.
# NOTE(review): assuming site_diff reports (position, from, to), these are
# g -> a changes -- confirm.
stops_for_mutation[("a", "g")]

In [None]:
# Display the hit counts per (b1, b2) key.
num_stops_for_mutation

In [None]:
# Display the sequence currently bound to `seq`.
seq