In [1]:
# Reload imported modules automatically before each cell runs, so that edits
# to project source files are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Orthogonality of our sponge circuit components to the sRNA network

The circuit is composed of a synthetic sRNA and the mRNAs of two fluorescent proteins, cyRFP and EGFP.

## Imports

In [2]:
import os
import pandas as pd
import re
import logging 

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.srv.parameter_prediction.IntaRNA.bin.copomus.IntaRNA import IntaRNA
from synbio_morpher.srv.parameter_prediction.simulator import process_raw_stdout
from synbio_morpher.utils.circuit.common.config_setup import retrieve_default_args, load_simulator_kwargs
from synbio_morpher.utils.common.setup import prepare_config, expand_config
from synbio_morpher.utils.data.data_format_tools.common import load_json_as_dict
from synbio_morpher.utils.data.data_format_tools.manipulate_fasta import load_seq_from_FASTA, write_fasta_file
from synbio_morpher.utils.misc.numerical import count_monotonic_group_lengths, find_monotonic_group_idxs
from synbio_morpher.utils.misc.string_handling import string_to_tuple_list
from synbio_morpher.utils.misc.type_handling import get_nth_elements, flatten_listlike

from subprocess import Popen, PIPE, run
from datetime import datetime
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import jax





# Load data

Load the sequences for EGFP and cyRFP, along with the synthetic ChiX and the other sRNAs.

In [3]:
# Tabular sRNA resources live under ../data/sRNA (relative to the working directory).
srna_dir = os.path.join('..', 'data', 'sRNA')
ecocyc_dir = os.path.join(srna_dir, 'EcoCyc')

fn_tus = os.path.join(ecocyc_dir, 'EcoCyc_TUs_DH10B.csv')
fn_srnas = os.path.join(ecocyc_dir, 'EcoCyc_regulatory_RNAs_DH10B.csv')
fn_merged = os.path.join(srna_dir, 'merged_EcoCyc_RNAInter_sRNATarBase.csv')
fn_inter = os.path.join(srna_dir, 'merged_inter.csv')

# Every table is read with its first column as the index.
d_tus, d_srnas, d_merged, d_inter = (
    pd.read_csv(fn_csv, index_col=0)
    for fn_csv in (fn_tus, fn_srnas, fn_merged, fn_inter)
)

# FASTA inputs, each loaded as a dict.
# NOTE(review): these two paths are relative to 'data/' while the CSVs above use
# '../data/' -- confirm both resolve from the directory the notebook is run in.
fn_fps = 'data/fps.fasta'
fn_muts = 'data/scott_mutations.fasta'
muts = load_seq_from_FASTA(fn_muts, as_type='dict')
fps = load_seq_from_FASTA(fn_fps, as_type='dict')

## Get new syntheses

In [39]:
sdir = 'February 2024 synthesis'

# Merge the sequences of every file in `sdir` into a single dict.
# os.listdir returns entries in arbitrary order, so the listing is sorted to make
# the resulting dict order (and which entry wins when two files share a sequence
# name) reproducible across machines and re-runs.
all_synths = {}
for f in sorted(os.listdir(sdir)):
    syn = load_seq_from_FASTA(os.path.join(sdir, f), as_type='dict')
    all_synths.update(syn)

all_synths

{'double_seed_NT_mScarlet-I': 'GGTCTCAGTACAGCGCTCAACGGGTGTGCTTCCCGTTCTGATGAGTCCGTGAGGACGAAAGCGCCTCTACAAATAATTTTGTTTAAacttcctgagccggaacgaaaagttttatcggaatgcgtgttctgatgggcttttggcttacggtATAAGGAGTCCGGTGTAGCGAAAGcaattggtctgcgattcagaccacggtagcgagactaccctttttcCACTGCCTCGCCCTTGCTCACCAtacatttaccctGAAACATTTCGTCCATCTCTAAGAatagtgattaatgtagcaccgccatattgcggtgcttttttttTCGAAGAGACC',
 'sRNA_mScarlet-I_alternative': 'ACAGATCGTCAAGATTAGAAAACGGTAGCAGCATTATCGGAGGTTCTCTAACTAGTGGTCTCAGTACAGCGCTCAACGGGTGTGCTTCCCGTTCTGATGAGTCCGTGAGGACGAAAGCGCCTCTACAAATAATTTTGTTTAAACACCGTCGCTTAAAGTGACGGCATAATAATAAAAAAATGAAATTCCATGTGCACCTTGAACCGGCCAATAGCGATATTGGCCATTTTTTTTCGAAGAGACCATGGATAGCCGTGTCTTCACTGTGCTGCGGCTACCCATCGCCTGAAAACCAGTTGG',
 'MicC': 'TCTACGCAGCGACATGTTTATTAACAGTTGTCTGGCAGCACAAAACTGGTCTCAGTACAGTAGTCACCGGCTGTGCTTGCCGGTCTGATGAGCCTGTGAAGGCGAAACTACCTCTACAAATAATTTTGTTTAACTCGAGAAAAAAAGCCCGGACGACTGTTCGGGCTTGTCTTTTTATATGTTGGAAAATCAGTGGCAATGCAATGGCCCAACAGAAAATGCGTAAAGGAGAAGAACTTTTCCATAAGAGACCTTTACCATGGTCGTAGAAGCCCCCCAGAGTTAG

## Select sRNAs from database

In [51]:
# Descriptive phrases to strip from the sRNA common names, leaving the bare name.
to_remove = ['e14 prophage; ', 'Qin prophage; ', 'Rac prophage; ', 
             'putative antitoxin ', 'antisense regulatory RNA ', 'small antisense RNA ',
             'antitoxin ', 'small regulatory RNA ', 
             'putative antisense regulatory RNA ', 'small regulatory RNA antitoxin ', 'small RNA ']

# Apply the longest phrases first: some entries contain shorter ones (e.g.
# 'putative antisense regulatory RNA ' contains 'antisense regulatory RNA '), and
# removing the shorter phrase first would leave a stray 'putative ' behind and
# prevent the longer phrase from ever matching.
# regex=False makes the match literal regardless of the pandas version's default.
for r in sorted(to_remove, key=len, reverse=True):
    d_srnas['Common-Name'] = d_srnas['Common-Name'].str.replace(r, '', regex=False)

In [53]:
# Map each sRNA common name to its DNA sequence; if a name occurs more than
# once, the last row with that name is the one kept.
targets = {
    name: seq
    for name, seq in zip(d_srnas['Common-Name'].to_list(),
                         d_srnas['Sequence - DNA sequence'])
}

# Run IntaRNA

We may need to put all the sRNAs into one FASTA file.

In [54]:
top_dir = 'data/16_orthogonality_synthetics'

# Query/target FASTA files are written to the current working directory.
# NOTE(review): the original wrapped these names in a single-argument
# os.path.join, which is a no-op -- confirm whether they were meant to live
# under `top_dir` instead.
fn_query = 'query.fasta'
fn_targets = 'targets.fasta'

# Query = synthetic constructs, targets = database sRNAs.
for out_path, seqs in ((fn_query, all_synths), (fn_targets, targets)):
    write_fasta_file(out_path=out_path, data=seqs, byseq=True)


In [None]:
# Switch for the IntaRNA stage: when False, the data-writer setup below (and the
# simulation cell that checks the same flag) is skipped.
run_intarna = True

# Experiment configuration; it is passed through script_preamble, expand_config
# and prepare_config below before being used.
config = {
    "experiment": {
        "purpose": "tests",
    },
    "system_type": "RNA",
    "interaction_simulator": {
        "name": "IntaRNA",
        "postprocess": True,
        # These entries are forwarded as keyword arguments to the IntaRNA call
        # (see simulate_IntaRNA_local): output CSV columns, thread count,
        # `n` (used for --outNumber) and the raw_stdout flag.
        "simulator_kwargs": {
                "outcsvcols": "id1, id2, E, E_norm, bpList, hybridDPfull, seedPu1, seedPu2, seedStart1, seedStart2, seedEnd1, seedEnd2",
                "threads": 6,
                "n": 1,
                "raw_stdout": True
        }
    },
    # NOTE(review): units of these parameters are not stated in this notebook --
    # TODO confirm against the synbio_morpher documentation.
    "molecular_params": {
        "avg_mRNA_per_cell": 100,
        "cell_doubling_time": 1200,
        "creation_rate": 2.35,
        "starting_copynumbers": 200,
        "degradation_rate": 0.01175,
        "association_binding_rate": 1000000
    }
}


if run_intarna:
    # script_preamble returns the (possibly updated) config together with a data writer.
    data_writer = None
    config, data_writer = script_preamble(config, data_writer)
    config = prepare_config(expand_config(config=config))

    # Remove the writer's current top-level directory and redirect output to `top_dir`.
    # os.rmdir only succeeds on an existing, empty directory -- presumably the one
    # script_preamble just created; verify.
    os.rmdir(data_writer.top_write_dir)
    data_writer.top_write_dir = top_dir
    # NOTE(review): the effect of reset_ensemble / unsubdivide is not visible here;
    # presumably they refresh the writer's paths after the directory change -- confirm.
    data_writer.reset_ensemble()
    data_writer.unsubdivide()

In [None]:

def simulate_IntaRNA_local(fn_query: str,
                           fn_targets: str,
                           sim_kwargs=None):
    """Run the IntaRNA command-line tool on two FASTA files and parse its output.

    Args:
        fn_query: Path to the FASTA file passed to IntaRNA as the query (``-q``).
        fn_targets: Path to the FASTA file passed to IntaRNA as the target (``-t``).
        sim_kwargs: Optional mapping of further keyword arguments for the call.
            It must provide ``qidxpos0``, ``tidxpos0``, ``outcsvcols`` and
            ``threads``; ``n``, ``param_file``, ``extra_params`` and
            ``raw_stdout`` are optional. The mapping is copied, never modified.

    Returns:
        Whatever ``process_raw_stdout`` returns for IntaRNA's standard output.

    Raises:
        TypeError: If a required keyword is missing from ``sim_kwargs`` or an
            unknown keyword is supplied.
        FileNotFoundError: If the ``IntaRNA`` executable cannot be found.
    """
    import logging  # local import keeps the function self-contained

    # Work on a copy: the original mutated the caller's dict (and its own shared
    # default argument) by inserting 'query' and 'target' into it.
    call_kwargs = dict(sim_kwargs or {})
    call_kwargs['query'] = fn_query
    call_kwargs['target'] = fn_targets

    def _call_intarna(query: str, target: str, qidxpos0: int, tidxpos0: int,
                      outcsvcols: str, threads: int, n: int = 1,
                      param_file: str = '', extra_params: list = None,
                      raw_stdout: bool = False):
        # `raw_stdout` is accepted for compatibility with the config but unused.
        cmd = ['IntaRNA', '-q', query, '-t', target,
               '--outMode=C', f'--outcsvcols={outcsvcols}',
               f'--qIdxPos0={qidxpos0}',
               f'--tIdxPos0={tidxpos0}',
               f'--outNumber={n}',
               f'--threads={threads}']
        # Only forward param_file when one is given, so that no empty-string
        # argument ends up on the command line.
        if param_file:
            cmd.append(param_file)
        cmd.extend(extra_params or [])

        p = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = p.communicate()
        # Surface failures instead of silently discarding stderr; the output is
        # still parsed so the previous best-effort behaviour is kept.
        if p.returncode != 0:
            logging.warning('IntaRNA exited with code %s: %s',
                            p.returncode, stderr.strip())
        return process_raw_stdout(stdout)

    return _call_intarna(**call_kwargs)


if run_intarna:
    # Predict query/target interactions with the simulator settings from the config.
    sim_data = simulate_IntaRNA_local(
        fn_query,
        fn_targets,
        sim_kwargs=config['interaction_simulator']['simulator_kwargs'],
    )

    # Persist the raw result as JSON (replacing any earlier file), then show
    # the directory it was written to.
    data_writer.output(out_name='inter_data_raw', out_type='json',
                       data=sim_data, overwrite=True)
    print(data_writer.write_dir)

# Visualise