In [1]:
%load_ext autoreload
%autoreload 2

# Orthogonality of sRNA in mRNA targeting

## Imports

In [2]:
import os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import pandas as pd

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.srv.parameter_prediction.IntaRNA.bin.copomus.IntaRNA import IntaRNA
from synbio_morpher.srv.parameter_prediction.simulator import simulate_IntaRNA
from synbio_morpher.srv.sequence_exploration.sequence_analysis import b_tabulate_mutation_info
from synbio_morpher.utils.common.setup import construct_circuit_from_cfg, prepare_config, expand_config
from synbio_morpher.utils.evolution.evolver import Evolver
from synbio_morpher.utils.circuit.agnostic_circuits.circuit_manager import CircuitModeller
from synbio_morpher.utils.data.data_format_tools.common import load_multiple_as_list
from synbio_morpher.utils.results.analytics.naming import get_true_interaction_cols

import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import jax
# Select the CPU platform for JAX in this notebook.
jax.config.update('jax_platform_name', 'cpu')

# Display the devices JAX reports, as a sanity check on the platform choice.
jax.devices()



2023-11-08 13:58:20.855391: E external/xla/xla/stream_executor/cuda/cuda_driver.cc:268] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
xla_bridge.py:backends():622: Unable to initialize backend 'cuda': FAILED_PRECONDITION: No visible GPU devices. INFO
xla_bridge.py:backends():622: Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: "rocm". Available platform names are: CUDA Interpreter INFO
xla_bridge.py:backends():622: Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client' INFO


[CpuDevice(id=0)]

In [3]:
# Load the RNAInter interaction table from the repository-relative data folder.
fn_RNAInter = os.path.join('..', 'data', 'sRNA', 'RNAInter', 'Download_data_RR.csv')
data = pd.read_csv(fn_RNAInter)
# Drop the 'Unnamed: 0' column when the CSV carries one. `errors='ignore'`
# makes this a no-op when the column is absent, without hiding unrelated
# failures the way a bare `except: pass` would.
data = data.drop(columns=['Unnamed: 0'], errors='ignore')


In [4]:
# Number of interaction records loaded.
print(len(data))
# Sum of 1..N plus N, where N is the number of rows in `data`.
nn = np.arange(1, len(data) + 1).sum() + len(data)
print('Expected number of species: ', nn)
data.head()

354
Expected number of species:  63189


Unnamed: 0,index,RNAInterID,Interactor1.Symbol,Category1,Species1,Interactor2.Symbol,Category2,Species2,Raw_ID1,Raw_ID2,score,strong,weak,predict,Sequence1,Sequence2
0,1029724,RR05384747,acnA,mRNA,Escherichia coli str. K-12 substr. MG1655,ryhB,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:946724,NCBI:2847761,0.2292,Northern blot//Reporter assay,,,ATTCGGAACGAGGCCTGAAGCAGTGTCGCCGTCCCTCTGCCTTGCA...,TTTGAGAGCGTCTCTGTCCCTCGTTTTGCGGTTAAGCCGCATCCAT...
1,1029725,RR05384845,acrZ,mRNA,Escherichia coli str. K-12 substr. MG1655,omrB,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:945365,NCBI:2847747,0.1778,Northern blot,Microarray,,ACTTACTACTGTCTTCGGGGGGTCCGAGGTTTCTGGGGGGTCGTAC...,TGTTCTATACTTGGGTTCGACTTGGGTTAGACTTGTCTTTACTGTC...
2,1029732,RR05387056,arcZ,sRNA,Escherichia coli str. K-12 substr. MG1655,flhD,mRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:2847690,NCBI:945442,0.2314,Northern blot//Reporter assay,,,CTCATGTTGACCGCTTGTTTAGCAGCTTCAAGGAAGCTGAAGGGCA...,CTCGTGCCGAATTCGGCACGAGCGATATTTCATCAGTTATCGGTAA...
3,1029733,RR05387057,arcZ,sRNA,Escherichia coli str. K-12 substr. MG1655,rpoS,mRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:2847690,NCBI:947210,0.1996,Northern blot//RACE//RT-PCR//Beta-galactosidas...,,,CTCATGTTGACCGCTTGTTTAGCAGCTTCAAGGAAGCTGAAGGGCA...,ATCCTCGGGTCTTGCAGGCCACACAGGACACCCTGAACCGTCATGG...
4,1029734,RR05387126,argR,mRNA,Escherichia coli str. K-12 substr. MG1655,dsrA,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:947861,NCBI:946470,0.2139,Primer extension assay,,,GACAATGGCGATAGTATGGCGGTTGTTTCTTTCCCATCTCTACTCA...,TGAGCAACTTTATTCACATAATTTCTACACCAAGAACTCGAGGTTA...


In [50]:
def _first_sequences(interactions: pd.DataFrame, category: str) -> dict:
    """Map each symbol of the given RNA category to its first listed sequence.

    Both interactor sides of the table are scanned (side 1, then side 2).
    For every symbol whose `Category<side>` equals `category`, the sequence
    is taken from the first row in which that symbol appears on that side.
    A value found on side 2 overwrites one found on side 1.

    Args:
        interactions: Table with `Category{1,2}`, `Interactor{1,2}.Symbol`
            and `Sequence{1,2}` columns.
        category: Category label to select, e.g. 'sRNA' or 'mRNA'.

    Returns:
        Dict of symbol -> sequence string. Symbols whose first sequence is
        not a string (e.g. a missing value) are skipped.
    """
    sequences = {}
    for side in ('1', '2'):
        symbol_col = f'Interactor{side}.Symbol'
        seq_col = f'Sequence{side}'
        in_category = interactions[f'Category{side}'] == category
        for symbol in interactions.loc[in_category, symbol_col].unique():
            seq = interactions.loc[interactions[symbol_col] == symbol, seq_col].iloc[0]
            # Only real sequences are usable downstream; skip missing values.
            if isinstance(seq, str):
                sequences[symbol] = seq
    return sequences


# Symbol -> sequence lookups for the two RNA categories in the table.
srnas = _first_sequences(data, 'sRNA')
mrnas = _first_sequences(data, 'mRNA')

In [6]:
# Symbols that appear as both an mRNA and an sRNA (expected to be none).
[name for name in mrnas if name in srnas]

[]

In [7]:

# Base experiment configuration handed to the synbio_morpher setup helpers.
# NOTE(review): the meaning and units of the molecular parameters are defined
# by synbio_morpher and are not visible here — confirm against its docs.
config = {
    "experiment": {
        "purpose": "tests",
        "no_visualisations": False,
        "no_numerical": False,
        "debug_mode": False
    },
    "data": {},
    "system_type": "RNA",
    "include_prod_deg": False,
    "interaction_simulator": {
        "name": "IntaRNA",
        "postprocess": True
    },
    "molecular_params_factor": 1,
    "molecular_params": {
        "avg_mRNA_per_cell": 100,
        "cell_doubling_time": 1200,
        "creation_rate": 2.35,
        "starting_copynumbers": 200,
        "degradation_rate": 0.01175,
        "association_binding_rate": 1000000
    }
}
# script_preamble returns the config together with a data writer; the writer
# is used later in the notebook to emit FASTA and JSON files.
data_writer = None
config, data_writer = script_preamble(config, data_writer)
# Derive `config_file` from `config` in two steps. Later cells read
# config_file['interaction_simulator']['simulator_kwargs'], so one of these
# helpers presumably fills that key in — TODO confirm.
config_file = expand_config(config=config)
config_file = prepare_config(config_file)


## Create circuits

In [8]:
# Total number of sRNA x mRNA combinations.
len(srnas) * len(mrnas)

5180

In [27]:
# Extra settings for the IntaRNA interaction simulator.
# NOTE(review): these mutate `config` after `config_file` was derived from it;
# whether they reach config_file['interaction_simulator']['simulator_kwargs']
# depends on how expand_config/prepare_config copy the dict — confirm, or
# re-run the config cell after changing them.
config['interaction_simulator']['threads'] = 8
config['interaction_simulator']['n'] = 2
config['interaction_simulator']['raw_stdout'] = True

In [43]:
def process_raw_stdout(stdout):
    """Parse semicolon-separated text (header line + data rows) into a dict.

    The first non-blank line is the header. Every following non-blank line
    is one record, keyed by its first field. For each record the third field
    is stored as a float under the third header name, and the fourth field is
    stored as a string under the fourth header name.

    Blank lines are ignored, and the final record is kept whether or not the
    text ends with a newline.

    Args:
        stdout: The raw text to parse.

    Returns:
        Dict of first-field -> {header[2]: float, header[3]: str}. Empty when
        there is no header or there are no data rows. If several rows share a
        first field, the last row wins.

    Raises:
        IndexError: If a data row (or the header) has fewer than four fields.
        ValueError: If a row's third field cannot be converted to float.
    """
    rows = [line for line in stdout.split('\n') if line.strip()]
    if not rows:
        return {}

    header = rows[0].split(';')
    d = {}
    for row in rows[1:]:
        fields = row.split(';')
        d[fields[0]] = {
            header[2]: float(fields[2]),
            header[3]: fields[3],
        }
    return d

In [51]:
from subprocess import Popen, PIPE, run
from datetime import datetime
# Accumulators for this cell. `sim_data` is filled per sRNA by the loop below;
# `circuits` is initialised here but not used in this cell.
circuits = []
sim_data = {}

def simulate_IntaRNA_local(query: dict,
                           targets: dict,
                           sim_kwargs=None,
                           simulator=None):
    """Run the `IntaRNA` command-line tool for `query` against `targets`.

    Both sequence dicts are written to FASTA files through the notebook-level
    `data_writer`. IntaRNA is then invoked on those files in CSV output mode,
    and its stdout is parsed with `process_raw_stdout`.

    Args:
        query: Sequences to write to the query FASTA file.
        targets: Sequences to write to the target FASTA file.
        sim_kwargs: Keyword arguments for the IntaRNA call (`qidxpos0`,
            `tidxpos0`, `outcsvcols`, `threads`, and optionally `n`,
            `param_file`, `extra_params`, `raw_stdout`). The dict is copied,
            so the caller's object is never modified.
        simulator: Unused; accepted for call compatibility.

    Returns:
        The dict produced by `process_raw_stdout` from IntaRNA's stdout.

    Raises:
        RuntimeError: If IntaRNA exits with a non-zero return code.
    """
    fn_query = data_writer.output(data=query, out_type='fasta', out_name='query', byseq=True, return_path=True)
    fn_targets = data_writer.output(data=targets, out_type='fasta', out_name='targets', byseq=True, return_path=True)

    # Copy before injecting the file paths so the shared config dict (and any
    # default) is not mutated between calls.
    kwargs = dict(sim_kwargs or {})
    kwargs['query'] = fn_query
    kwargs['target'] = fn_targets

    def _run_intarna(query: str, target: str, qidxpos0: int, tidxpos0: int, outcsvcols: str, threads: int,
                     n: int = 1, param_file: str = '', extra_params=None, raw_stdout: bool = False):
        """Invoke IntaRNA and return its parsed CSV output.

        `raw_stdout` is accepted so the configured kwargs can be passed
        through unchanged; it does not affect this function.
        """
        cmd = ['IntaRNA', '-q', query, '-t', target,
               '--outMode=C', f'--outcsvcols={outcsvcols}',
               f'--qIdxPos0={qidxpos0}',
               f'--tIdxPos0={tidxpos0}',
               f'--outNumber={n}',
               f'--threads={threads}']
        # Only forward the parameter-file argument when one was given, rather
        # than passing an empty argv entry.
        if param_file:
            cmd.append(param_file)
        cmd.extend(extra_params or [])

        p = Popen(cmd, stdout=PIPE, stderr=PIPE, universal_newlines=True)
        stdout, stderr = p.communicate()
        # Surface tool failures instead of silently returning an empty result.
        if p.returncode != 0:
            raise RuntimeError(
                f'IntaRNA exited with code {p.returncode}: {stderr.strip()}')
        return process_raw_stdout(stdout)

    return _run_intarna(**kwargs)


# Simulate each sRNA against the full mRNA set, timing every run and rewriting
# the accumulated results to JSON after each sRNA finishes.
for s, sseq in srnas.items():
    sim_data[s] = {}
    t = datetime.now()

    intarna_kwargs = config_file['interaction_simulator']['simulator_kwargs']
    sim_data[s] = simulate_IntaRNA_local(
        query={s: sseq},
        targets=mrnas,
        sim_kwargs=intarna_kwargs,
        simulator=IntaRNA(),
    )
    elapsed = (datetime.now() - t).total_seconds()
    print('Finished ', s, ' in ', elapsed, ' s')

    data_writer.output(
        data=sim_data,
        out_type='json',
        out_name='inter_data_raw',
        overwrite=True,
    )

Finished  arcZ  in  54.910919  s
Finished  arrS  in  35.321667  s
Finished  chiX  in  78.424952  s
Finished  cyaR  in  71.926472  s
Finished  dicF  in  48.124806  s
Finished  dsrA  in  67.317026  s
Finished  fnrS  in  40.348168  s


KeyboardInterrupt: 