In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Comparison of the mutational robustness of sRNAs vs. randomly generated RNA

## Imports

In [2]:
import os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import pandas as pd

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.srv.sequence_exploration.sequence_analysis import b_tabulate_mutation_info
from synbio_morpher.utils.common.setup import construct_circuit_from_cfg, prepare_config, expand_config
from synbio_morpher.utils.evolution.evolver import Evolver
from synbio_morpher.utils.circuit.agnostic_circuits.circuit_manager import CircuitModeller
from synbio_morpher.utils.misc.type_handling import flatten_listlike
from synbio_morpher.utils.data.data_format_tools.common import load_multiple_as_list
from synbio_morpher.utils.misc.type_handling import flatten_nested_dict


import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import jax
# Set JAX's platform name to 'cpu' for this notebook.
jax.config.update('jax_platform_name', 'cpu')

# Display the devices JAX reports (the recorded output lists a single CpuDevice).
jax.devices()



xla_bridge.py:backends():603: Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: "rocm". Available platform names are: Interpreter CUDA INFO
xla_bridge.py:backends():603: Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client' INFO


[CpuDevice(id=0)]

In [3]:
# Load the merged interaction table from CSV.
fn = '../data/sRNA/merged_inter.csv'
data = pd.read_csv(fn)

# Remove the leftover index column when the CSV was saved with one.
# `errors='ignore'` tolerates its absence without hiding unrelated failures
# (a bare `except: pass` would also swallow e.g. KeyboardInterrupt).
data = data.drop(columns=['Unnamed: 0'], errors='ignore')

# Keep only the rows in which both sequence columns hold strings; missing
# values are read by pandas as float NaN and are filtered out here.
has_both_sequences = (
    data['Sequence 1'].apply(lambda x: isinstance(x, str))
    & data['Sequence 2'].apply(lambda x: isinstance(x, str))
)
data = data[has_both_sequences]


In [5]:
# Display the filtered interaction table.
data

Unnamed: 0,Interactor 1,Interactor 2,ID 1,ID 2,Category 1,Category 2,Regulation,Binding position 1,Binding position 2,Sequence 1,Sequence 2,Source DB
0,micA,dppA,NCBI:2847697,NCBI:948062,sRNA,mRNA,,,,CAAAAATGGTGTCGCTCGTATTATTGTCGAGAAACCTTTCGGCAAG...,ATGCGTATTTCCTTGAAAAAGTCAGGGATGCTGAAGCTTGGTCTCA...,RNAInter
1,dsrA,dsrB,NCBI:946470,NCBI:946468,sRNA,mRNA,,,,AACACATCAGATTTCCTGGTGTAACGAATTTTTTAAGTGCTTCTTG...,TAAATGNAAACAAGTTTTATTTTTATCATCAGAACATTCTAATAAA...,RNAInter
2,dsrA,ilvH,NCBI:946470,NCBI:947267,sRNA,mRNA,,,,AACACATCAGATTTCCTGGTGTAACGAATTTTTTAAGTGCTTCTTG...,ATTCGCCACAACAAGGACAGGAAGGTTCGGCGCAAGGAGCCCAAGA...,RNAInter
3,micA,ecnB,NCBI:2847697,NCBI:2847737,sRNA,mRNA,,,,CAAAAATGGTGTCGCTCGTATTATTGTCGAGAAACCTTTCGGCAAG...,ATGGTGAAGAAGACAATTGCAGCGATCTTTTCTGTTCTGGTGCTTT...,RNAInter
4,oxyS,fhlA,NCBI:2847701,NCBI:947181,sRNA,mRNA,,,,GAAACGGAGCGGCACCTCTTTTAACCCTTGAAGTCACTGCCCGTTT...,CACCAGATTGCCAAGCTGATGGTCGAGCTGTCCAAATCCCAGGATG...,RNAInter
...,...,...,...,...,...,...,...,...,...,...,...,...
444,ryhB,msrA,"chromosome:NC_000913.3, Gene ID:2847761","chromosome:NC_000913.3, Gene ID:948734",,mRNA,No Interaction,['NA'],['NA'],GCGATCAGGAAGACCCTCGCGGAGAACCTGAAAGCACGACATTGCT...,ATGAGTTTATTTGATAAAAAGCATCTGGTTTCCCCCGCCGATGCCC...,sRNATarBase
445,gcvB,hdeA,"chromosome:NC_000913.3, Gene ID:2847720","chromosome:NC_000913.3, Gene ID:948025",trans-encoded antisense RNA,mRNA,Induction,['NA'],['NA'],ACTTCCTGAGCCGGAACGAAAAGTTTTATCGGAATGCGTGTTCTGG...,ATGAAAAAAGTATTAGGCGTTATTCTTGGTGGTCTGCTTCTTCTGC...,sRNATarBase
446,gcvB,hdeB,"chromosome:NC_000913.3, Gene ID:2847720","chromosome:NC_000913.3, Gene ID:948026",trans-encoded antisense RNA,mRNA,Induction,['NA'],['NA'],ACTTCCTGAGCCGGAACGAAAAGTTTTATCGGAATGCGTGTTCTGG...,ATGAATATTTCATCTCTCCGTAAAGCGTTTATTTTTATGGGCGCTG...,sRNATarBase
447,Esr41,fliC,"chromosome:NC_002695.1, Gene ID:","chromosome:NC_002695.1, Gene ID:",trans-encoded antisense RNA,mRNA,Induction,['NA'],['NA'],GATGCTCTAGGCATCACATTTTCTCCATGGGGTATTCCCTCCGCCG...,ATGGCACAAGTCATTAATACCAACAGCCTCTCGCTGATCACTCAAA...,sRNATarBase


In [6]:
# Number of interaction rows that remain after filtering.
len(data)

446

# Config

In [7]:
# Configuration dictionary for this notebook. It is handed to `script_preamble`
# and then expanded/prepared into `config_file` in the "Simulate" section.
config = {
    "experiment": {
        "purpose": "tests",
        "no_visualisations": False,
        "no_numerical": False,
        "debug_mode": False
    },
    "system_type": "RNA",
    "include_prod_deg": False,
    # Interaction simulator selection and the keyword arguments forwarded to it.
    "interaction_simulator": {
        "name": "IntaRNA",
        "postprocess": True,
        "simulator_kwargs": {
            "outcsvcols": "id1, id2, E, E_norm, seedPu1, seedPu2, seedStart1, seedStart2, seedEnd1, seedEnd2",
            "threads": 12,
            "n": 1,
            "raw_stdout": True
        }
    },
    # Mutation settings. NOTE: the circuit-building loop later overwrites
    # 'concurrent_species_to_mutate' on the prepared config for every row of `data`.
    "mutations_args": {
        "algorithm": "all",
        "mutation_counts": 0,
        "mutation_nums_within_sequence": [1],
        "mutation_nums_per_position": 1,
        "concurrent_species_to_mutate": "single_species_at_a_time",
        "seed": 0
    },
    "signal": {
        # The signal input is the 'Interactor 1' value of the first row of `data`.
        "inputs": [data['Interactor 1'].iloc[0]],
        "outputs": [],
        "function_name": "step_function",
        "function_kwargs": {
            "impulse_center": 1,
            "impulse_halfwidth": 5,
            "target": 2
        }
    },
    # Simulation settings (units are not stated in this notebook — TODO confirm
    # against the synbio_morpher documentation).
    "simulation": {
        "dt0": 0.1,
        "t0": 0,
        "t1": 100,
        "tmax": 2000,
        "solver": "diffrax",
        "use_batch_mutations": True,
        "interaction_factor": 1,
        "batch_size": 2000,
        "max_circuits": 9000,
        "device": "cpu",
        "threshold_steady_states": 0.05,
        "use_rate_scaling": True
    },
    "molecular_params_factor": 1,
    "molecular_params": {
        "avg_mRNA_per_cell": 100,
        "cell_doubling_time": 1200,
        "creation_rate": 2.35,
        "starting_copynumbers": 200,
        "degradation_rate": 0.01175,
        "association_binding_rate": 1000000
    }
}



# Simulate

In [8]:
# Run the script preamble without a pre-existing data writer; it hands back the
# config together with the data writer used by the rest of the notebook.
config, data_writer = script_preamble(config, None)

# Expand the config, then prepare it, yielding the `config_file` used below.
config_file = prepare_config(expand_config(config=config))

In [10]:
# Build an Evolver without specifying `concurrent_species_to_mutate` and show
# the value the attribute takes in that case (the recorded output is '').
probe_evolver = Evolver(
    data_writer=data_writer,
    sequence_type=config_file.get('system_type'),
    seed=config_file.get('mutations_args', {}).get('seed', 0),
)
probe_evolver.concurrent_species_to_mutate

''

In [16]:
# Build one circuit per interaction row and generate its mutations, with
# 'Interactor 1' as the species selected for mutation.
circuits = []
for _, row in data.iterrows():
    # Point the shared config at this row: which species to mutate, and the
    # two sequences with every 'T' replaced by 'U'.
    config_file['mutations_args']['concurrent_species_to_mutate'] = row['Interactor 1']
    # config_file['data_path'] = 'data/circuit.fasta'
    config_file['data'] = {
        row['Interactor 1']: row['Sequence 1'].replace('T', 'U'),
        row['Interactor 2']: row['Sequence 2'].replace('T', 'U')
    }

    base_circuit = construct_circuit_from_cfg(
        prev_configs=None, config_file=config_file)

    evolver = Evolver(
        data_writer=data_writer,
        sequence_type=config_file.get('system_type'),
        seed=config_file['mutations_args'].get('seed', 0),
        concurrent_species_to_mutate=config_file['mutations_args']['concurrent_species_to_mutate'])

    circuit = evolver.mutate(
        base_circuit,
        write_to_subsystem=True,
        algorithm=config_file.get('mutations_args', {}).get('algorithm', 'random'))
    circuits.append(circuit)

# Species count of the model belonging to the last circuit built above.
len(circuit.model.species)

5

In [12]:
# for circuit in circuits:
#     num_subcircuits = [len(flatten_nested_dict(
#         c.mutations)) + 1 for c in [circuit]]
#     tot_subcircuits = sum(num_subcircuits)

# print(num_subcircuits)

[433]


In [13]:
# from synbio_morpher.utils.common.setup import compose_kwargs
# modeller = CircuitModeller(result_writer=data_writer, config=config_file)
# curr_subcircuits = []
# for i, circuit in enumerate(circuits[:3]):
#     curr_subcircuits.append(modeller.load_mutations(circuit))
# curr_subcircuits = flatten_listlike(curr_subcircuits)


In [14]:
# modeller = CircuitModeller(result_writer=data_writer, config=config_file)
# circuits = modeller.batch_circuits(
#     circuits=circuits,
#     write_to_subsystem=True, batch_size=config_file['simulation'].get('batch_size', 100),
#     methods={
#         "compute_interactions": {},
#         # "init_circuits": {'batch': True},
#         # "simulate_signal_batch": {'ref_circuit': None,
#         #                           'batch': True},
#         "write_results": {'no_visualisations': config_file['experiment'].get('no_visualisations', True),
#                           'no_numerical': config_file['experiment'].get('no_numerical', False)}
#     })





KeyboardInterrupt: 

## Save

In [None]:
# source_dirs = [data_writer.top_write_dir]
# source_dirs = ['data/tests/2023_11_03_104039/']
# tables = load_multiple_as_list(inputs_list=source_dirs, load_func=b_tabulate_mutation_info, 
#             data_writer=data_writer)
# saves = pd.concat(tables, axis=0)
# saves

Unnamed: 0,circuit_name,mutation_name,mutation_num,mutation_type,mutation_positions,path_to_template_circuit,index,name,interacting,self_interacting,...,energies_1-0,energies_1-1,eqconstants_0-0,eqconstants_0-1,eqconstants_1-0,eqconstants_1-1,num_interacting_diff_to_base_circuit,num_self_interacting_diff_to_base_circuit,num_interacting_ratio_from_mutation_to_base,num_self_interacting_ratio_from_mutation_to_base
0,merged,ref_circuit,0,[],[],data/merged.fasta,0.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
1,merged,6S_RNA_m1-0,1,[2],[0],data/merged.fasta,0.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
2,merged,6S_RNA_m1-1,1,[1],[0],data/merged.fasta,1.0,merged,[],[[0 0]],...,0.00,0.0,1.124887,0.000002,0.000002,0.000002,0,0,0.0,1.0
3,merged,6S_RNA_m1-10,1,[9],[3],data/merged.fasta,10.0,merged,[],[[0 0]],...,0.00,0.0,0.322931,0.000002,0.000002,0.000002,0,0,0.0,1.0
4,merged,6S_RNA_m1-100,1,[3],[33],data/merged.fasta,100.0,merged,[],[[0 0]],...,0.00,0.0,0.174415,0.000002,0.000002,0.000002,0,0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,merged,antisense_regulatory_RNA_RdlD_m1-95,1,[8],[31],data/merged.fasta,644.0,merged,[[0 1]],[[0 0]],...,-0.16,0.0,1.170795,0.000002,0.000002,0.000002,-1,0,0.0,1.0
738,merged,antisense_regulatory_RNA_RdlD_m1-96,1,[11],[32],data/merged.fasta,645.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
739,merged,antisense_regulatory_RNA_RdlD_m1-97,1,[9],[32],data/merged.fasta,646.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
740,merged,antisense_regulatory_RNA_RdlD_m1-98,1,[10],[32],data/merged.fasta,647.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0


# Compare

In [11]:
# Summarise how much of one simulation run has completed: the number of entries
# in the run's `mutations` directory is compared with the number of rows in its
# `mutations.csv` table.
# NOTE(review): this is an absolute, machine-specific path; it is kept as-is so
# the same files are read, but it should become a configurable location.
run_dir = '/home/wadh6511/Kode/EvoScaper/notebooks_sRNA/data/04_distribution_mutational_robustness/2023_11_10_191219'
ddir = os.path.join(run_dir, 'mutations')
mutations = pd.read_csv(os.path.join(run_dir, 'mutations.csv'))

# List the directory and measure the table once so all three lines agree.
n_simulated = len(os.listdir(ddir))
n_mutations = len(mutations)
# An empty mutations table would otherwise raise ZeroDivisionError.
percent_simulated = n_simulated / n_mutations * 100 if n_mutations else float('nan')

print(f'Number simulated: {n_simulated}')
print(f'Percent simulated: {percent_simulated} %')
print(f'Total number of mutations: {n_mutations}')

Number simulated: 3363
Total number of mutations: 28821
