In [1]:
# Enable IPython's autoreload extension; mode 2 reloads imported modules
# before each cell is executed, so edits to project code are picked up.
%load_ext autoreload
%autoreload 2

# Comparison of the mutational robustness of sRNAs vs. randomly generated RNA

## Imports

In [2]:
import os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import pandas as pd

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.srv.sequence_exploration.sequence_analysis import b_tabulate_mutation_info
from synbio_morpher.utils.common.setup import construct_circuit_from_cfg, prepare_config, expand_config
from synbio_morpher.utils.evolution.evolver import Evolver
from synbio_morpher.utils.circuit.agnostic_circuits.circuit_manager import CircuitModeller
from synbio_morpher.utils.misc.type_handling import flatten_listlike
from synbio_morpher.utils.data.data_format_tools.common import load_multiple_as_list


import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import jax
# Select the CPU as the JAX platform for this notebook.
jax.config.update('jax_platform_name', 'cpu')

# Show the devices JAX reports (displayed as the cell output).
jax.devices()



I0000 00:00:1699884403.051617   73126 tfrt_cpu_pjrt_client.cc:349] TfrtCpuClient created.
xla_bridge.py:backends():513: Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: "rocm". Available platform names are: CUDA INFO
xla_bridge.py:backends():513: Unable to initialize backend 'tpu': INTERNAL: Failed to open libtpu.so: libtpu.so: cannot open shared object file: No such file or directory INFO


[CpuDevice(id=0)]

In [3]:
# Load the merged sRNA table and keep only the rows whose source database is EcoCyc.
fn = '../data/sRNA/merged_EcoCyc_RNAInter.csv'
data = pd.read_csv(fn)
data = data[data['Database'] == 'EcoCyc']
# 'Unnamed: 0' is presumably a saved index column; drop it only when present.
# errors='ignore' replaces the former bare `except: pass`, which would also
# have silently hidden unrelated failures (including KeyboardInterrupt).
data = data.drop(columns=['Unnamed: 0'], errors='ignore')


In [4]:
# Number of rows left in `data` after the EcoCyc filter.
data.shape[0]

69

In [7]:
# Combined length of every entry in the 'Sequence' column.
sequence_lengths = data['Sequence'].str.len()
sum(sequence_lengths)

9607

In [5]:
# Write one FASTA record per unique name, with 'T' replaced by 'U' in the sequence.
circuit_fasta = os.path.join('data', 'merged.fasta')
# The output directory may not exist yet; without this, open(..., 'w') raises
# FileNotFoundError.
os.makedirs(os.path.dirname(circuit_fasta), exist_ok=True)

# Keep the first row for each name. This selects the same rows, in the same
# order, as looping over `unique()` and taking `.iloc[0]` of a boolean mask,
# but without re-scanning the whole frame for every name.
first_per_name = data.drop_duplicates(subset='Name', keep='first')
circuit_fasta_d = {
    name: seq.replace('T', 'U')
    for name, seq in zip(first_per_name['Name'], first_per_name['Sequence'])
}

with open(circuit_fasta, 'w') as f:
    for name, seq in circuit_fasta_d.items():
        # FASTA headers use underscores; the dict keeps the original names as keys.
        f.write('>' + name.replace(' ', '_') + '\n')
        f.write(seq + '\n')

In [6]:
# Expected species count for n sequences: the triangular number 1 + 2 + ... + n
# plus n. Presumably this counts every unordered pair (self-pairs included)
# plus each unbound species -- TODO confirm against the circuit model.
num_sequences = len(circuit_fasta_d)
nn = np.arange(1, num_sequences + 1).sum() + num_sequences
print('Expected number of species: ', nn)

Expected number of species:  2484


# Config

In [7]:
# Species names are written to the FASTA file with spaces replaced by
# underscores (and the mutation loop uses the same form), so the signal input
# is normalised the same way to refer to the same species name.
signal_input = data['Name'].iloc[0].replace(' ', '_')

# Experiment configuration handed to the synbio_morpher setup helpers.
config = {
    "experiment": {
        "purpose": "tests",
        "no_visualisations": False,
        "no_numerical": False,
        "debug_mode": False
    },
    "data_path": circuit_fasta,
    "system_type": "RNA",
    "include_prod_deg": False,
    "interaction_simulator": {
        "name": "IntaRNA",
        "postprocess": True
    },
    "mutations_args": {
        "algorithm": "all",
        "mutation_counts": 0,
        "mutation_nums_within_sequence": [1],
        "mutation_nums_per_position": 1,
        # Placeholder: overwritten with a concrete species name for each
        # circuit in the mutation loop.
        "concurrent_species_to_mutate": "single_species_at_a_time",
        "seed": 0
    },
    "signal": {
        "inputs": [signal_input],
        "outputs": [],
        "function_name": "step_function",
        "function_kwargs": {
            "impulse_center": 1,
            "impulse_halfwidth": 5,
            "target": 2
        }
    },
    "simulation": {
        "dt0": 0.1,
        "t0": 0,
        "t1": 100,
        "tmax": 2000,
        "solver": "diffrax",
        "use_batch_mutations": True,
        "interaction_factor": 1,
        "batch_size": 1000,
        "max_circuits": 2100,
        "device": "cpu",
        "threshold_steady_states": 0.05,
        "use_rate_scaling": True
    },
    "molecular_params_factor": 1,
    "molecular_params": {
        "avg_mRNA_per_cell": 100,
        "cell_doubling_time": 1200,
        "creation_rate": 2.35,
        "starting_copynumbers": 200,
        "degradation_rate": 0.01175,
        "association_binding_rate": 1000000
    }
}

# Simulate

In [8]:
# No pre-existing writer is handed to the preamble; it returns the (possibly
# updated) config together with the writer used by the later cells.
data_writer = None
config, data_writer = script_preamble(config, data_writer)
# Expand the config, then run it through prepare_config.
config_file = prepare_config(expand_config(config=config))

In [11]:
# Build one circuit per sequence and generate its mutants, setting that
# sequence as the species to mutate.
circuits = []
for species_name in circuit_fasta_d:
    # Species are named in the FASTA file with underscores instead of spaces.
    config_file['mutations_args']['concurrent_species_to_mutate'] = species_name.replace(' ', '_')
    # Point at the FASTA file written earlier, reusing its path variable
    # instead of repeating the literal.
    config_file['data_path'] = circuit_fasta
    base_circuit = construct_circuit_from_cfg(
        prev_configs=None, config_file=config_file)

    mutation_args = config_file['mutations_args']
    evolver = Evolver(
        data_writer=data_writer,
        sequence_type=config_file.get('system_type'),
        seed=mutation_args.get('seed', 0),
        concurrent_species_to_mutate=mutation_args['concurrent_species_to_mutate'])
    mutated_circuit = evolver.mutate(
        base_circuit,
        write_to_subsystem=True,
        algorithm=mutation_args.get('algorithm', 'random'))
    circuits.append(mutated_circuit)

# Species count of the last circuit built (displayed as the cell output);
# read from the list rather than from a leaked loop variable.
len(circuits[-1].model.species)

In [None]:
from synbio_morpher.utils.misc.type_handling import flatten_nested_dict

# One entry per circuit: the number of flattened mutation entries plus one
# (presumably the unmutated reference circuit). The previous loop rebuilt this
# list on every iteration, so only the last circuit was counted and printed.
num_subcircuits = [
    len(flatten_nested_dict(c.mutations)) + 1 for c in circuits]
tot_subcircuits = sum(num_subcircuits)

print(num_subcircuits)

In [None]:
# from synbio_morpher.utils.common.setup import compose_kwargs
# modeller = CircuitModeller(result_writer=data_writer, config=config_file)
# curr_subcircuits = []
# for i, circuit in enumerate(circuits[:2]):
#     curr_subcircuits.append(modeller.load_mutations(circuit))
# curr_subcircuits = flatten_listlike(curr_subcircuits)




: 

In [None]:
# Run the selected modeller methods over all circuits via batch_circuits.
modeller = CircuitModeller(result_writer=data_writer, config=config_file)

experiment_cfg = config_file['experiment']
# Method names (with their keyword arguments) passed to batch_circuits.
batch_methods = {
    "compute_interactions": {},
    # Disabled steps, kept for reference:
    # "init_circuits": {'batch': True},
    # "simulate_signal_batch": {'ref_circuit': None,
    #                           'batch': True},
    "write_results": {
        'no_visualisations': experiment_cfg.get('no_visualisations', True),
        'no_numerical': experiment_cfg.get('no_numerical', False),
    },
}
circuits = modeller.batch_circuits(
    circuits=circuits,
    write_to_subsystem=True,
    batch_size=config_file['simulation'].get('batch_size', 100),
    methods=batch_methods,
)



KeyboardInterrupt: 

## Save

In [None]:
# source_dirs = [data_writer.top_write_dir]
# source_dirs = ['data/tests/2023_11_03_104039/']
# tables = load_multiple_as_list(inputs_list=source_dirs, load_func=b_tabulate_mutation_info, 
#             data_writer=data_writer)
# saves = pd.concat(tables, axis=0)
# saves

Unnamed: 0,circuit_name,mutation_name,mutation_num,mutation_type,mutation_positions,path_to_template_circuit,index,name,interacting,self_interacting,...,energies_1-0,energies_1-1,eqconstants_0-0,eqconstants_0-1,eqconstants_1-0,eqconstants_1-1,num_interacting_diff_to_base_circuit,num_self_interacting_diff_to_base_circuit,num_interacting_ratio_from_mutation_to_base,num_self_interacting_ratio_from_mutation_to_base
0,merged,ref_circuit,0,[],[],data/merged.fasta,0.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
1,merged,6S_RNA_m1-0,1,[2],[0],data/merged.fasta,0.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
2,merged,6S_RNA_m1-1,1,[1],[0],data/merged.fasta,1.0,merged,[],[[0 0]],...,0.00,0.0,1.124887,0.000002,0.000002,0.000002,0,0,0.0,1.0
3,merged,6S_RNA_m1-10,1,[9],[3],data/merged.fasta,10.0,merged,[],[[0 0]],...,0.00,0.0,0.322931,0.000002,0.000002,0.000002,0,0,0.0,1.0
4,merged,6S_RNA_m1-100,1,[3],[33],data/merged.fasta,100.0,merged,[],[[0 0]],...,0.00,0.0,0.174415,0.000002,0.000002,0.000002,0,0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
737,merged,antisense_regulatory_RNA_RdlD_m1-95,1,[8],[31],data/merged.fasta,644.0,merged,[[0 1]],[[0 0]],...,-0.16,0.0,1.170795,0.000002,0.000002,0.000002,-1,0,0.0,1.0
738,merged,antisense_regulatory_RNA_RdlD_m1-96,1,[11],[32],data/merged.fasta,645.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
739,merged,antisense_regulatory_RNA_RdlD_m1-97,1,[9],[32],data/merged.fasta,646.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0
740,merged,antisense_regulatory_RNA_RdlD_m1-98,1,[10],[32],data/merged.fasta,647.0,merged,[],[[0 0]],...,0.00,0.0,1.170795,0.000002,0.000002,0.000002,0,0,0.0,1.0


# Compare

In [None]:
# fn_random = '../data/processed/ensemble_mutation_effect_analysis/2023_07_17_105328/tabulated_mutation_info.csv'
# data_r = pd.read_csv(fn_random)

In [None]:
# data_r[get_true_]