In [1]:
%load_ext autoreload
%autoreload 2

# Simulating the effect of mutations on sRNA networks

In this notebook we demonstrate a workflow for simulating mutations in RNA and examining the effect they have on RNA networks / circuits. We use the `synbio_morpher` package to predict the interaction strengths and to simulate the effect on the RNA network.

## Imports

In [2]:
import os
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import pandas as pd

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.utils.common.setup import construct_circuit_from_cfg, prepare_config, expand_config
from synbio_morpher.utils.evolution.evolver import Evolver
from synbio_morpher.utils.circuit.agnostic_circuits.circuit_manager import CircuitModeller

import numpy as np
import jax
# Set JAX's 'jax_platform_name' option to 'gpu' before any computation runs.
# NOTE(review): presumably this fails on a machine without a GPU backend —
# confirm, and switch to 'cpu' when running without one.
jax.config.update('jax_platform_name', 'gpu')



## Introduction to `synbio_morpher`

The package `synbio_morpher` allows for easy construction of genetic circuits and their simulation. A genetic circuit can be created by putting all the component RNA species into a FASTA file, or by directly creating a dictionary that contains all the RNA component parts. A configuration file governs the customisations involved in specifying the creation, mutation, and simulation of circuits, each of which is created as a `Circuit` object. The `CircuitModeller` class can then be used on batched `Circuit` objects to simulate the time series of these circuits and their mutated counterparts in parallel.

### First let's make a FASTA for the intended toy circuit

In [3]:
# Load the RNA-RNA interaction table downloaded from RNAInter.
fn = os.path.join('..', 'data', 'sRNA', 'RNAInter', 'Download_data_RR.csv')

# 'Unnamed: 0' is presumably a stray index column left by an earlier
# `to_csv`; drop it when present. `errors='ignore'` tolerates only the
# "column is missing" case, whereas the previous bare `except: pass` would
# also have hidden unrelated failures (and swallowed KeyboardInterrupt).
data = pd.read_csv(fn).drop(columns=['Unnamed: 0'], errors='ignore')

In [4]:
# Rows for which both interacting partners have a known sequence.
filt = data['Sequence1'].notna() & data['Sequence2'].notna()

# Interaction counts per first-interactor symbol: once over the rows that have
# sequences, once over the whole table.
counts_with_sequences = data[filt]['Interactor1.Symbol'].value_counts()
counts_overall = data['Interactor1.Symbol'].value_counts()

# Show the with-sequence counts of the symbols that occur exactly 4 times in
# the whole table.
# NOTE(review): the mask comes from the unfiltered counts while the displayed
# values come from the filtered ones — confirm this mix is intended.
counts_with_sequences[counts_overall == 4]

Interactor1.Symbol
galK    4
ompF    4
flhD    4
Name: count, dtype: int64

In [5]:
# Symbol of the RNA that the toy circuit is built around.
sRNA1 = 'galK'

# Select the interactions whose first partner is `sRNA1` and for which both
# partner sequences are available.
is_chosen_rna = data['Interactor1.Symbol'] == sRNA1
has_both_sequences = data['Sequence1'].notna() & data['Sequence2'].notna()
filt = is_chosen_rna & has_both_sequences
data[filt].head()

Unnamed: 0,index,RNAInterID,Interactor1.Symbol,Category1,Species1,Interactor2.Symbol,Category2,Species2,Raw_ID1,Raw_ID2,score,strong,weak,predict,Sequence1,Sequence2
110,1713510,RR05458723,galK,mRNA,Escherichia coli str. K-12 substr. MG1655,micA,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:945358,NCBI:2847697,0.1983,Northern blot,,,ACTTACTACTGTGCCTTTCCTAGGCCTTACTGGGGGATCACCACCT...,CAAAAATGGTGTCGCTCGTATTATTGTCGAGAAACCTTTCGGCAAG...
111,1713511,RR05458724,galK,mRNA,Escherichia coli str. K-12 substr. MG1655,micC,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:945358,NCBI:2847713,0.1983,Northern blot,,,ACTTACTACTGTGCCTTTCCTAGGCCTTACTGGGGGATCACCACCT...,ATTTACGCTCATTGCGCATGGGTGTTTGTCCAGCAATTCTTGAATC...
112,1713512,RR05458725,galK,mRNA,Escherichia coli str. K-12 substr. MG1655,micF,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:945358,NCBI:2847742,0.1983,Northern blot,,,ACTTACTACTGTGCCTTTCCTAGGCCTTACTGGGGGATCACCACCT...,AAAGCCAACTCAACTCAACTCATCAAAATGCGCTTCATCGTCTCTC...
113,1713513,RR05458726,galK,mRNA,Escherichia coli str. K-12 substr. MG1655,ryhB,sRNA,Escherichia coli str. K-12 substr. MG1655,NCBI:945358,NCBI:2847761,0.1983,Northern blot,,,ACTTACTACTGTGCCTTTCCTAGGCCTTACTGGGGGATCACCACCT...,TTTGAGAGCGTCTCTGTCCCTCGTTTTGCGGTTAAGCCGCATCCAT...


Convert all sequences to the RNA alphabet by replacing every `T` (thymine) with `U` (uracil).

In [6]:
# The table stores sequences with T; rewrite both sequence columns with U so
# that they are expressed in the RNA alphabet.
for sequence_column in ('Sequence1', 'Sequence2'):
    data[sequence_column] = data[sequence_column].str.replace('T', 'U')

In [7]:
# Destination FASTA file and an in-memory {symbol: sequence} copy of it.
circuit_fasta = 'data/circuit.fasta'
circuit_fasta_d = {}

# Make sure the output directory exists; `open(..., 'w')` does not create it.
os.makedirs(os.path.dirname(circuit_fasta), exist_ok=True)

# Look sequences up in the selected interactions only. These rows are
# guaranteed by `filt` to have both sequences, whereas the first match in the
# full table could be a row with a missing sequence (which would make
# `f.write` fail) or an unrelated interaction.
selected_interactions = data[filt]

with open(circuit_fasta, 'w') as f:
    # One FASTA record per unique RNA, first the Interactor1 side and then
    # the Interactor2 side of the selected interactions.
    for interactor, seqN in [('Interactor1.Symbol', 'Sequence1'), ('Interactor2.Symbol', 'Sequence2')]:
        for r in selected_interactions[interactor].unique():
            # Every row for a given symbol is expected to carry the same
            # sequence, so the first match is used.
            sequence = selected_interactions.loc[selected_interactions[interactor] == r, seqN].iloc[0]
            circuit_fasta_d[r] = sequence
            f.write(f'>{r}\n{sequence}\n')

In [8]:
# Report the sequence length of every RNA written to the circuit FASTA.
print('Length of each RNA molecule is the following:')
rna_lengths = {name: len(sequence) for name, sequence in circuit_fasta_d.items()}
rna_lengths

Length of each RNA molecule is the following:


{'galK': 57, 'micA': 447, 'micC': 166, 'micF': 488, 'ryhB': 490}

### Configuration for simulation

In [9]:
# Configuration dictionary handed to `script_preamble` / `expand_config` /
# `prepare_config` in the circuit-construction cell. The meaning of each key
# is defined by `synbio_morpher`; the notes below only record how this
# notebook itself uses the values.
config = {
    "experiment": {
        "purpose": "tests",
        # Both flags are read back when `write_results` is configured in the
        # batch-simulation cell.
        "no_visualisations": True,
        "no_numerical": True,
        "debug_mode": False
    },
    # Path of the FASTA file written in the cell above.
    "data_path": circuit_fasta,
    "system_type": "RNA",
    "include_prod_deg": False,
    "interaction_simulator": {
        "name": "IntaRNA",
        "postprocess": True
    },
    # 'seed' and 'algorithm' are read back when the `Evolver` is created and
    # `mutate` is called in the circuit-construction cell.
    # NOTE(review): the other keys presumably control how many mutants are
    # generated per species and how many positions each one changes —
    # confirm against the synbio_morpher documentation.
    "mutations_args": {
        "algorithm": "random",
        "mutation_counts": 5,
        "mutation_nums_within_sequence": [1, 3, 5, 20],
        "mutation_nums_per_position": 1,
        "concurrent_species_to_mutate": "single_species_at_a_time",
        "seed": 0
    },
    "signal": {
        # `sRNA1` is the symbol chosen earlier in the notebook ('galK').
        "inputs": [sRNA1],
        "outputs": [],
        "function_name": "step_function",
        "function_kwargs": {
            "impulse_center": 1,
            "impulse_halfwidth": 5,
            "target": 2
        }
    },
    "simulation": {
        "dt0": 0.1,
        "t0": 0,
        "t1": 100,
        "tmax": 2000,
        "solver": "diffrax",
        "use_batch_mutations": True,
        "interaction_factor": 1,
        # Read back as the `batch_size` argument of `batch_circuits`.
        "batch_size": 20000,
        "max_circuits": 60000,
        # Same platform as the 'jax_platform_name' set in the imports cell.
        "device": "gpu",
        "threshold_steady_states": 0.05,
        "use_rate_scaling": True
    },
    "molecular_params_factor": 1,
    # NOTE(review): units of these parameters are not stated in this
    # notebook — confirm against the synbio_morpher documentation.
    "molecular_params": {
        "avg_mRNA_per_cell": 100,
        "cell_doubling_time": 1200,
        "creation_rate": 2.35,
        "starting_copynumbers": 200,
        "degradation_rate": 0.01175,
        "association_binding_rate": 1000000
    }
}

### Circuit construction

In [10]:
# Run the script preamble with no pre-existing writer; it returns the config
# together with the data writer used by the following steps.
data_writer = None
config, data_writer = script_preamble(config, data_writer)

# Expand and prepare the configuration, then build the reference circuit.
config_file = prepare_config(expand_config(config=config))
circuit = construct_circuit_from_cfg(prev_configs=None, config_file=config_file)

# Mutate the circuit. The seed falls back to a random integer below 1000 and
# the algorithm to 'random' when the configuration does not provide them.
mutations_args = config_file.get('mutations_args', {})
evolver = Evolver(
    data_writer=data_writer,
    sequence_type=config_file.get('system_type'),
    seed=mutations_args.get('seed', np.random.randint(1000)),
)
circuit = evolver.mutate(
    circuit,
    write_to_subsystem=True,
    algorithm=mutations_args.get('algorithm', 'random'),
)

xla_bridge.py:backends():622: Unable to initialize backend 'rocm': NOT_FOUND: Could not find registered platform with name: "rocm". Available platform names are: CUDA Interpreter INFO
xla_bridge.py:backends():622: Unable to initialize backend 'tpu': module 'jaxlib.xla_extension' has no attribute 'get_tpu_client' INFO


In [11]:
# Display the species of the circuit's model; the recorded output lists the
# pairwise complexes (tuples) followed by the five individual RNAs.
circuit.model.species

[Species: ('galK', 'galK'),
 Species: ('galK', 'micA'),
 Species: ('galK', 'micC'),
 Species: ('galK', 'micF'),
 Species: ('galK', 'ryhB'),
 Species: ('micA', 'micA'),
 Species: ('micA', 'micC'),
 Species: ('micA', 'micF'),
 Species: ('micA', 'ryhB'),
 Species: ('micC', 'micC'),
 Species: ('micC', 'micF'),
 Species: ('micC', 'ryhB'),
 Species: ('micF', 'micF'),
 Species: ('micF', 'ryhB'),
 Species: ('ryhB', 'ryhB'),
 Species: galK,
 Species: micA,
 Species: micC,
 Species: micF,
 Species: ryhB]

In [12]:
# Display the reactions of the circuit's model.
# NOTE(review): the recorded output shows forward/reverse rates as nan —
# presumably they are only filled in by the "compute_interactions" step of
# the batch run below; confirm.
circuit.model.reactions

[Reaction(input=[Species: galK, Species: galK], output=[Species: ('galK', 'galK')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: galK, Species: micA], output=[Species: ('galK', 'micA')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: galK, Species: micC], output=[Species: ('galK', 'micC')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: galK, Species: micF], output=[Species: ('galK', 'micF')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: galK, Species: ryhB], output=[Species: ('galK', 'ryhB')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: micA, Species: micA], output=[Species: ('micA', 'micA')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: micA, Species: micC], output=[Species: ('micA', 'micC')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: micA, Species: micF], output=[Species: ('micA', 'micF')], forward_rate=nan, reverse_rate=nan),
 Reaction(input=[Species: micA, 

In [13]:
# Build the modeller that simulates the reference circuit and its mutants.
modeller = CircuitModeller(result_writer=data_writer, config=config_file)

experiment_cfg = config_file['experiment']
simulation_cfg = config_file['simulation']

# Modeller methods applied to the batch, each with its keyword arguments.
# NOTE(review): presumably applied in this order — confirm.
batch_methods = {
    "compute_interactions": {},
    "init_circuits": {'batch': True},
    "simulate_signal_batch": {'ref_circuit': None,
                              'batch': True},
    "write_results": {'no_visualisations': experiment_cfg.get('no_visualisations', True),
                      'no_numerical': experiment_cfg.get('no_numerical', False)}
}

# Long-running step: the recorded output reports about 1 h 19 min for the
# single batch.
circuits = modeller.batch_circuits(
    circuits=[circuit],
    write_to_subsystem=True,
    batch_size=simulation_cfg.get('batch_size', 100),
    methods=batch_methods,
)



Steady states:  100  iterations.  571  left to steady out.  0:01:31.025998
Steady states:  200  iterations.  40  left to steady out.  0:03:03.561122
Steady states:  300  iterations.  12  left to steady out.  0:04:34.841132
Steady states:  400  iterations.  4  left to steady out.  0:06:08.157712
Steady states:  500  iterations.  2  left to steady out.  0:07:47.641448
Steady states:  600  iterations.  2  left to steady out.  0:09:24.123607
Steady states:  700  iterations.  1  left to steady out.  0:11:04.732546
Steady states:  800  iterations.  1  left to steady out.  0:12:38.582809
Done:  0:14:13.386875




Done:  0:01:35.793048


circuit_manager.py:batch_circuits():645: Single batch: 1:19:01.765303 
Projected time: 4741.765303s 
