In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Analyse results of simulating the mutational robustness of sRNA-mRNA interactions

## Imports

In [2]:
import os

import pandas as pd

from synbio_morpher.srv.io.manage.script_manager import script_preamble
from synbio_morpher.srv.sequence_exploration.sequence_analysis import b_tabulate_mutation_info
from synbio_morpher.utils.common.setup import prepare_config, expand_config
from synbio_morpher.utils.data.data_format_tools.common import load_multiple_as_list


# Combine data from multi-threaded runs

## Simulation settings

In [3]:
# Base configuration handed to the project's script preamble further down.
# Sections are kept in the same order as before; the values are unchanged.
config = dict(
    experiment=dict(purpose="tests"),
    system_type="RNA",
    include_prod_deg=False,
    # Settings for the interaction simulator (IntaRNA).
    interaction_simulator=dict(
        name="IntaRNA",
        postprocess=True,
        multithread=0,
        # presumably forwarded to IntaRNA as its CSV output column list -- confirm
        simulator_kwargs=dict(outcsvcols="id1,id2,E,E_norm,subseqDB,Pu1,Pu2"),
    ),
    # How mutations are generated.
    mutations_args=dict(
        algorithm="all",
        mutation_counts=0,
        mutation_nums_within_sequence=[1],
        mutation_nums_per_position=1,
        concurrent_species_to_mutate="single_species_at_a_time",
        seed=0,
    ),
    # Numerical simulation settings.
    simulation=dict(
        dt0=0.1,
        t0=0,
        t1=100,
        tmax=2000,
        solver="diffrax",
        use_batch_mutations=True,
        interaction_factor=1,
        batch_size=200,
        max_circuits=2000,
        device="cpu",
        threshold_steady_states=0.05,
        use_rate_scaling=True,
    ),
    # Molecular parameters (units are not stated in this notebook).
    molecular_params=dict(
        avg_mRNA_per_cell=100,
        cell_doubling_time=1200,
        creation_rate=2.35,
        starting_copynumbers=200,
        degradation_rate=0.01175,
        association_binding_rate=1000000,
    ),
)

# No writer exists yet, so pass None; script_preamble returns the
# (config, data_writer) pair used by the rest of the notebook.
config, data_writer = script_preamble(config, None)
# Expand the config, then prepare it; only the prepared result is kept.
config_file = prepare_config(expand_config(config=config))

## Load data

In [4]:
# Every run directory shares the same root and differs only in its stamp.
# The stamps are not contiguous (e.g. ...201556, ...201600 and ...201603 are
# absent), so they are listed explicitly rather than generated.
_RUN_ROOT = "data/tests"
_RUN_STAMPS = (
    "2024_01_21_201544",
    "2024_01_21_201545",
    "2024_01_21_201546",
    "2024_01_21_201547",
    "2024_01_21_201548",
    "2024_01_21_201549",
    "2024_01_21_201550",
    "2024_01_21_201551",
    "2024_01_21_201552",
    "2024_01_21_201553",
    "2024_01_21_201554",
    "2024_01_21_201555",
    "2024_01_21_201557",
    "2024_01_21_201558",
    "2024_01_21_201559",
    "2024_01_21_201601",
    "2024_01_21_201602",
    "2024_01_21_201604",
    "2024_01_21_201605",
    "2024_01_21_201606",
    "2024_01_21_201607",
    "2024_01_21_201608",
    "2024_01_21_201609",
)
source_dirs = [f"{_RUN_ROOT}/{stamp}" for stamp in _RUN_STAMPS]

### Edit mutation CSVs

In [10]:
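# Disabled diagnostic, kept from correcting mistakenly overwritten mutation files:
# prints each circuit whose `mutations/` folder holds more entries than `mutations_og.csv` lists.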

# for topdir in source_dirs:
#     for circuit_dir in sorted(os.listdir(topdir)):
#         if os.path.isdir(os.path.join(topdir, circuit_dir, 'mutations')):
#             fn_muts = os.path.join(topdir, circuit_dir, 'mutations_og.csv')
#             fn_muts_new = os.path.join(topdir, circuit_dir, 'mutations.csv')
#             if os.path.isfile(fn_muts):
#                 mutations = pd.read_csv(fn_muts, index_col='mutation_name')
#                 actual_mutations = sorted(os.listdir(os.path.join(topdir, circuit_dir, 'mutations')))
#                 if len(actual_mutations) > len(mutations):
#                     print(circuit_dir)

In [5]:
# Reconcile each circuit's mutation table with the mutation folders that
# actually exist on disk.
#
#   mutations_og.csv  backup of the table; created once from mutations.csv
#                     and never rewritten by this cell
#   mutations.csv     working table; trimmed to the entries found in the
#                     circuit's `mutations/` folder
#
# Fix: the trimmed table used to be written to mutations_og.csv, which
# overwrote the backup and left mutations.csv unchanged.
#
# NOTE(review): if an earlier run already trimmed mutations_og.csv in place,
# the length check below will no longer fire for that circuit and its
# mutations.csv stays untrimmed -- restore the backup first in that case.
for topdir in source_dirs:
    for circuit_dir in sorted(os.listdir(topdir)):
        circuit_path = os.path.join(topdir, circuit_dir)
        mutations_path = os.path.join(circuit_path, 'mutations')
        if not os.path.isdir(mutations_path):
            # Not a circuit folder with mutations; nothing to reconcile.
            continue

        fn_muts = os.path.join(circuit_path, 'mutations_og.csv')
        fn_muts_new = os.path.join(circuit_path, 'mutations.csv')
        if not os.path.isfile(fn_muts):
            # First visit: snapshot the current table before touching it.
            mutations = pd.read_csv(fn_muts_new, index_col='mutation_name')
            mutations.to_csv(fn_muts)
        else:
            # A backup exists, so use it as the reference table.
            mutations = pd.read_csv(fn_muts, index_col='mutation_name')

        actual_mutations = sorted(os.listdir(mutations_path))
        if len(actual_mutations) < len(mutations):
            # Keep only rows whose mutation folder exists, and write them to
            # the working table so the backup stays intact. `.loc` raises
            # KeyError if the folder holds a name missing from the table.
            mutations.loc[actual_mutations].to_csv(fn_muts_new)

In [8]:
# Apply the mutation-info tabulation to every run directory
# (presumably yielding one table per directory -- confirm against the loader),
# then stack the resulting tables row-wise into a single frame.
tables = load_multiple_as_list(
    inputs_list=source_dirs,
    load_func=b_tabulate_mutation_info,
    data_writer=data_writer,
)
saves = pd.concat(tables, axis="index")

1


ValueError: array length 319 does not match index length 24

# Visualise