# Find circuits based on their interaction strengths

In [10]:

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# No package context (e.g. the code is run directly rather than imported as
# part of a package): put the parent directory on the import path and use its
# name as the package name. Presumably this is what lets the `src` and
# `tests_local` imports below resolve - confirm against the repo layout.
if __package__ is None:
    module_path = os.path.abspath(os.pardir)
    sys.path.append(module_path)
    __package__ = os.path.basename(module_path)
    

from src.srv.sequence_exploration.sequence_analysis import b_tabulate_mutation_info, pull_circuits_from_stats
from src.utils.common.setup_new import construct_circuit_from_cfg
from src.utils.circuit.agnostic_circuits.circuit_manager_new import CircuitModeller
from src.utils.misc.type_handling import flatten_listlike
from src.utils.modelling.physical import equilibrium_constant_reparameterisation, F
from src.utils.results.analytics.naming import get_true_names_analytics, get_true_interaction_cols
from tests_local.shared import five_circuits, CONFIG

from copy import deepcopy


# Work on a deep copy so that later in-place edits to `config` cannot modify
# the CONFIG object imported from tests_local.shared.
config = deepcopy(CONFIG)

In [None]:
# No writer is supplied (`data_writer=None`); `config` and `data_writer` are
# rebound to whatever five_circuits returns alongside the circuits.
circuits, config, data_writer = five_circuits(config, data_writer=None)



In [17]:
# circuit_stats.csv files from three timestamped output directories, given
# relative to the directory this code is run from.
stats_pathnames = [
    "../data/ensemble_generate_circuits/2023_02_23_174630/gather_interaction_stats/circuit_stats.csv",
    "../data/ensemble_generate_circuits/2023_02_24_170946/gather_interaction_stats/circuit_stats.csv",
    "../data/gather_interaction_stats/2023_03_29_185202/circuit_stats.csv",
]

# Unfiltered alternative, kept for reference:
# df = pd.concat([pd.read_csv(s) for s in stats_pathnames])


Unnamed: 0,name,interacting,self_interacting,num_interacting,num_self_interacting,binding_sites_0-0,binding_sites_0-1,binding_sites_0-2,binding_sites_1-0,binding_sites_1-1,...,eqconstants_1-0,eqconstants_1-1,eqconstants_1-2,eqconstants_2-0,eqconstants_2-1,eqconstants_2-2,path_binding_sites,path_binding_rates_dissociation,path_energies,path_eqconstants
0,toy_mRNA_circuit_0,[],[],0,0,,,,,,...,0.000009,0.000009,0.000009,0.000009,0.000009,0.000009,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...
1,toy_mRNA_circuit_10000,[],[],0,0,,,,,,...,0.000009,0.000009,0.000009,0.000009,0.000009,0.000009,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...
2,toy_mRNA_circuit_10001,[[1 2]],[[0 0]],1,1,"(8,20):(9,19):(10,17):(11,16):(12,15):(13,14):...",,,,,...,0.000009,0.000009,9.454608,0.000009,9.454608,0.000009,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...
3,toy_mRNA_circuit_10002,[[1 2]],[],1,0,,,,,,...,0.000009,0.000009,149.760940,0.000009,149.760940,0.000009,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...
4,toy_mRNA_circuit_10003,[],[],0,0,,,,,,...,0.000009,0.000009,0.000009,0.000009,0.000009,0.000009,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...,data/ensemble_mutation_effect_analysis/2023_02...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37204,toy_circuit_combo0_5128,[],[],0,0,,,,,,...,0.006670,0.006670,0.006670,0.006670,0.006670,0.006670,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...
37205,toy_circuit_combo0_5128,[],[],0,0,,,"(13,17):(14,16):(15,15):(16,14):(17,13):(18,12...",,,...,0.006670,0.006670,0.006670,0.630376,0.006670,0.006670,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...
37206,toy_circuit_combo0_5128,[],[],0,0,,,,,,...,0.006670,0.006670,0.006670,0.006670,0.006670,0.006670,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...
37207,toy_circuit_combo0_5128,[],[],0,0,,,,,,...,0.006670,0.006670,0.006670,0.006670,0.006670,0.006670,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...,./data/generate_seqs_flexible/2023_03_20_00561...


In [20]:

# Single-file variant, kept for reference:
# stats_pathname = '../data/ensemble_generate_circuits/2023_02_23_174630/gather_interaction_stats/circuit_stats.csv'

# Apply the same selection filters to every statistics file and concatenate
# whatever each call returns into one table.
df = pd.concat([
    pull_circuits_from_stats(
        stats_pathname=path,
        filters={
            "min_num_interacting": 3,
            "max_self_interacting": None,
            "max_total": 30,
        },
    )
    for path in stats_pathnames
])
circuit_specs = df

# Currently disabled: building circuit objects from the selected specs.
# strong_circuits = [construct_circuit_from_cfg(
#     c, config_file=config) for c in circuit_specs]


### Statistics of randomly generated circuits

Below are some statistics on a batch of 20,000 circuits that were generated randomly, albeit from a nucleotide distribution in which each RNA nucleotide was weighted by its abundance in the E. coli genome. Species are considered interacting if their equilibrium constant is greater than 1. Grouping the binding energies by the number of interacting and self-interacting RNA species makes several things obvious:
- only 25/20000 = 0.125% of circuits have 3 interactions (between different species)
- self-interacting species do not reach as negative binding energies
- the stronger the binding energy, the more nucleotides are bound together
- the most negative binding energy (ca. -9 kcal/mol) corresponds to an equilibrium constant (ca. 0.01) from the fluorescence parameterisation that is still well below $K = 1$, meaning that if the criterion for species interacting were based on this new equilibrium constant, none of the circuits would be considered interacting.

The following plots may be a bit misleading: they appear to show the true spread of each feature, but this is not the case. Each stacked bar adds its blocks together, so a bar going from 0 to 100 with 5 blocks means each block has a value of around 20, while a bar that spans across 0 has blocks with negative values added together. The purpose is to show the rough length of each block and to highlight differences between the spans of blocks at different numbers of interactions. For example, self-interactions tend to have a larger variance. The new equilibrium constant also has a smaller set of means compared to the original Gibbs equation K, for which a $\Delta G$ of 30 kcal was given if the RNA simulator did not predict any binding.

In [None]:
# Statistics table analysed below. The path is assigned here so the read does
# not depend on a name left over from an earlier session; it is the same file
# as the first entry of `stats_pathnames`.
# NOTE(review): confirm this is the 20000-circuit batch the text refers to.
stats_pathname = '../data/ensemble_generate_circuits/2023_02_23_174630/gather_interaction_stats/circuit_stats.csv'
stats = pd.read_csv(stats_pathname)


def undo_bplist(bp):
    """Parse a ':'-separated list of '(i,j)' entries into an integer array.

    `bp` is converted with `str()` first, so a float NaN becomes the text
    'nan'; every 'nan' substring is replaced by '0' before parsing.

    Returns:
        np.ndarray with one row per ':'-separated entry, e.g.
        "(8,20):(9,19)" -> [[8, 20], [9, 19]] and "nan" -> [[0]].
    """
    rows = []
    for entry in str(bp).split(':'):
        cleaned = entry.replace('(', '').replace(')', '').replace('nan', '0')
        rows.append([int(field) for field in cleaned.split(',')])
    return np.array(rows)

def _binding_site_count(cell):
    """Count the ':'-separated entries before the first 'nan' in `cell`.

    A cell that is exactly 'nan' counts as 0.
    """
    return len(cell.split('nan')[0].split(':')) - (cell == 'nan')


def _binding_ratio(cell):
    """Return (last - first value of the first coordinate) / (entries - 1).

    The cell is parsed once. With a single entry the ratio is 0/0, so NaN is
    returned directly instead of triggering a NumPy divide warning.
    """
    pairs = undo_bplist(cell)
    steps = len(pairs) - 1
    if steps == 0:
        return np.nan
    transposed = pairs.T
    return (transposed[0, -1] - transposed[0, 0]) / steps


# Add '<col>_counts' and '<col>_bindratio' for every binding-site column.
# Columns that already carry one of those suffixes are skipped, so running
# this again does not derive columns from derived columns.
for c in stats.columns:
    if 'binding_sites_' in c and not (('counts' in c) or ('bindratio' in c)):
        as_text = stats[c].apply(str)
        stats[c + '_counts'] = as_text.apply(_binding_site_count)
        stats[c + '_bindratio'] = as_text.apply(_binding_ratio)

# `initial` must exist before the aggregation below runs, because the lambda
# passed to `.agg` reads it when it is called.
# E_F_half is the first energy on the grid at which F(E) >= 0.5 (np.argmax on
# a boolean array returns the index of the first True, or 0 if there is none).
# F is presumably the fluorescence parameterisation - confirm in
# src.utils.modelling.physical.
E = np.arange(-80, 10, 0.5)
E_F_half = E[np.argmax(F(E) >= 0.5)]
# Value of (1/F - 1) at that energy, rounded to 2 decimals; passed as the
# second argument of equilibrium_constant_reparameterisation.
initial = np.round(1/1 * (1/F(E_F_half) - 1), 2)

# Mean of every derived binding-site column ...
aggd = {c: 'mean'
        for c in stats.columns if ('binding_sites_' in c) and (('_counts' in c) or ('_bindratio' in c))}
# ... and, for every energies column, both the mean and the reparameterised
# equilibrium constant evaluated at that mean.
aggd.update({c: ['mean',
                 lambda x: equilibrium_constant_reparameterisation(np.mean(x), initial)]
             for c in stats.columns if 'energies_' in c})

stats.groupby(['num_interacting', 'num_self_interacting'], as_index=False).agg(aggd)


def _plot_stacked_by_num_interacting(frame, aggregations, title, xlabel):
    """Draw one stacked horizontal bar per `num_interacting` group.

    Args:
        frame: DataFrame with a `num_interacting` column plus every column
            named in `aggregations`.
        aggregations: Mapping of column name to aggregation, handed to `.agg`.
        title: Plot title.
        xlabel: Label for the x axis.
    """
    grouped = frame.groupby('num_interacting', as_index=False).agg(aggregations)
    grouped.plot(
        x='num_interacting',
        kind='barh',
        stacked=True,
        title=title,
        mark_right=True)
    plt.xlabel(xlabel)
    plt.show()
    # Close after showing so figures do not accumulate between plots.
    plt.close()


# Mean number of binding sites per species pair.
aggd = {c: 'mean'
        for c in stats.columns if ('binding_sites_' in c) and ('_counts' in c)}
_plot_stacked_by_num_interacting(
    stats, aggd, 'Binding sites spread', 'Number of binding sites')

# Mean of each energies column.
aggd = {c: 'mean'
        for c in stats.columns if 'energies_' in c}
_plot_stacked_by_num_interacting(
    stats, aggd, 'Stacked spread of energies', 'kcal/mol')

# Mean of each eqconstants column.
aggd = {c: 'mean'
        for c in stats.columns if 'eqconstants_' in c}
_plot_stacked_by_num_interacting(stats, aggd, 'Mean K', 'K')


# E_F_half is the first energy on the grid at which F(E) >= 0.5 (np.argmax on
# a boolean array returns the index of the first True, or 0 if there is none).
E = np.arange(-80, 10, 0.5)
E_F_half = E[np.argmax(F(E) >= 0.5)]
# Value of (1/F - 1) at that energy, rounded to 2 decimals; passed as the
# second argument of equilibrium_constant_reparameterisation.
initial = np.round(1/1 * (1/F(E_F_half) - 1), 2)

# Equilibrium constant recomputed from the mean of each energies column.
# Each column has a single aggregation, so the result (and the plot legend)
# keeps the energies_* column names.
aggd = {c: lambda x: equilibrium_constant_reparameterisation(np.mean(x), initial)
        for c in stats.columns if 'energies_' in c}
_plot_stacked_by_num_interacting(
    stats, aggd, 'Mean K from parameterised fluorescence', 'K')


In [None]:
# Overrides applied to the configuration before running.
config['signal']['function_kwargs']['target'] = 0.5
config['simulation']['t1'] = 7500
config['simulation']['dt'] = 0.01

modeller = CircuitModeller(result_writer=data_writer, config=config)
batch_size = config['simulation'].get('batch_size', 100)

# Method names mapped to their keyword arguments, in the order they are
# listed here (presumably the order in which batch_circuits applies them -
# confirm in CircuitModeller).
methods = {
    "compute_interactions": {},
    "init_circuits": {'batch': True},
    "simulate_signal_batch": {
        'ref_circuit': None,
        'batch': config['simulation']['use_batch_mutations'],
    },
    "write_results": {
        'no_visualisations': config['experiment']['no_visualisations'],
        'no_numerical': config['experiment']['no_numerical'],
    },
}

circuits = modeller.batch_circuits(
    circuits=circuits,
    write_to_subsystem=True,
    batch_size=batch_size,
    methods=methods)

# Tabulate results from the writer's ensemble output directory.
info = b_tabulate_mutation_info(
    data_writer.ensemble_write_dir,
    data_writer=data_writer,
    experiment_config=config)
