In [3]:
import reactionmodel.load
import glob
import os
import re
import numpy as np
import pandas as pd
from dataclasses import dataclass
from typing import NamedTuple

from simulators import SIMULATORS
import test

@dataclass
class SimulatorArguments():
    t_span: tuple
    t_eval: tuple

TEST_ARGUMENTS = SimulatorArguments((0.0, 50.0), np.linspace(0, 50, 51))

inital_cwd = os.getcwd()

In [17]:
def do_simulations(s, n=200):
    results = []
    simulator = s.simulator
    forward_time = SIMULATORS[simulator]
    rng = np.random.default_rng()
    initial_condition = s.model.make_initial_condition(s.initial_condition)
    simulation_options = s.simulation_options.copy()

    if simulator == 'hybrid':
        import hybrid
        partition_path = simulation_options.pop('partition')
        partition_scheme = hybrid.load_partition_scheme(partition_path)
        simulation_options['partition_function'] = partition_scheme.partition_function
    k = s.model.get_k(parameters=s.parameters, jit=True)
    for i in range(n):
        print(i)
        result = forward_time(initial_condition, TEST_ARGUMENTS.t_span, k, s.model.stoichiometry(), s.model.rate_involvement(), rng, discontinuities=TEST_ARGUMENTS.t_eval, **simulation_options)
        results.append(result)
    return results

def align_results(results, time, target_indices, species_names):
    all_aligned = []

    for r in results:
        aligned = []
        t_history = r.t_history
        for t in time:
            idx = np.argmin(np.abs(t-t_history))
            aligned.append((r.t_history[idx], *[r.y_history[target_index,idx] for target_index in target_indices]))
        all_aligned.append(pd.DataFrame.from_records(aligned, columns=['time', *species_names]))

    indexed_results = []
    for r in all_aligned:
        r['time'] = np.round(r['time'], 5)
        r = r.set_index('time')
        indexed_results.append(r)

    return indexed_results

def z_score_for_mean(aligned_results, check_data, n):
    target_species = set([c.split('-')[0] for c in check_data.columns if len(c.split('-')) > 1])

    df = pd.concat(aligned_results, axis=1)
    results_to_check = pd.concat([df.groupby(by=df.columns, axis=1).mean(), df.groupby(by=df.columns, axis=1).std()], axis=1)
    results_to_check.columns = [c + '-mean' if i < len(target_species) else c + '-sd' for i,c in enumerate(results_to_check.columns)]

    # https://github.com/sbmlteam/sbml-test-suite/blob/release/cases/stochastic/DSMTS-userguide-31v2.pdf
    z_ts = {}
    for species in target_species:
        z_t = (results_to_check[f'{species}-mean'] - check_data[f'{species}-mean'])/(check_data[f'{species}-sd']) * np.sqrt(n)
        z_ts[species] = z_t
    
    z_ts = pd.DataFrame(z_ts)

    return results_to_check, z_ts

class TestResult(NamedTuple):
    results_df: pd.DataFrame
    check_df: pd.DataFrame
    z_scores_for_mean_by_species: pd.DataFrame

def single_test(specification, check_path, n=200, save=False):
    check_data = pd.read_csv(check_path)
    results = do_simulations(specification, n=n)
    desired_species = set([c.split('-')[0] for c in check_data.columns if len(c.split('-')) > 1])
    all_species = [s.name for s in specification.model.species]
    targets = [all_species.index(s) for s in desired_species]
    import pdb; pdb.set_trace()

    aligned = align_results(results, check_data['time'], targets, desired_species)

    results_table, z_ts = z_score_for_mean(aligned, check_data, n)

    if save:
        results_table.to_csv(os.path.join(os.path.dirname(check_path), f'n={n}_simulation_results.csv'))
        z_ts.to_csv(os.path.join(os.path.dirname(check_path), f'n={n}_simulation_zscores.csv'))

    return TestResult(results_table, check_data, z_ts)

def run_tests_with_checks(root, specifications, **kwargs):
    tests_to_do = []
    for check in glob.glob(os.path.join(root, 'checks', '*/')):
        check_dir_root = check.split('/')[-2]
        tests_to_do.append((specifications[check_dir_root], check))
    print(f"Performing tests with benchmark data for {len(tests_to_do)}/{len(specifications)} combinations.")

    test_results = {}
    for specification, check_dir in tests_to_do:
        # each check directory contains 1 CSV file with a name like check{SBML_TEST_NUMBER}.csv
        assert(len(glob.glob(os.path.join(check_dir, 'check*.csv')))) == 1, f"Check directory {check_dir} had more than 1 check csv. I don't know what to do"
        check_file = glob.glob(os.path.join(check_dir, 'check*.csv'))[0]
        test_results[check_dir] = single_test(specification, check_file, **kwargs)
    return test_results

def get_files(root, individual, collection, pattern):
    if os.path.isfile(os.path.join(root, individual)):
        return [os.path.join(root, individual)]
    return glob.glob(os.path.join(root, collection, pattern))

def run_tests_from_dir(dir, **kwargs):
    model_paths  = get_files(dir, 'model.txt', 'models', 'model*.txt')
    params_paths = get_files(dir, 'parameters.txt', 'parameters', 'parameters*.txt')
    config_paths = get_files(dir, 'config.txt', 'configurations', 'config*.txt')
    ic_paths     = get_files(dir, 'ic.txt', 'initial_conditions', 'initial*.txt')
    specifications = {}
    for model_path in model_paths:
        for params_path in params_paths:
            for config_path in config_paths:
                for ic_path in ic_paths:
                    specification = reactionmodel.load.load_specification(model_path, params_path, config_path, ic_path)
                    # use the parameter and ic file names as a unique identifier for this combination
                    # later, we will look up all the combinations that we have test data for, and run simulations to check
                    model_match = re.search('[a-z]+([0-9]+)\.txt', model_path)
                    config_match = re.search('[a-z]+([0-9]+)\.txt', config_path)
                    param_match = re.search('[a-z]+([0-9]+)\.txt', params_path)
                    ic_match = re.search('[a-z]+([0-9]+)\.txt', ic_path)
                    matches = [('m', model_match), ('c', config_match), ('p', param_match), ('i', ic_match)]
                    identifier = ''
                    for id_str, match in matches:
                        if match:
                            identifier += id_str + str(match[1])
                    # if identifier == '': all of the configuration files lived in root directory, so the check should just live in the root of the check directory
                    specifications[identifier] = specification
    return run_tests_with_checks(dir, specifications, **kwargs)

# Run all tests and save outputs

In [6]:
os.chdir(inital_cwd)
test_dir = "./tests/sbml-tests/"

In [None]:
tests = glob.glob(os.path.join(test_dir, 'sbml-*'))

n = 1000

for t_dir in tests:
    print(t_dir)
    run_tests_from_dir(t_dir, save=True, n=n)

# Run and analyze one test

In [19]:
os.chdir(inital_cwd)

target_test = "sbml-003-dimerisation"
#target_test = "sbml-001-birth-death"
target_check = "p01i01"

n = 5

specification = reactionmodel.load.load_specification(*test.get_path_tuple(os.path.join(test_dir, target_test), target_check))
check_file = glob.glob(os.path.join(test_dir, target_test, 'checks', target_check, 'check*.csv'))[0]
test_result = single_test(specification, check_file, n=n, save=True)

Evaluating expression: k1 => 0.001
Evaluating expression: k2 => 0.01
0
1
2
3
4
> [0;32m/var/folders/sv/t8bxbx_j571g94rv2ngmvx3m0000gn/T/ipykernel_98213/2041504609.py[0m(70)[0;36msingle_test[0;34m()[0m
[0;32m     68 [0;31m    [0;32mimport[0m [0mpdb[0m[0;34m;[0m [0mpdb[0m[0;34m.[0m[0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     69 [0;31m[0;34m[0m[0m
[0m[0;32m---> 70 [0;31m    [0maligned[0m [0;34m=[0m [0malign_results[0m[0;34m([0m[0mresults[0m[0;34m,[0m [0mcheck_data[0m[0;34m[[0m[0;34m'time'[0m[0;34m][0m[0;34m,[0m [0mtargets[0m[0;34m,[0m [0mdesired_species[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m     71 [0;31m[0;34m[0m[0m
[0m[0;32m     72 [0;31m    [0mresults_table[0m[0;34m,[0m [0mz_ts[0m [0;34m=[0m [0mz_score_for_mean[0m[0;34m([0m[0maligned[0m[0;34m,[0m [0mcheck_data[0m[0;34m,[0m [0mn[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0m
{'P2', 'P'}


  results_to_check = pd.concat([df.groupby(by=df.columns, axis=1).mean(), df.groupby(by=df.columns, axis=1).std()], axis=1)
  results_to_check = pd.concat([df.groupby(by=df.columns, axis=1).mean(), df.groupby(by=df.columns, axis=1).std()], axis=1)


## Analyze results

In [20]:
test_result.results_df

Unnamed: 0_level_0,P-mean,P2-mean,P-sd,P2-sd
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0.0,100.0,0.0,0.0,0.0
1.0,90.0,5.0,1.414214,0.707107
2.0,85.2,7.4,3.63318,1.81659
3.0,76.0,12.0,3.741657,1.870829
4.0,69.6,15.2,2.19089,1.095445
5.0,66.4,16.8,4.335897,2.167948
6.0,62.4,18.8,5.176872,2.588436
7.0,60.4,19.8,5.549775,2.774887
8.0,56.4,21.8,4.335897,2.167948
9.0,52.8,23.6,2.683282,1.341641


In [21]:
test_result.check_df

Unnamed: 0,time,P-mean,P2-mean,P-sd,P2-sd
0,0,100.0,0.0,0.0,0.0
1,1,91.031766,4.484117,3.862504,1.931252
2,2,83.61694,8.19153,4.815194,2.407597
3,3,77.396987,11.301506,5.26609,2.633045
4,4,72.115319,13.94234,5.489485,2.744742
5,5,67.583559,16.208221,5.592782,2.796391
6,6,63.660332,18.169834,5.628348,2.814174
7,7,60.237496,19.881252,5.624594,2.812297
8,8,57.230932,21.384534,5.597947,2.798974
9,9,54.574231,22.712885,5.558298,2.779149


In [22]:
for species, series in test_result.z_scores_for_mean_by_species.items():
    print(species)
    print(series)

P2
time
0.0          NaN
1.0     0.597307
2.0    -0.735138
3.0     0.593184
4.0     1.024582
5.0     0.473202
6.0     0.500713
7.0    -0.064604
8.0     0.331911
9.0     0.713761
10.0    0.736033
11.0    1.026731
12.0    0.917412
13.0    1.218512
14.0    1.257666
15.0    1.358455
16.0    1.172700
17.0    1.032136
18.0    1.106213
19.0    0.870364
20.0    0.314645
21.0    0.314684
22.0   -0.189869
23.0   -0.494517
24.0   -0.419015
25.0   -0.681242
26.0   -0.744536
27.0   -0.789824
28.0   -1.364444
29.0   -2.291734
30.0   -2.294506
31.0   -2.467178
32.0   -1.525565
33.0   -1.857672
34.0   -1.811629
35.0   -1.571040
36.0   -1.505790
37.0   -1.802524
38.0   -1.906883
39.0   -1.633141
40.0   -1.352291
41.0   -0.507098
42.0   -0.027133
43.0    0.272466
44.0    0.390911
45.0    0.327521
46.0    0.641164
47.0    0.772357
48.0    0.533949
49.0    0.858801
50.0    1.000208
Name: P2, dtype: float64
P
time
0.0          NaN
1.0    -0.597307
2.0     0.735138
3.0    -0.593184
4.0    -1.024581
5.0    -