In [1]:
import os
import numpy as np
import pandas as pd
import jinja2
import bibtexparser
from pathlib import Path
import datetime
from dataclasses import dataclass

In [41]:
# ----------------------------
# Core classes: Experiment, TheoreticalRanges, Simulation
# ----------------------------

class Experiment:
    """One experimental data table plus its stress and citation metadata.

    Parameters
    ----------
    data_source : str, os.PathLike or pandas.DataFrame
        A path is read with ``pd.read_csv`` and the experiment is named after
        the file stem. A DataFrame is copied and the experiment is named
        ``"worksheet_experiment"``.
    stresses : dict
        Stored unchanged on ``self.stresses``.
    bibtex : str
        BibTeX text; only the first parsed entry is used. A blank string
        yields metadata whose fields are all empty strings.

    Attributes set here: ``name``, ``experiment_data``, ``stresses``,
    ``bibtex``, ``non_species_cols``, ``species``. ``quant_data`` only exists
    after :meth:`quantitated_exp_data` has been called.

    Raises
    ------
    ValueError
        If ``data_source`` has an unsupported type, or if a non-blank
        ``bibtex`` string contains no parsable entry.
    """

    def __init__(self, data_source,  # path (str / os.PathLike) or pd.DataFrame
                 stresses: dict[str, tuple[float, str]],
                 bibtex: str = ""):

        if isinstance(data_source, (str, os.PathLike)):
            csv_path = os.fspath(data_source)
            self.name = os.path.splitext(os.path.basename(csv_path))[0]
            self.experiment_data = pd.read_csv(csv_path)
        elif isinstance(data_source, pd.DataFrame):
            self.name = "worksheet_experiment"
            # Work on a copy so the caller's frame is never modified.
            self.experiment_data = data_source.copy()
        else:
            raise ValueError("data_source must be a file path or a pandas DataFrame")

        self.stresses = stresses
        self.bibtex = self.parse_bibtex(bibtex)
        self.non_species_cols = {"TIME"}
        self.process_data()

    def parse_bibtex(self, bibtex_str):
        """Return the citation fields of the first BibTeX entry as a dict.

        The result always has the keys author, title, journal, volume, number,
        year and doi (doi falls back to the entry's ``url``); absent fields are
        empty strings. A blank input returns all-empty fields so that the
        constructor's default ``bibtex=""`` is usable.
        """
        plain_fields = ("author", "title", "journal", "volume", "number", "year")

        if not (bibtex_str or "").strip():
            # No citation supplied: nothing to parse.
            meta = {key: "" for key in plain_fields}
            meta["doi"] = ""
            return meta

        parser = bibtexparser.loads(bibtex_str)
        if not parser.entries:
            raise ValueError("BibTeX parse produced no entries.")
        entry = parser.entries[0]

        meta = {key: entry.get(key, "") for key in plain_fields}
        meta["doi"] = entry.get("doi", entry.get("url", ""))
        return meta

    def process_data(self) -> None:
        """Normalise column names, drop incomplete rows and list the species.

        Column labels are upper-cased (via ``str`` so non-string labels do not
        crash), except that ``TIME`` is then renamed to lower-case ``time``.
        Rows containing any missing value are dropped. ``self.species`` holds
        every column that is neither the time column nor contains ``STD``.
        """
        self.experiment_data.columns = [str(col).upper() for col in self.experiment_data.columns]
        self.experiment_data = (
            self.experiment_data
            .rename(columns={'TIME': 'time'})
            .dropna()
        )
        self.species = [
            v for v in self.experiment_data.columns
            if str(v).upper() not in self.non_species_cols and "STD" not in str(v).upper()
        ]

    def quantitated_exp_data(self, ics: dict[str, float]) -> None:
        """Scale every species / STD column by its initial-condition value.

        A column whose name contains ``STD`` is scaled by the value stored
        under the column name minus its last three characters; any other
        non-time column is scaled by the value stored under its own name.
        The scaled copy is stored on ``self.quant_data``; ``experiment_data``
        is left untouched.

        Raises
        ------
        KeyError
            If ``ics`` lacks the key required for a column.
        """
        quant_Data = self.experiment_data.copy()
        species_and_std = [col for col in quant_Data.columns if col.upper() not in self.non_species_cols]
        for s in species_and_std:
            ic_key = s[0:-3] if 'STD' in s.upper() else s
            if ic_key not in ics:
                raise KeyError(
                    f"No initial condition for '{ic_key}' (needed to scale column '{s}')"
                )
            quant_Data[s] *= ics[ic_key]
        self.quant_data = quant_Data


class TheoreticalRanges:
    """Concentration ranges per species, loaded from a CSV or Excel file.

    Parameters
    ----------
    min_max_path : str
        Path to a ``.csv`` file, or to an ``.xlsx`` file whose ``icranges``
        sheet is read. The extension check is case-insensitive.
    scaling_factor : float
        Multiplier applied to the ``minconc`` / ``maxconc`` columns.
    first_species_col : int
        Not used by this class; kept so existing callers keep working.

    Attributes: ``name`` (file stem), ``df_ranges`` (raw table),
    ``scaling_factor``, ``df_scaled_ranges`` (numeric columns times the
    scaling factor) and ``bounds`` (see :meth:`get_bounds`).

    Raises
    ------
    ValueError
        If the file is neither ``.csv`` nor ``.xlsx``.
    """

    def __init__(self, min_max_path: str, scaling_factor: float, first_species_col: int):
        min_max_path = os.fspath(min_max_path)
        self.name = os.path.splitext(os.path.basename(min_max_path))[0]

        lowered_path = min_max_path.lower()
        if lowered_path.endswith('.csv'):
            self.df_ranges = pd.read_csv(min_max_path)
        elif lowered_path.endswith('.xlsx'):
            self.df_ranges = pd.read_excel(min_max_path, sheet_name="icranges")
        else:
            raise ValueError("Range file must be .csv or .xlsx")

        self.scaling_factor = float(scaling_factor)
        self.df_scaled_ranges = self.df_ranges.select_dtypes(include='number') * self.scaling_factor
        self.bounds = self.get_bounds()

    def get_bounds(self) -> dict[str, list[float]]:
        """Build ``{species: [lower, upper]}`` from the range table.

        Each bound is the ``minconc`` / ``maxconc`` value times the scaling
        factor; a missing or negative bound is replaced by ``0.0``.
        """
        bounds = {}
        for _, row in self.df_ranges.iterrows():
            lb = row['minconc'] * self.scaling_factor
            ub = row['maxconc'] * self.scaling_factor
            if pd.isna(lb) or lb < 0:
                lb = 0.0
            if pd.isna(ub) or ub < 0:
                ub = 0.0
            bounds[row['species']] = [float(lb), float(ub)]
        return bounds

    def check_compatibility(self, experiment: "Experiment") -> None:
        """Ensure every species of ``experiment`` has an entry in ``bounds``.

        Species without a range get ``[0.0, 0.0]``. A ``UserWarning`` lists
        them because this fallback is easy to miss: the lookup is exact
        (case-sensitive), and an initial condition drawn from ``[0, 0]`` is
        zero, so any data multiplied by it becomes zero.
        """
        import warnings

        missing = [s for s in experiment.species if s not in self.bounds]
        for s in missing:
            self.bounds[s] = [0.0, 0.0]

        if missing:
            warnings.warn(
                "No concentration range found for species "
                f"{missing}; using bounds [0.0, 0.0] for them.",
                stacklevel=2,
            )


class Simulation:
    """Renders XML input files from an experiment and randomised initial conditions.

    On construction ``species_range.check_compatibility`` is called with the
    experiment, so the ranges object is updated in place.
    """

    # Placeholder used in file names when the citation has no year. It must be
    # a valid file-name fragment on every platform ('?' is reserved on Windows).
    _UNKNOWN_YEAR = "XXXX"

    def __init__(self, species_range: "TheoreticalRanges", experiment: "Experiment"):
        self.species_range = species_range
        self.experiment = experiment
        self.species_range.check_compatibility(experiment=self.experiment)

    def create_xml_files(self, output_xmls_path: str, num_of_xmls: int, xml_template_path: str) -> None:
        """Write ``num_of_xmls`` XML files (indices 1..N) into ``output_xmls_path``.

        The output folder is created if needed and the Jinja2 template is
        loaded once. For every file a fresh set of initial conditions is
        drawn, the experiment data are re-scaled with it, and the template is
        rendered and written.
        """
        os.makedirs(output_xmls_path, exist_ok=True)

        env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.dirname(xml_template_path)))
        self.template = env.get_template(os.path.basename(xml_template_path))

        for i in range(1, num_of_xmls + 1):
            self.random_ics = self.get_random_ics()
            self.experiment.quantitated_exp_data(ics=self.random_ics)
            self.make_xml_output(i, output_xmls_path)

    def get_random_ics(self) -> dict[str, float]:
        """Draw one initial condition per species uniformly from its bounds.

        Stresses whose type is ``"molecular_species"`` override the draw with
        their fixed value, and the key ``"REF"`` is always set to ``1.0``.
        Uses NumPy's global random state, so results are only reproducible if
        the caller seeds it.
        """
        bounds = self.species_range.bounds
        random_ics = {s: np.random.uniform(*bounds[s]) for s in bounds}

        for s, stress in self.experiment.stresses.items():
            if stress[1] == "molecular_species":
                random_ics[s] = stress[0]

        random_ics["REF"] = 1.0
        return random_ics

    @staticmethod
    def _author_tag(bib: dict) -> str:
        """Return the first whitespace-separated author token without trailing commas."""
        tokens = (bib.get('author') or "Unknown").split()
        # Only strip a trailing comma ("Mattiolo," -> "Mattiolo"); never cut
        # a real character from names that have no comma.
        tag = tokens[0].rstrip(",") if tokens else ""
        return tag or "Unknown"

    def make_xml_output(self, file_index: int, output_xmls_path: str) -> None:
        """Render the template for the current state and write it to disk.

        Reads ``self.template``, ``self.random_ics`` and the experiment's
        ``quant_data``. The file is named
        ``<author>_<year>_<experiment name>_<index, 4 digits>.xml``.
        """
        dataPoints = [self.compileDataRow(row.values) for _, row in self.experiment.quant_data.iterrows()]
        output = self.template.render(
            ics=self.random_ics,
            variables=self.experiment.species,
            dataPoints=dataPoints,
            bib=self.experiment.bibtex
        )

        author_tag = self._author_tag(self.experiment.bibtex)
        year = self.experiment.bibtex.get("year") or self._UNKNOWN_YEAR

        filename = f"{author_tag}_{year}_{self.experiment.name}_{file_index:04d}.xml"
        with open(os.path.join(output_xmls_path, filename), 'w', encoding="utf-8") as f:
            f.write(output)

    def compileDataRow(self, dataPoints):
        """Format one data row as a ``<dataPoint>`` XML fragment.

        Values are paired positionally with the columns of the experiment's
        ``experiment_data`` and written in ``.4e`` notation. Values are
        formatted directly, so column names containing braces are safe.

        Raises
        ------
        IndexError
            If fewer values than columns are supplied.
        """
        columns = list(self.experiment.experiment_data.columns)
        if len(dataPoints) < len(columns):
            raise IndexError("Fewer data values than experiment columns")
        meas = "".join(f"<{col}>{val:.4e}</{col}>" for col, val in zip(columns, dataPoints))
        return f"<dataPoint>{meas}</dataPoint>"


In [42]:
@dataclass
class NotebookInputs:
    """All settings needed for one notebook run, bundled in a single object.

    Required fields (positional order matters):
        experiment_xlsx_path: Excel workbook with the data worksheets; its
            last worksheet holds the BibTeX entry.
        theoretical_range_path: Range file, either .csv or .xlsx.
        scaling_factor: Multiplier for the ranges, e.g. 1e-12.
        first_species_col_index: Index of the first species column, e.g. 1.
        xml_template_path: Path of the XML template file.
        output_xml_dir: Folder that receives the produced XML files.
        output_opp_dir: Folder that receives the produced .opp files.
        num_xmls_per_sheet: Number of XML files to write per worksheet.

    Optional fields:
        stress_info: Stress description, e.g.
            "RAP molecular_species 100e-12" or "starvation"; empty for none.
        mech_file, yaml_file, time_limit, thread_limit, settings_tag, solver,
        extension: Defaults used when writing the .opp file.
    """

    # --- required inputs ---
    experiment_xlsx_path: str
    theoretical_range_path: str
    scaling_factor: float
    first_species_col_index: int
    xml_template_path: str
    output_xml_dir: str
    output_opp_dir: str
    num_xmls_per_sheet: int

    # --- optional stress description ---
    stress_info: str = ""

    # --- .opp file defaults ---
    mech_file: str = "7_Krisztian/mech/BCRN6.inp"
    yaml_file: str = "7_Krisztian/mech/BCRN6.yaml"
    time_limit: int = 50
    thread_limit: int = 32
    settings_tag: str = "systems_biology"
    solver: str = "cantera"
    extension: str = ".xml"


def parse_stress_info(stress_info: str) -> dict[str, tuple[float, str]]:
    """Turn the free-text stress description into a stresses mapping.

    Accepted inputs (tokens are separated by whitespace):
      * empty / blank / None          -> {}
      * one token, e.g. "starvation"  -> {"starvation": ("", "")}
        (the lone token "molecular_species" yields {})
      * "<species> molecular_species <value>" or
        "molecular_species <species> <value>"
                                      -> {"<SPECIES>": (float(value), "molecular_species")}
        The species name is upper-cased; the value must be the third token.

    Raises:
        ValueError: for any other token count, a third token that is not a
            number, or three tokens without "molecular_species" in the first
            or second position.
    """
    marker = "molecular_species"
    parts = (stress_info or "").split()

    if not parts:
        return {}

    if len(parts) == 1:
        flag = parts[0]
        # A bare flag names a non-molecular stress; the marker alone carries no information.
        return {} if flag == marker else {flag: ("", "")}

    if len(parts) != 3:
        raise ValueError(f"stress_info must have 0, 1, or 3 tokens, got: {parts}")

    first, second, raw_value = parts
    # The numeric value is converted before the token order is inspected.
    val = float(raw_value)

    if first == marker:
        species = second
    elif second == marker:
        species = first
    else:
        raise ValueError("For 3 tokens, one must be 'molecular_species'.")

    return {species.upper(): (val, marker)}


def read_bibtex_from_last_sheet(experiment_xlsx_path: str) -> str:
    """Return the BibTeX text stored in the last worksheet of a workbook.

    The first column of the last worksheet is read; missing and blank cells
    are skipped and the remaining cells are joined with newlines.

    ``pd.read_excel`` treats the first row of a sheet as the column header.
    If that header starts with ``@`` it is the opening line of the BibTeX
    entry itself (the sheet has no separate header row), so it is put back in
    front of the other lines. Any other header is ignored.

    Raises:
        ValueError: if the last worksheet has no columns or contains no
            non-blank text in its first column.
    """
    no_bibtex_msg = "No valid BibTeX found in the last worksheet (first column)."

    all_sheets = pd.read_excel(experiment_xlsx_path, sheet_name=None)
    if not all_sheets:
        raise ValueError(no_bibtex_msg)

    last_sheet_name = list(all_sheets.keys())[-1]
    bibtex_df = all_sheets[last_sheet_name]

    # A completely empty sheet has no column 0; report it like any other
    # "no BibTeX" case instead of failing with an IndexError.
    if bibtex_df.shape[1] == 0:
        raise ValueError(no_bibtex_msg)

    bibtex_lines = bibtex_df.iloc[:, 0].dropna().astype(str).tolist()

    header = bibtex_df.columns[0]
    if isinstance(header, str) and header.lstrip().startswith("@"):
        bibtex_lines.insert(0, header)

    bibtex_str = "\n".join([ln for ln in bibtex_lines if ln.strip()])

    if not bibtex_str.strip():
        raise ValueError(no_bibtex_msg)

    return bibtex_str


def generate_opp_content(
    xml_folder: str,
    worksheet_name: str,
    mech_file: str,
    yaml_file: str,
    time_limit: int,
    thread_limit: int,
    settings_tag: str,
    solver: str,
    extension: str = ".xml",
    mech_name: str = "BCRN6"
) -> str:
    """Build the text of an .opp file (a MECHMOD section followed by MECHTEST).

    Every file in ``xml_folder`` whose name contains ``worksheet_name`` and
    ends with ``extension`` is listed, in sorted order, as a ``NAME`` line of
    the MECHTEST section (POSIX-style path).

    Args:
        xml_folder: Folder searched (non-recursively) for the XML files.
        worksheet_name: Text that must occur literally in the file name.
        mech_file: Value written after ``MECH_FILE``.
        yaml_file: Value written after ``COMPILE_cantera``.
        time_limit: Value written after ``TIME_LIMIT``.
        thread_limit: Value written after ``THREAD_LIMIT``.
        settings_tag: Value written after ``SETTINGS_TAG``.
        solver: Value written after ``SOLVER``.
        extension: File-name suffix of the files to list.
        mech_name: Mechanism name used for ``USE_NAME`` and ``MECHANISM``.

    Returns:
        The complete .opp file content as one string.
    """
    import glob

    folder = Path(xml_folder)
    # Escape glob metacharacters so that names such as "run[1]" or "a*b" are
    # matched literally instead of being interpreted as patterns.
    pattern = f"*{glob.escape(str(worksheet_name))}*{glob.escape(str(extension))}"
    xml_files = sorted(folder.glob(pattern))

    mechmod = f"""MECHMOD
    USE_NAME         {mech_name}
    MECH_FILE        {mech_file}
    COMPILE_cantera  {yaml_file}
    END
    """

    mechtest = f"""MECHTEST
        MECHANISM  {mech_name}
        TIME_LIMIT {time_limit}
        THREAD_LIMIT {thread_limit}
        SETTINGS_TAG {settings_tag}
        FALLBACK_TO_DEFAULT_SETTINGS

        SOLVER {solver}
        SAVE_STATES      CSV
"""

    mechtest += "".join(f"      NAME {xml.as_posix()}\n" for xml in xml_files)

    mechtest += "END\n"
    return mechmod + "\n" + mechtest


def run_simulation_from_notebook(cfg: NotebookInputs, verbose: bool = True):
    """Generate the XML files and one .opp file for every data worksheet.

    Steps:
      1. Check that the three input files exist and create both output folders.
      2. Parse ``cfg.stress_info`` and read the BibTeX text from the last
         worksheet of the workbook.
      3. For every worksheet except the last one: build an ``Experiment``
         (named after the sheet), a fresh ``TheoreticalRanges`` and a
         ``Simulation``; write ``cfg.num_xmls_per_sheet`` XML files; then write
         an .opp file named ``<YYYYMMDD>_BCRN_<author>_<sheet>.opp``.

    Args:
        cfg: Run settings.
        verbose: When true, print the BibTeX text, the parsed stresses and
            one status line per worksheet.

    Returns:
        dict mapping each sheet name to a dict with the keys ``exp``, ``rng``,
        ``sim``, ``opp_path`` and ``opp_filename``.

    Raises:
        FileNotFoundError: if one of the three input paths does not exist.
    """
    # Basic sanity checks (helps debugging)
    for p in [cfg.experiment_xlsx_path, cfg.theoretical_range_path, cfg.xml_template_path]:
        if not os.path.exists(p):
            raise FileNotFoundError(p)
    os.makedirs(cfg.output_xml_dir, exist_ok=True)
    os.makedirs(cfg.output_opp_dir, exist_ok=True)

    stresses = parse_stress_info(cfg.stress_info)
    bibtex_str = read_bibtex_from_last_sheet(cfg.experiment_xlsx_path)

    if verbose:
        print("=== BibTeX read from last sheet ===")
        print(bibtex_str)
        print("=== Parsed stresses ===")
        print(stresses)

    all_sheets = pd.read_excel(cfg.experiment_xlsx_path, sheet_name=None)
    sheet_names = list(all_sheets.keys())
    data_sheet_names = sheet_names[:-1]  # everything except last (BibTeX)

    # One date stamp for the whole run so all .opp files share the same prefix.
    date_stamp = datetime.datetime.now().strftime("%Y%m%d")

    produced = {}  # sheet name -> {"exp", "rng", "sim", "opp_path", "opp_filename"}

    for sheet_name in data_sheet_names:
        df = all_sheets[sheet_name]

        exp = Experiment(df, stresses, bibtex_str)
        exp.name = sheet_name

        # A new ranges object per sheet: Simulation adds entries to its bounds
        # for species that have no range.
        rng = TheoreticalRanges(cfg.theoretical_range_path, cfg.scaling_factor, cfg.first_species_col_index)
        sim = Simulation(rng, exp)

        sim.create_xml_files(cfg.output_xml_dir, cfg.num_xmls_per_sheet, cfg.xml_template_path)

        opp_content = generate_opp_content(
            xml_folder=cfg.output_xml_dir,
            worksheet_name=sheet_name,
            mech_file=cfg.mech_file,
            yaml_file=cfg.yaml_file,
            time_limit=cfg.time_limit,
            thread_limit=cfg.thread_limit,
            settings_tag=cfg.settings_tag,
            solver=cfg.solver,
            extension=cfg.extension
        )

        # First author token without its trailing comma ("Mattiolo," ->
        # "Mattiolo"). Only commas are stripped, so the "Unknown" fallback and
        # comma-less names are not truncated.
        author_tokens = (exp.bibtex.get('author') or "Unknown").split()
        author_tag = (author_tokens[0].rstrip(",") if author_tokens else "") or "Unknown"
        opp_filename = f"{date_stamp}_BCRN_{author_tag}_{sheet_name}.opp"
        opp_path = os.path.join(cfg.output_opp_dir, opp_filename)

        with open(opp_path, "w", encoding="utf-8") as f:
            f.write(opp_content)

        # --- store everything under the experiment name (= sheet_name) ---
        produced[sheet_name] = {
            "exp": exp,
            "rng": rng,
            "sim": sim,
            "opp_path": opp_path,
            "opp_filename": opp_filename,
        }

        if verbose:
            print(f"[OK] Sheet '{sheet_name}': wrote XMLs to '{cfg.output_xml_dir}', OPP: {opp_path}")

    return produced


In [43]:
# ----------------------------
# Run configuration: edit the values below, then run this cell
# ----------------------------
cfg = NotebookInputs(
    # --- input files ---
    experiment_xlsx_path="../../../../BCRN/0_evaluate/input_files/Mattiolo_2015.xlsx",
    theoretical_range_path="../../../../BCRN/0_evaluate/input_files/reactions_ics_finalised_tester.xlsx",  # a .csv file works too
    xml_template_path="../../../../BCRN/0_evaluate/input_files/xml_template.xml",
    # --- output folders ---
    output_xml_dir="../../../../BCRN/xml",
    output_opp_dir="../../../../BCRN/1_mechtest",
    # --- generation settings ---
    scaling_factor=1e-12,
    first_species_col_index=1,
    num_xmls_per_sheet=1,
    stress_info="starvation",  # alternatives: "" or e.g. "RAP molecular_species 100e-12"
)

produced = run_simulation_from_notebook(cfg, verbose=True)
produced

=== BibTeX read from last sheet ===
@article{mattiolo2015autophagy,
title={Autophagy exacerbates caspase-dependent apoptotic cell death after short times of starvation},
author={Mattiolo, Paolo and Yuste, Victor J and Boix, Jacint and Ribas, Judit},
journal={Biochemical Pharmacology},
volume={98},
number={4},
pages={573--586},
year={2015},
publisher={Elsevier}
}
=== Parsed stresses ===
{'starvation': ('', '')}
[OK] Sheet 'Casp': wrote XMLs to '../../../../BCRN/xml', OPP: ../../../../BCRN/1_mechtest\20260116_BCRN_Mattiolo_Casp.opp
[OK] Sheet 'cyt': wrote XMLs to '../../../../BCRN/xml', OPP: ../../../../BCRN/1_mechtest\20260116_BCRN_Mattiolo_cyt.opp


{'Casp': {'exp': <__main__.Experiment at 0x1f92f7d8830>,
  'rng': <__main__.TheoreticalRanges at 0x1f92f7da660>,
  'sim': <__main__.Simulation at 0x1f92f7db770>,
  'opp_path': '../../../../BCRN/1_mechtest\\20260116_BCRN_Mattiolo_Casp.opp',
  'opp_filename': '20260116_BCRN_Mattiolo_Casp.opp'},
 'cyt': {'exp': <__main__.Experiment at 0x1f9302ca5d0>,
  'rng': <__main__.TheoreticalRanges at 0x1f92fb75090>,
  'sim': <__main__.Simulation at 0x1f92ef81590>,
  'opp_path': '../../../../BCRN/1_mechtest\\20260116_BCRN_Mattiolo_cyt.opp',
  'opp_filename': '20260116_BCRN_Mattiolo_cyt.opp'}}

In [46]:
# Inspect the scaled measurements produced for the 'Casp' worksheet.
# NOTE(review): if every value shown is 0.0, the initial condition drawn for
# that species was presumably 0 (bounds [0.0, 0.0]) - confirm against the range file.
produced["Casp"]["exp"].quant_data

Unnamed: 0,time,CASP,CASPSTD
0,0,0.0,0.0
1,3,0.0,0.0
2,6,0.0,0.0
3,9,0.0,0.0
4,12,0.0,0.0
5,16,0.0,0.0
6,24,0.0,0.0


In [None]:
# Debug aid: show the directory against which the relative paths in the
# configuration are resolved.
curr = os.getcwd()
print("Current working directory:", curr)


Current working directory: c:\Users\local_user\Desktop\Study\VII_semester\TDK\BCRN\0_evaluate\codes\others
