In [18]:
import datetime
import subprocess

import numpy as np
import pandas as pd

# The star import is deliberately kept between the two groups below: names
# imported after it still take precedence over anything it exports, exactly
# as in the original ordering.
from filegenerators import *

import os
from pathlib import Path
from typing import Union

import bibtexparser
import jinja2
from numpy.typing import NDArray

pd.options.display.float_format = '{:.2e}'.format

In [44]:
class Theoretical_Ranges:
    """Species value ranges read from an Excel sheet, plus derived bounds and sigmas.

    Construction reads the sheet, upper-cases the species names and builds:

    * ``inputs`` / ``input_data`` - fixed values for the named input species,
    * ``bounds`` - ``{SPECIES: [lower, upper]}`` used for random sampling,
    * ``sigmas`` - ``{SPECIES: sigma}`` derived from the bounds,
    * ``lut``    - the sheet rows concatenated with the input rows.
    """

    def __init__(self, min_max_path: str, input_names: list[str], must_be_zero: list[str],
                 scaling_factor: float=1e-12, sheet_name:str = 'icranges'):
        """Load the ranges sheet and build every derived lookup structure.

        Args:
            min_max_path: Path of the Excel workbook; its base name (without
                extension) becomes ``self.name``.
            input_names: Species treated as inputs (initialised to 0.0).
            must_be_zero: Species whose bounds are forced to ``[0, 0]``.
            scaling_factor: Factor applied to the numeric columns to produce
                ``df_scaled_ranges``.
            sheet_name: Worksheet to read; it must provide ``species`` and
                ``value`` columns.
        """
        self.name = os.path.splitext(os.path.basename(min_max_path))[0]
        self.df_ranges = pd.read_excel(min_max_path, sheet_name=sheet_name)
        self.df_ranges['value'] = self.df_ranges['value'].astype(float)
        self.df_ranges.species = [s.upper() for s in self.df_ranges.species]
        self.scaling_factor = scaling_factor
        self.df_scaled_ranges = self.df_ranges.select_dtypes(include='number') * self.scaling_factor
        self.must_be_zero = must_be_zero
        self.input_names = input_names
        self.get_input_data(input_names)
        self.bounds = self.get_bounds()
        self.get_sigmas()
        self.gen_lookuptable()

    def get_input_data(self, input_names: list[str]) -> None:
        """Populate ``self.inputs`` and the matching ``self.input_data`` table.

        Every name in ``input_names`` starts at 0.0; ``REF`` and ``Insulin``
        are then set to fixed values regardless of whether they were listed.
        The table stores each value multiplied by 1e+12 in all three numeric
        columns (``get_bounds`` multiplies by 1e-12 again).
        """
        self.inputs = {}
        for i in input_names:
            self.inputs[i] = 0.0
        self.inputs["REF"] = 1.0
        self.inputs["Insulin"] = 1e-10

        self.input_data = pd.DataFrame([
            {'species': species, 'minconc': value*1e+12, 'value': value*1e+12, 'maxconc': value*1e+12}
            for species, value in self.inputs.items()])

    def gen_lookuptable(self) -> None:
        """Build ``self.lut`` from the sheet rows followed by the input rows."""
        self.lut = pd.concat([self.df_ranges, self.input_data], ignore_index=True)    # look-up-table
        self.lut['species'] = self.lut['species'].str.upper()
        print(f"LUT was created successfully. Its dimensions are: {self.lut.shape}")

    def get_bounds(self) -> dict[str, list[float]]:
        """Return ``{SPECIES: [lower, upper]}`` for every known species.

        * Sheet rows with ``value < 0.1`` get the fixed interval [1e-14, 1e-13].
        * Other sheet rows get [value/2, value*1.5], both multiplied by 1e-12.
        * Input species missing from the sheet get their own min/max
          (multiplied by 1e-12).
        * Species listed in ``must_be_zero`` are finally forced to ``[0, 0]``.
        """
        bounds = dict()
        for _, row in self.df_ranges.iterrows():
            if row['value'] < 0.1:
                lb = 1e-14
                ub = 1e-13
            else:
                lb = (row['value']/2)*1e-12
                ub = (row['value']*1.5)*1e-12
            bounds[row['species'].upper()] = [lb, ub]
        # Diagnostic output kept from the original, but guarded: the sheet is
        # not guaranteed to contain a TBID row and a lookup miss used to abort
        # construction with a KeyError.
        if 'TBID' in bounds:
            print(bounds['TBID'])
        for _, row in self.input_data.iterrows():
            if row['species'].upper() not in bounds:
                bounds[row['species'].upper()] = [row['minconc']*1e-12, row['maxconc']*1e-12]
        for m in self.must_be_zero:
            bounds[m.upper()] = [0, 0]
        return bounds

    def get_sigmas(self) -> None:
        """Derive ``self.sigmas`` from ``self.bounds``.

        * must-be-zero species: 5e-18
        * non-degenerate interval: (upper - lower) / 8
        * degenerate interval (upper <= lower): 5e-15

        The must-be-zero test is case-insensitive because the keys of
        ``bounds`` are upper-cased while ``must_be_zero`` is stored as given.
        The branches are mutually exclusive, so the must-be-zero sigma is no
        longer overwritten by the interval-based value.
        """
        zero_species = {name.upper() for name in self.must_be_zero}
        self.sigmas = dict()
        for key, value in self.bounds.items():
            if key.upper() in zero_species:
                self.sigmas[key] = 5e-18
            elif value[1] > value[0]:
                self.sigmas[key] = ((value[1]-value[0])/8)
            else:
                self.sigmas[key] = 5e-15


In [85]:
class Experiment:
    """One experimental data sheet together with its bibliography and inputs.

    The data frame is copied, its column names are upper-cased (the time
    column is then renamed to ``time``), rows containing NaN are dropped and
    the measured species are collected in ``self.species``.
    """

    _STD_SUFFIX = "_STD"

    def __init__(self, data_source, # data_source: a pandas DataFrame
                 stresses: float, species_rng: "Theoretical_Ranges",
                 sheet_name: str, bibtex: str = "", inputs: dict[str, float] = None):
        """Store the experiment and pre-process its data.

        Args:
            data_source: pandas DataFrame holding one worksheet.
            stresses: Stress value associated with this experiment.
            species_rng: Object providing ``inputs``, ``bounds`` and ``sigmas``.
            sheet_name: Used as the experiment name.
            bibtex: BibTeX source text; an empty string yields blank
                bibliographic fields.
            inputs: Input values; defaults to ``species_rng.inputs``.

        Raises:
            ValueError: If ``data_source`` is not a DataFrame, or if a
                non-empty ``bibtex`` contains no entry.
        """
        self.rng = species_rng
        if isinstance(data_source, pd.DataFrame): # a pandas DataFrame, i.e. an xlsx worksheet
            self.name = sheet_name
            self.experiment_data = data_source.copy()
        else:
            raise ValueError("data_source must be a pandas DataFrame")

        self.stresses = stresses
        self.bibtex = self.parse_bibtex(bibtex)
        self.non_species_cols = {"TIME"}

        self.inputs = inputs if inputs is not None else species_rng.inputs
        self.process_data()

    def parse_bibtex(self, bibtex_str):
        """Return the bibliographic fields of the first BibTeX entry as a dict.

        Keys: author, title, journal, volume, number, year, doi (``doi`` falls
        back to the entry's ``url``). Missing fields are empty strings.

        An empty or whitespace-only string returns all-empty fields instead of
        raising, so that the constructor's default ``bibtex=""`` is usable.

        Raises:
            ValueError: If a non-empty string contains no BibTeX entry.
        """
        fields = ("author", "title", "journal", "volume", "number", "year")
        if not bibtex_str or not bibtex_str.strip():
            blank = {field: "" for field in fields}
            blank["doi"] = ""
            return blank

        parser = bibtexparser.loads(bibtex_str)
        if not parser.entries:
            raise ValueError("No valid BibTeX entry found.")
        entry = parser.entries[0]  # Assume only one entry is given

        parsed = {field: entry.get(field, "") for field in fields}
        parsed["doi"] = entry.get("doi", entry.get("url", ""))  # fallback if no DOI
        return parsed

    @classmethod
    def _is_std_column(cls, column: str) -> bool:
        """True when the column name ends with the ``_STD`` suffix (case-insensitive)."""
        return column.upper().endswith(cls._STD_SUFFIX)

    def process_data(self) -> None:
        """Normalise column names, drop incomplete rows and list the species."""
        data = self.experiment_data
        data.columns = [col.upper() for col in data.columns]
        # The time column is left in its original unit: converting hours to
        # minutes made some of the generated XMLs fail (original author's note).
        data = data.rename(columns={'TIME': 'time'})
        self.experiment_data = data.dropna()
        self.species = [col for col in self.experiment_data.columns
                        if col.upper() not in self.non_species_cols
                        and not self._is_std_column(col)]

    def quantitated_exp_data(self, ics: dict[str, float]) -> None:
        """Scale the data by the initial conditions and store it in ``self.quant_data``.

        Each species column is multiplied by ``ics[SPECIES]``; each
        ``<SPECIES>_STD`` column is multiplied by the value of its species.
        A species without an ``_STD`` column gets one, filled with the sigma
        stored for that species in ``self.rng.sigmas``.

        Raises:
            KeyError: If a species is missing from ``ics`` or, when an STD
                column has to be created, from ``self.rng.sigmas``.
        """
        quant = self.experiment_data.copy()
        suffix_len = len(self._STD_SUFFIX)

        # Iterate over a snapshot: STD columns may be appended inside the loop.
        for col in list(quant.columns):
            name = col.upper()
            if name in self.non_species_cols:
                continue
            if self._is_std_column(col):
                quant[col] *= ics[name[:-suffix_len]]
                continue
            quant[col] *= ics[name]
            std_col = f"{col}_STD"
            if std_col not in quant.columns:
                quant[std_col] = self.rng.sigmas[name]

        self.quant_data = quant

    def check_compatibility(self) -> None:
        """Add ``[0, 0]`` bounds for any measured species the ranges do not know.

        The entry is stored under the upper-cased name, i.e. the same key that
        is used for the lookup.
        """
        for s in self.species:
            key = s.upper()
            if key not in self.rng.bounds:
                self.rng.bounds[key] = [0, 0]
                print(f"Creating new entry for {s}\n")


In [51]:
class Genesis:
    """Generates XML input files for an experiment from randomly drawn initial conditions."""

    def __init__(self, experiment: "Experiment", max_digit = 4):
        """
        Args:
            experiment: The experiment whose data is written out.
            max_digit: Width to which the file index is zero-padded.
        """
        self.experiment = experiment
        self.maxdigit = max_digit

    def create_xml_files(self, output_xmls_path: str, num_xmls: int, xml_template_path: str) -> None:
        """Render ``num_xmls`` XML files into ``output_xmls_path``.

        For file ``i`` (1-based) the global NumPy RNG is seeded with ``i``,
        initial conditions are drawn, the experiment data is quantitated with
        them and the Jinja2 template is rendered.

        The target directory is remembered as ``self.output_dir`` so that
        later stages can locate the generated files.
        """
        os.makedirs(output_xmls_path, exist_ok=True)
        self.output_dir = output_xmls_path
        self.num_xmls = num_xmls
        env = jinja2.Environment(loader=jinja2.FileSystemLoader(os.path.dirname(xml_template_path)))
        self.template = env.get_template(os.path.basename(xml_template_path))

        for i in range(1, num_xmls+1):
            np.random.seed(i)  # one seed per file keeps each file reproducible on its own
            self.random_ics = self.get_random_ics()
            self.experiment.quantitated_exp_data(ics=self.random_ics)
            self.make_xml_output(i, output_xmls_path)

    def get_random_ics(self) -> dict[str, float]:
        """Draw one set of initial conditions.

        Measured species are sampled uniformly within their bounds (or set to
        0 when listed in ``must_be_zero``); the experiment's inputs are then
        copied over, and ``REF`` is fixed to 1.0.
        """
        rng = self.experiment.rng
        random_ics = {}
        for key in self.experiment.species:
            # NOTE(review): this membership test is case-sensitive; species
            # names and must_be_zero entries may differ in case - confirm
            # whether a case-insensitive match is intended.
            if key not in rng.must_be_zero:
                random_ics[key] = np.random.uniform(*rng.bounds[key])
            else:
                random_ics[key] = 0
        for key in self.experiment.inputs.keys():
            random_ics[key] = self.experiment.inputs[key]
        random_ics["REF"] = 1.0
        return random_ics

    def make_xml_output(self, file_index: int, output_xmls_path: str) -> None:
        """Render the template for the current ``quant_data`` and write it to disk."""
        quant_data = self.experiment.quant_data
        columns = quant_data.columns
        dataPoints = [self.compileDataRow(row.values, columns) for _, row in quant_data.iterrows()]
        output = self.template.render(ics=self.random_ics, variables=self.experiment.species,
                                      dataPoints=dataPoints, bib=self.experiment.bibtex)
        padded_number = str(file_index).zfill(self.maxdigit)
        filename = f"stressful_life_{padded_number}.xml"
        with open(os.path.join(output_xmls_path, filename), 'w') as f:
            f.write(output)

    def compileDataRow(self, dataPoints, columns=None):
        """Format one data row as a ``<dataPoint>`` element.

        Args:
            dataPoints: The row's values, in column order.
            columns: Tag names, one per value. Defaults to the columns of the
                experiment's ``quant_data`` when it exists, otherwise to those
                of ``experiment_data``.

        The tags must come from the same frame as the values: ``quant_data``
        can carry extra ``*_STD`` columns, and ``str.format`` silently ignores
        surplus positional arguments, so using the raw frame's columns would
        drop those values from the output.
        """
        if columns is None:
            quant_data = getattr(self.experiment, "quant_data", None)
            source = quant_data if quant_data is not None else self.experiment.experiment_data
            columns = source.columns
        meas = "".join(f"<{v}>{{:.4e}}</{v}>" for v in columns)
        return f"<dataPoint>{meas.format(*dataPoints)}</dataPoint>"


In [28]:
class Simulation:
    """Groups generated XML indices into .opp job files and runs them with OptimaPP."""

    def __init__(self, gen: "Genesis", xmls_in_one_opp: int) -> None:
      """
      Args:
          gen: Generator providing ``num_xmls`` and ``maxdigit`` (and
              ``output_dir`` when .opp files are written).
          xmls_in_one_opp: Maximum number of XML files referenced by one .opp file.
      """
      self.gen = gen
      self.get_xml_vec(xmls_in_one_opp)

    def get_xml_vec(self, xmls_in_one_opp) -> None:
      """Split the indices 1..num_xmls into consecutive chunks in ``self.xmls``.

      Every index from 1 to ``num_xmls`` inclusive is covered exactly once;
      the last chunk is shorter when ``num_xmls`` is not a multiple of the
      chunk size. (The range previously stopped one short, which dropped the
      final index whenever it started a new chunk, and the last chunk could
      run past ``num_xmls``.)
      """
      last = self.gen.num_xmls
      self.xmls = [np.arange(start, min(start + xmls_in_one_opp, last + 1), 1)
                   for start in range(1, last + 1, xmls_in_one_opp)]

    def opp_gen(self, opp_output_dir, opp_name_prefix, kiir, mech_file, food) -> None:
      """Name one .opp file per chunk and, when ``kiir`` is true, write it.

      Args:
          opp_output_dir: Directory receiving the .opp files (created if missing).
          opp_name_prefix: File name prefix; the chunk's last index is appended.
          kiir: When false, only the names in ``self.opps`` / ``self.indices``
              are computed and nothing is written.
          mech_file: Path of the mechanism ``.inp`` file.
          food: Name prefix of the XML files referenced inside the .opp file.

      Raises:
          AttributeError: If files are to be written but ``gen`` does not
              expose the XML directory as ``output_dir``.
      """
      self.opps = []
      self.indices = []
      self.food = food
      self.mech_file = mech_file
      self.mech_inp_name = self.mech_file.split('/')[-1].split('.inp')[0]
      for num in self.xmls:
          opp_filename = f"{opp_name_prefix}_{num[-1]}.opp"
          self.opps.append(opp_filename)
          self.indices.append(f"{num[-1]}")
          if not kiir:
              continue
          xml_folder = getattr(self.gen, "output_dir", None)
          if xml_folder is None:
              raise AttributeError(
                  "gen.output_dir is not set; it must hold the directory of the generated XML files")
          opp_content = self.generate_opp_content(xml_folder=xml_folder,
                                                  num_xmls=num,
                                                  mech_file=mech_file,
                                                  name=self.food)
          os.makedirs(opp_output_dir, exist_ok=True)
          with open(os.path.join(opp_output_dir, opp_filename), "w") as f:
              f.write(opp_content)

    def generate_opp_content(self, xml_folder: str, num_xmls: Union[list[int], list[list[int]]],
                             mech_file: str, name: str, time_limit: int = 50, thread_limit: int = 32,
                             settings_tag: str = "systems_biology", solver: str = "cantera") -> str:
      """Return the text of one .opp file: a MECHMOD section followed by MECHTEST.

      ``num_xmls`` is iterated and each element is zero-padded to
      ``gen.maxdigit`` digits to form ``<xml_folder>/<name>_<index>.xml``.

      NOTE(review): the referenced XML names must match the names the XML
      generator actually writes - verify the ``name`` prefix against it.
      """
      mech_name = mech_file.split('/')[-1].split('.inp')[0]
      yaml_stem = mech_file.split('.inp')[0]

      # Create MECHMOD section (the indentation inside the literal is part of the output)
      mechmod = f"""MECHMOD
      USE_NAME         {mech_name}
      MECH_FILE        {mech_file}
      COMPILE_{solver} {yaml_stem}.yaml
      END
      """

      # Create MECHTEST section
      mechtest = f"""MECHTEST
      MECHANISM  {mech_name}
      TIME_LIMIT {time_limit}
      THREAD_LIMIT {thread_limit}
      SETTINGS_TAG {settings_tag}
      FALLBACK_TO_DEFAULT_SETTINGS
      PLOTS FALSE

      SOLVER {solver}
      SAVE_STATES      CSV
      """

      # Add each XML file name
      for xml in num_xmls:
          padded_number = str(xml).zfill(self.gen.maxdigit)
          mechtest += f"      NAME {xml_folder}/{name}_{padded_number}.xml\n"

      mechtest += "END\n"

      return mechmod + "\n" + mechtest

    def sim_runner(self, log_location:str = ''):
      """Run OptimaPP once per .opp file, from three directory levels above the cwd.

      When ``log_location`` is non-empty, stdout and stderr of each run are
      written to ``run_log_stac_starve_rap<last index>.txt`` in that directory;
      otherwise the output is inherited.

      Raises:
          subprocess.CalledProcessError: If a run exits with a non-zero status.
      """
      self.parent_path = Path.cwd().parents[2]

      for idx, opp_file in enumerate(self.opps):
          command = ["bin/Release/OptimaPP", f"7_Krisztian/1_mechtest/{opp_file}"]
          print(f"Running: {' '.join(command)}")
          if log_location == '':
              subprocess.run(command, check=True, cwd=self.parent_path)
              continue
          log_idx = self.xmls[idx][-1]
          with open(f"{log_location}/run_log_stac_starve_rap{log_idx}.txt", "w") as log:
              subprocess.run(command, check=True, stdout=log, stderr=subprocess.STDOUT, cwd=self.parent_path)

In [23]:
# Rapamycin stress levels.
#
# Author's notes (kept from the original):
#   * 1e-12 scaling for mol/cm^3
#   * the listed doses are amounts in nano mol, NOT mol/dm^3 (= nM) concentrations
#   * the dose is assumed to be uniformly "absorbed" by 2*10^5 cells
# NOTE(review): the original note gave the volume as V = 10e-12 dm^3, while the
# expression below uses 1e-12 (labelled V_cell_in_L) - confirm which is intended.
rap_input = np.array([100, 50, 10, 5, 1, 0.5, 0.1, 0]) * 1e-12  # Converting to mol/cm^3 (=mol/mL)
rap_in_cells = rap_input / (200000 * 1e-12 * 1e+3)  # rap_in_well_mol / (cell_num * V_cell_in_L * conversion_factor_to_mL)
stress1 = 'rap'
# np.float64 instead of the np.float_ alias: the alias was removed in NumPy 2.0
# and this module-level annotation is evaluated when the cell runs.
stresses: dict[str, NDArray[np.float64]] = {stress1: rap_in_cells}
stresses

{'rap': array([5.0e-07, 2.5e-07, 5.0e-08, 2.5e-08, 5.0e-09, 2.5e-09, 5.0e-10,
        0.0e+00])}

In [70]:
# Species that are treated as fixed inputs of the model.
input_names = [
    'nS', 'RAP', 'TG', 'dS', 'CCH', 'REF', 'Insulin',
    'TG_SERCA', 'RKMTORA', 'casp', 'IP3R', 'Baxa', 'tBid',
]
# Species whose bounds are forced to [0, 0].
must_be_zero = [
    'casp', 'Baxa', 'tBid', 'p53a', 'PUMA',
]

# Build the ranges object from the workbook; the constructor prints a
# diagnostic line and the dimensions of the look-up table.
rng = Theoretical_Ranges(
    min_max_path='../input_files/reactions_ics_finalised.xlsx',
    input_names=input_names,
    must_be_zero=must_be_zero,
)

[1e-14, 1e-13]
LUT was created successfully. Its dimensions are: (85, 4)


In [86]:
exp_xlsx_path = '../input_files/Mattiolo_2015_tester.xlsx'
xml_template = '../input_files/xml_template.xml'
opp_output_dir = '../../1_mechtest'
num_xmls = 10

# Read all sheets from Excel: dict of {sheet_name: DataFrame}.
all_sheets = pd.read_excel(exp_xlsx_path, sheet_name=None)

# The last sheet holds the BibTeX entry; every sheet before it is one experiment.
*data_sheet_names, last_sheet_name = all_sheets.keys()
bibtex_df = all_sheets[last_sheet_name]
# If the BibTeX sheet had no header row, it would have to be read with:
# bibtex_df = pd.read_excel(exp_xlsx_path, sheet_name=last_sheet_name, header=None)

# Join all non-empty cells of the first column into one BibTeX string and
# strip the "_x000d_" residue of malformed carriage returns.
bibtex_lines = bibtex_df.iloc[:, 0].dropna().astype(str).tolist()
bibtex_str = "\n".join(bibtex_lines).replace("_x000d_", "")

# Zero-padded YYYYMMDD: without padding, e.g. 2025-01-11 and 2025-11-01 would
# both produce the prefix "2025111".
date = datetime.datetime.now()
date_prefix = date.strftime("%Y%m%d")

# Each experiment sheet is paired with the stress level at the same position.
rap_levels = stresses['rap']
if len(data_sheet_names) > len(rap_levels):
    raise ValueError(
        f"{len(data_sheet_names)} experiment sheets but only {len(rap_levels)} stress levels")

for i, sheet_name in enumerate(data_sheet_names):
    df = all_sheets[sheet_name]
    exp = Experiment(df, rap_levels[i], rng, sheet_name, bibtex_str)
    # First token of the author field, without a trailing comma ("Surname, ...").
    first_author = exp.bibtex['author'].split()[0].rstrip(',')
    output_dir = f"../../xml/{first_author}_{exp.bibtex['year']}/{exp.name}"
    gen = Genesis(exp, max_digit=4)
    gen.create_xml_files(output_dir, num_xmls, xml_template)
    sim = Simulation(gen, num_xmls)
    # kiir=False: only the .opp file names are computed, nothing is written.
    sim.opp_gen(opp_output_dir=opp_output_dir,
                opp_name_prefix=f'{date_prefix}',
                kiir=False, mech_file='7_Krisztian/mech/BCRN6.inp',
                food='rap')
    

In [87]:
# Quantitated data of the experiment currently bound to `exp` (the last sheet
# processed by the loop), as produced by the most recent quantitated_exp_data call.
exp.quant_data

Unnamed: 0,time,CYTC,CYTC_STD
0,0,5.27e-15,1.13e-14
1,8,3.51e-14,1.13e-14
2,22,7.94e-14,1.13e-14


In [88]:
# Initial conditions drawn for the last XML file generated by `gen`.
gen.random_ics

{'CYTC': 7.941885789400714e-14,
 'nS': 0.0,
 'RAP': 0.0,
 'TG': 0.0,
 'dS': 0.0,
 'CCH': 0.0,
 'REF': 1.0,
 'Insulin': 1e-10,
 'TG_SERCA': 0.0,
 'RKMTORA': 0.0,
 'casp': 0.0,
 'IP3R': 0.0,
 'Baxa': 0.0,
 'tBid': 0.0}

In [80]:
# Pre-processed (not yet quantitated) data of the experiment bound to `exp`:
# upper-cased column names except `time`, rows containing NaN dropped.
exp.experiment_data

Unnamed: 0,time,CASP,CASP_STD
0,0,1.0,0.0
1,3,904000.0,0.188
2,6,5.79,1.52
3,9,4.04,0.542
4,12,3.98,1.17
5,16,7.15,0.5
6,24,7.52,0.583


In [73]:
# [lower, upper] bounds stored for the CYTC species.
rng.bounds['CYTC']

[1e-14, 1e-13]

In [74]:
# Initial conditions drawn for the last XML file generated by `gen`
# (same inspection as in an earlier cell).
gen.random_ics

{'CYTC': 7.941885789400714e-14,
 'nS': 0.0,
 'RAP': 0.0,
 'TG': 0.0,
 'dS': 0.0,
 'CCH': 0.0,
 'REF': 1.0,
 'Insulin': 1e-10,
 'TG_SERCA': 0.0,
 'RKMTORA': 0.0,
 'casp': 0.0,
 'IP3R': 0.0,
 'Baxa': 0.0,
 'tBid': 0.0}

In [None]:
# NOTE: run the command below from the directory that contains 'bin', or adjust the paths in it accordingly.

# bin/Release/OptimaPP 7_Krisztian/1_mechtest/2025625_BCRN_Beesabathuni_Rap_100_nM.opp