In [1]:
# imports
import os
import sys

# Put the coralme checkout at the front of sys.path so this notebook also works within Cursor.
# NOTE(review): this is a machine-specific absolute path; edit it before running on
# another machine. The commented-out alternative at the end of the line is '../'.
coralme_dir = '/home/chris/zuniga/coralme/' #'../'
sys.path.insert(0, coralme_dir)

import subprocess
from cobra.io import load_json_model, write_sbml_model
from bs4 import BeautifulSoup
import pandas as pd
import json

def extract_json_from_window_data(html):
    """Return the raw text of the JSON object assigned to ``window.data``.

    The text following the first ``window.data =`` marker is scanned one
    character at a time. Braces are counted, except those that appear inside
    double-quoted strings (backslash escapes are honoured), until the opening
    ``{`` has been balanced.

    Parameters
    ----------
    html : str
        Text to search.

    Returns
    -------
    str
        The substring from the opening ``{`` through its matching ``}``.
        The text is returned as-is; it is not parsed or validated as JSON.

    Raises
    ------
    ValueError
        If the marker is missing, if the first non-whitespace character after
        the marker is not ``{`` (including when the text simply ends there),
        or if the braces never balance.
    """
    marker = "window.data ="
    start = html.find(marker)
    if start == -1:
        raise ValueError("Could not find 'window.data =' in the HTML.")

    i = start + len(marker)
    end = len(html)

    # Skip whitespace to find the first {. The bound keeps truncated input
    # from indexing past the end of the string.
    while i < end and html[i] in " \n\r\t":
        i += 1

    if i >= end or html[i] != '{':
        raise ValueError("Expected '{' after 'window.data ='")

    json_start = i
    brace_count = 1
    in_string = False
    escape = False
    i += 1

    # Parse until all braces are closed
    while i < end:
        char = html[i]

        if in_string:
            if escape:
                # character after a backslash is consumed literally
                escape = False
            elif char == '\\':
                escape = True
            elif char == '"':
                in_string = False
        else:
            if char == '"':
                in_string = True
            elif char == '{':
                brace_count += 1
            elif char == '}':
                brace_count -= 1
                if brace_count == 0:
                    return html[json_start:i + 1]
        i += 1

    raise ValueError("Could not parse full JSON object from 'window.data ='")


In [2]:
# Run memote on each individual species model (reusing an existing HTML report
# when there is one) and collect the per-test results into one DataFrame per species.
base_dir = os.path.join(coralme_dir, 'species_files', 'Pseudomonas_files')
species_root = os.path.join(base_dir, 'individual_species')
output_dfs = []
# sorted() so the order of output_dfs does not depend on the filesystem listing order
for f in sorted(os.listdir(species_root)):
    if 'Reference' in f: continue

    species_dir = os.path.join(species_root, f)
    # plain files sitting next to the species folders have no model to score
    if not os.path.isdir(species_dir): continue

    M_json_path = os.path.join(species_dir, 'inputs', 'model.json')
    M_xml_path = os.path.join(species_dir, 'inputs', 'model.xml')
    report_path = os.path.join(species_dir, 'outputs', 'memote_report.html')

    # memote is pointed at the SBML file, so create it from the JSON model if it is missing
    if os.path.exists(M_json_path) and not os.path.exists(M_xml_path):
        model = load_json_model(M_json_path)
        write_sbml_model(model, M_xml_path)

    # run memote only when no report from a previous run exists
    if not os.path.exists(report_path):
        if not os.path.exists(M_xml_path):
            raise FileNotFoundError(
                f + ': neither model.xml nor model.json found in '
                + os.path.join(species_dir, 'inputs')
            )
        print(f+' running memote...')
        # make sure the folder the report is written into exists
        os.makedirs(os.path.dirname(report_path), exist_ok=True)
        memote_run = subprocess.run(
            ['memote', 'report', 'snapshot', '--filename', report_path, M_xml_path],
            capture_output=True, text=True,
        )
        # Judge success by the report file itself and surface memote's own
        # output, instead of failing later on open() with no explanation.
        if not os.path.exists(report_path):
            raise RuntimeError(
                f + ': memote did not produce ' + report_path
                + ' (exit code ' + str(memote_run.returncode) + ')\n'
                + memote_run.stderr
            )

    # Load the report and pull out the JSON blob embedded in it
    with open(report_path, encoding="utf-8") as file:
        html = file.read()

    json_str = extract_json_from_window_data(html)
    data = json.loads(json_str)

    # Summarize tests: one entry per test, in the order they appear in the report
    test_names = []
    test_results = []
    test_metrics = []

    for test_id, test in data.get("tests", {}).items():
        result = test.get("result", None)
        metric = test.get("metric", None)

        # fall back to the test id when the test has no title
        test_names.append(test.get("title", test_id))
        # dict results (e.g. per-database results) are recorded as None
        test_results.append(None if isinstance(result, dict) else result)
        # only plain numeric metrics are kept; anything else is recorded as None
        test_metrics.append(metric if isinstance(metric, (int, float)) else None)

    # One frame per species: rows are test titles, columns are <species>_result / <species>_metric
    output_df = pd.DataFrame(index = test_names)
    output_df[f+'_result'] = test_results
    output_df[f+'_metric'] = test_metrics
    output_dfs.append(output_df)

# TODO - concatenate all these output dataframes together