In [0]:
%restart_python
%load_ext autoreload
%autoreload 2
%pip install fpdf
%pip install reportlab
%pip install --upgrade crewai
%pip install --upgrade "mlflow[databricks]>=3.1" crewai
# Enables autoreload; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload; run %autoreload 0

In [0]:
# List installed packages and their versions in the notebook's Python environment,
# so the exact dependency set used for this run is visible in the output.
%pip freeze

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import sys
import os
from typing import Dict
import seaborn as sns
import base64
import shutil

def analyze_workflow(results):
    """Print and display a post-run summary for a single simulation run.

    Reports the run id and final titer, renders the run figure through
    ``visualize_run``, then fetches the run's telemetry (``is_observed=True``)
    and anomalies (``only_detected=True``) from ``workflow.data_lake`` and
    shows a ten-row preview of each.

    Relies on the notebook-level names ``workflow``, ``spark``,
    ``visualize_run`` and ``display`` being defined before it is called.

    Args:
        results: Mapping describing one run; the keys ``'run_id'`` and
            ``'final_titer'`` are read here.

    Returns:
        Whatever ``visualize_run(results)`` returned.
    """
    print("\nSimulation complete!")
    run_id = results['run_id']
    print(f"   Run ID: {run_id}")
    print(f"   Final Titer: {results['final_titer']:.2f} mg/mL")

    # Render the simulation plots; the figure is handed back to the caller.
    figure = visualize_run(results)

    # Telemetry: time-stamped signal data recorded for this run.
    telemetry_df = workflow.data_lake.get_run_telemetry(spark, run_id, is_observed=True)
    print(f"Telemetry Data Shape: {telemetry_df.shape}")
    signal_names = telemetry_df['signal_name'].unique()
    print(f"\nSignals captured: {signal_names}")
    first_hour = telemetry_df['time_h'].min()
    last_hour = telemetry_df['time_h'].max()
    print(f"Time range: {first_hour:.1f} - {last_hour:.1f} hours")
    display(telemetry_df.head(10))

    # Anomalies flagged for this run.
    anomalies_df = workflow.data_lake.get_anomalies(spark, run_id, only_detected=True)
    print(f"Anomalies Detected: {len(anomalies_df)}")
    if anomalies_df.empty:
        print("No anomalies detected in this run")
    else:
        # Same breakdown twice, grouped by a different column each time.
        for label, column in (("signal", 'signal_name'), ("method", 'method')):
            print(f"\nAnomalies by {label}:")
            print(anomalies_df.groupby(column).size())
        display(anomalies_df.head(10))

    return figure

def create_download_link(file_path, link_text="Download PDF"):
    """Display an HTML link that downloads ``file_path`` from the notebook.

    The file is read fully into memory, base64-encoded and embedded in a
    ``data:application/pdf`` URI, so the file content travels inside the link
    itself. Large files therefore produce equally large notebook output.

    Args:
        file_path: Path of the file to embed. The MIME type is always
            ``application/pdf``, regardless of the file's extension.
        link_text: Label shown for the link. Inserted into the markup as-is,
            so it may itself contain HTML.

    Returns:
        None. The link is rendered through ``IPython.display.display``.

    Raises:
        OSError: If ``file_path`` cannot be opened or read.
    """
    import html
    from IPython.display import display, HTML

    with open(file_path, "rb") as f:
        data = f.read()
    b64 = base64.b64encode(data).decode()

    # The file name lands inside a double-quoted HTML attribute; escape it so
    # characters such as `&` or `"` cannot break or alter the generated markup.
    download_name = html.escape(os.path.basename(file_path), quote=True)

    href = f'<a href="data:application/pdf;base64,{b64}" download="{download_name}">{link_text}</a>'
    display(HTML(href))

# setup: make the project's `modules` directory the first import location,
# adding it only once so re-running the cell does not duplicate the entry
PROJECT_ROOT = '/Workspace/Repos/synthetic-twin/synthetic_twin'
MODULES_PATH = os.path.join(PROJECT_ROOT, 'modules')
already_on_path = MODULES_PATH in sys.path
if not already_on_path:
    sys.path.insert(0, MODULES_PATH)

# import modules
from config import (SIMULATION_PARAMS, INITIAL_STATE, KINETIC_PARAMS, 
                    REACTOR_PARAMS, SENSOR_PARAMS, FAULT_TEMPLATES, SCENARIOS, SCORING_CONFIG)
from models import BioreactorSimulation, FaultManager
from anomaly_detection import (AnomalyDetectionEngine, create_default_bioreactor_config)
from data_lake import BioreactorDataLake
from run_simulation_workflow import SABRWorkflow, visualize_run
from reporting import BioreactorPDFReport
print("All modules imported successfully!")

# configuration: bundle the parameter sets imported from `config`,
# each stored under a key equal to its own name
config = dict(
    SIMULATION_PARAMS=SIMULATION_PARAMS,
    INITIAL_STATE=INITIAL_STATE,
    KINETIC_PARAMS=KINETIC_PARAMS,
    REACTOR_PARAMS=REACTOR_PARAMS,
    SENSOR_PARAMS=SENSOR_PARAMS,
    FAULT_TEMPLATES=FAULT_TEMPLATES,
)

# initialize workflow with both the agent and anomaly detection switched on
workflow = SABRWorkflow(
    spark=spark,
    config_dict=config,
    enable_agent=True,
    enable_anomaly_detection=True,
)

# test multiple scenarios with replicates
scenarios_to_test = ['standard', 'overfeed', 'DO_drop', 'contamination']
# Single source of truth for the replicate count: the banner below and the
# inner loop both read it, so the reported total always matches what is run.
N_REPLICATES = 1
# A run is flagged as a success when its final titer is strictly above this
# value (same units as results['final_titer']).
SUCCESS_TITER_THRESHOLD = 5.0

print("Running batch scenario analysis...")
print(f"   Scenarios: {scenarios_to_test}")
print(f"   Replicates per scenario: {N_REPLICATES}")
print(f"   Total runs: {len(scenarios_to_test) * N_REPLICATES}")
print("\nThis may take several minutes...\n")

reporter = BioreactorPDFReport()
# create a timestamped output folder so repeated batches never overwrite each other
timestamp_str = datetime.now().strftime("%Y%m%d_%H%M%S")
output_folder = f"/tmp/sabr_runs_{timestamp_str}"
os.makedirs(output_folder, exist_ok=True)

# run_name -> results mapping for every completed run
all_results = {}

for scenario in scenarios_to_test:
    # NOTE(review): faults are injected once per scenario, not once per
    # replicate — confirm this is still correct if N_REPLICATES is raised.
    workflow.inject_scenario_faults(scenario=scenario)
    for rep in range(1, N_REPLICATES + 1):  # replicate numbers are 1-based
        print(f"\nRunning simulation: {scenario}, replicate {rep}")
        results = workflow.run_with_monitoring(base_feed_rate=0.1, save_to_lake=True)
        fig = analyze_workflow(results)

        # Per-run timestamp keeps file names unique within the batch folder.
        run_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        run_name = f"{scenario}_rep{rep}_{run_timestamp}"

        # store results in dictionary
        all_results[run_name] = results

        # save CSV summary (one row per run)
        csv_path = os.path.join(output_folder, f"{run_name}_summary.csv")
        pd.DataFrame([{
            'run_id': results['run_id'],
            'scenario': scenario,
            'replicate': rep,
            'final_titer_g_L': float(results['final_titer']),
            'final_biomass_g_L': float(results['final_biomass']),
            'total_anomalies': int(results['num_anomalies']),
            'success': bool(results['final_titer'] > SUCCESS_TITER_THRESHOLD),
            'timestamp': run_timestamp,
            'agent_explain': str(results['agent_explain'])
        }]).to_csv(csv_path, index=False)
        print(f"Summary CSV saved to {csv_path}")

        # NOTE(review): FAULT_TEMPLATES[scenario] raises KeyError if a scenario
        # name (e.g. 'standard') has no fault template — verify against config.
        pdf_file = reporter.generate_summary_pdf(
            results=results,
            telemetry_df=pd.DataFrame(results['observed_history']),
            ai_summary=str(results['agent_explain']),
            faults=[FAULT_TEMPLATES[scenario]],
            param_config=config,
            figures=[fig]
        )
        # Move the generated PDF into the batch folder under the run's name.
        pdf_path = os.path.join(output_folder, f"{run_name}_report.pdf")
        shutil.move(pdf_file, pdf_path)
        print(f"PDF report saved to {pdf_path}")
        create_download_link(pdf_path)