In [0]:
%restart_python
%load_ext autoreload
%autoreload 2
#%pip install --upgrade crewai
#%pip install --upgrade "mlflow[databricks]>=3.1" crewai
# The two autoreload magics above enable automatic reloading of edited modules; learn more at https://docs.databricks.com/en/files/workspace-modules.html#autoreload-for-python-modules
# To disable autoreload, run %autoreload 0

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import sys
import os
from typing import Dict
import seaborn as sns

def analyze_workflow(results, source_workflow=None, spark_session=None):
    """Print and display a post-run report for one simulation run.

    The report covers the headline numbers in ``results``, the run
    visualizations, the observed telemetry read from the data lake and the
    detected anomalies for the run.

    Args:
        results: Mapping describing the run; must provide ``'run_id'`` and a
            numeric ``'final_titer'``. It is also passed to ``visualize_run``.
        source_workflow: Object whose ``data_lake`` attribute is queried.
            Defaults to the notebook-level ``workflow`` so existing calls keep
            working; pass it explicitly to report on a run produced by a
            different workflow instance.
        spark_session: Session handed to the data-lake queries. Defaults to
            the notebook-level ``spark``.

    Returns:
        None. Everything is printed or rendered through ``display``.
    """
    run_id = results['run_id']

    print("\nSimulation complete!")
    print(f"   Run ID: {run_id}")
    print(f"   Final Titer: {results['final_titer']:.2f} mg/mL")

    # output simulation visualizations
    visualize_run(results)

    # Fall back to the notebook globals only when no explicit object is given;
    # the conditional keeps the globals optional for callers that pass both.
    active_workflow = workflow if source_workflow is None else source_workflow
    active_spark = spark if spark_session is None else spark_session
    data_lake = active_workflow.data_lake

    # query telemetry data (temporal data from simulation log)
    telemetry_df = data_lake.get_run_telemetry(active_spark, run_id, is_observed=True)
    print(f"Telemetry Data Shape: {telemetry_df.shape}")

    if telemetry_df.empty:
        # An empty frame may lack the columns read below, so skip the details
        # instead of failing on a missing 'signal_name' / 'time_h' column.
        print("No telemetry recorded for this run")
    else:
        print(f"\nSignals captured: {telemetry_df['signal_name'].unique()}")
        print(f"Time range: {telemetry_df['time_h'].min():.1f} - {telemetry_df['time_h'].max():.1f} hours")

        # telemetry sample...
        display(telemetry_df.head(10))

    anomalies_df = data_lake.get_anomalies(active_spark, run_id, only_detected=True)
    print(f"Anomalies Detected: {len(anomalies_df)}")

    if anomalies_df.empty:
        print("No anomalies detected in this run")
        return

    # Same count-per-group breakdown for each grouping column.
    for label, column in (("signal", 'signal_name'), ("method", 'method')):
        print(f"\nAnomalies by {label}:")
        print(anomalies_df.groupby(column).size())

    display(anomalies_df.head(10))

# setup
# Location used when no override is supplied (the previously hard-coded path).
DEFAULT_PROJECT_ROOT = '/Workspace/Repos/synthetic-twin/synthetic_twin'


def resolve_project_root(env=None):
    """Return the project root directory, honouring an optional override.

    Args:
        env: Mapping to read the ``BIOPILOT_PROJECT_ROOT`` override from.
            Defaults to ``os.environ``.

    Returns:
        The value of ``BIOPILOT_PROJECT_ROOT`` when it is set to a non-empty
        string, otherwise ``DEFAULT_PROJECT_ROOT``.
    """
    env = os.environ if env is None else env
    # `or` also covers an override that is present but empty.
    return env.get('BIOPILOT_PROJECT_ROOT') or DEFAULT_PROJECT_ROOT


PROJECT_ROOT = resolve_project_root()
MODULES_PATH = os.path.join(PROJECT_ROOT, 'modules')
if MODULES_PATH not in sys.path:
    # Prepend (rather than append) so this directory is searched first, and
    # guard against duplicates when the cell is re-run.
    sys.path.insert(0, MODULES_PATH)

# import modules
from config import (SIMULATION_PARAMS, INITIAL_STATE, KINETIC_PARAMS, 
                    REACTOR_PARAMS, SENSOR_PARAMS, FAULT_TEMPLATES, SCENARIOS, SCORING_CONFIG)

from models import BioreactorSimulation, FaultManager
from anomaly_detection import (AnomalyDetectionEngine, create_default_bioreactor_config)
from data_lake import BioreactorDataLake
from run_simulation_workflow import BioPilotWorkflow, visualize_run
print("All modules imported successfully!")

# configuration
# Parameter sets bundled under their own names and handed to the workflow
# as `config_dict`.
config = dict(
    SIMULATION_PARAMS=SIMULATION_PARAMS,
    INITIAL_STATE=INITIAL_STATE,
    KINETIC_PARAMS=KINETIC_PARAMS,
    REACTOR_PARAMS=REACTOR_PARAMS,
    SENSOR_PARAMS=SENSOR_PARAMS,
    FAULT_TEMPLATES=FAULT_TEMPLATES,
)

# initialize workflow
workflow = BioPilotWorkflow(
    spark=spark,
    config_dict=config,
    enable_agent=True,
    enable_anomaly_detection=True,
)

# sample workflow:
# injection of a fault scenario example - overfeeding @ 20h
print("Injecting 'overfeed' fault at t=20h...")
workflow.inject_scenario_faults(scenario="overfeed")

# initiate simulation with a default base feed rate
# TODO: make base feed rate customizable/dynamic
print("\nRunning simulation...")
results = workflow.run_with_monitoring(
    base_feed_rate=0.1,
    save_to_lake=True,
)
analyze_workflow(results)

# test multiple scenarios with replicates
scenarios_to_test = ['standard', 'overfeed', 'DO_drop', 'contamination']
N_REPLICATES = 3  # runs per scenario; the announced totals below derive from it

print("Running batch scenario analysis...")
print(f"   Scenarios: {scenarios_to_test}")
print(f"   Replicates per scenario: {N_REPLICATES}")
print(f"   Total runs: {len(scenarios_to_test) * N_REPLICATES}")
print("\nThis may take several minutes...\n")

# One record per run so the batch can be compared afterwards.
batch_results = []

for scenario in scenarios_to_test:
    for replicate in range(1, N_REPLICATES + 1):
        # NOTE(review): every run injects into the shared `workflow`; confirm
        # that inject_scenario_faults replaces faults left by earlier runs
        # rather than accumulating them.
        workflow.inject_scenario_faults(scenario=scenario)
        print(f"\nRunning simulation for {scenario} (replicate {replicate}/{N_REPLICATES})...")

        # Use a dedicated name: the export section further down reads the
        # global `results` and labels it 'overfeed', so it must keep pointing
        # at the earlier overfeed run instead of the last batch run.
        scenario_results = workflow.run_with_monitoring(base_feed_rate=0.1, save_to_lake=True)
        analyze_workflow(scenario_results)

        batch_results.append({
            'scenario': scenario,
            'replicate': replicate,
            'run_id': scenario_results['run_id'],
            'final_titer': scenario_results['final_titer'],
        })

# test sensitivity to different growth rates (mu_max) (parameter sweep)
mu_max_values = [0.03, 0.04, 0.05, 0.06]
sweep_results = []

print("Running parameter sweep: mu_max")
print(f"   Testing values: {mu_max_values}")

for mu_max in mu_max_values:
    print(f"\n   Testing mu_max = {mu_max}...")

    # Per-run config: copy the kinetics dict and the outer dict so the
    # override never writes into the shared KINETIC_PARAMS / config objects.
    kinetics_override = KINETIC_PARAMS.copy()
    kinetics_override['mu_max'] = mu_max
    custom_config = {**config, 'KINETIC_PARAMS': kinetics_override}

    # A fresh workflow per value, built from the overridden config.
    workflow_sweep = BioPilotWorkflow(
        spark=spark,
        config_dict=custom_config,
        enable_agent=True,
        enable_anomaly_detection=True,
    )
    results_sweep = workflow_sweep.run_with_monitoring(
        base_feed_rate=0.1,
        save_to_lake=True,
    )

    # Keep only the swept value and the two outcome metrics.
    sweep_row = {'mu_max': mu_max}
    for metric in ('final_titer', 'final_biomass'):
        sweep_row[metric] = results_sweep[metric]
    sweep_results.append(sweep_row)

# analyze sweep results
sweep_df = pd.DataFrame(sweep_results)
print("\nParameter sweep complete...")
display(sweep_df)

# output sweep result visualizations
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# One row per panel: axes, sweep_df column, y-axis label, title, and any extra
# line styling (the first panel uses the default colour).
panel_specs = [
    (ax1, 'final_titer', 'Final Titer [mg/mL]', 'Titer vs Growth Rate', {}),
    (ax2, 'final_biomass', 'Final Biomass [g/L]', 'Biomass vs Growth Rate', {'color': 'green'}),
]

for panel_ax, metric_col, y_label, panel_title, line_kwargs in panel_specs:
    panel_ax.plot(sweep_df['mu_max'], sweep_df[metric_col], 'o-',
                  linewidth=2, markersize=8, **line_kwargs)
    panel_ax.set_xlabel('mu_max [1/h]', fontsize=11)
    panel_ax.set_ylabel(y_label, fontsize=11)
    panel_ax.set_title(panel_title, fontweight='bold')
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# generate run summary
# NOTE(review): `run_summary` is fetched but never read below - confirm
# whether it was meant to feed the export.
run_summary = workflow.data_lake.get_run_summary(spark, results['run_id'])

# Single-row summary; values are cast to plain Python types before export.
# NOTE(review): 'scenario' is hard-coded - confirm `results` still refers to
# the overfeed run when this section executes.
summary_record = {
    'run_id': str(results['run_id']),
    'scenario': 'overfeed',
    'final_titer_mg_mL': float(results['final_titer']),
    'final_biomass_g_L': float(results['final_biomass']),
    'total_anomalies': int(results['num_anomalies']),
    'success': bool(results['final_titer'] > 5.0),
    'timestamp': datetime.now(),
}
summary_export = pd.DataFrame([summary_record])

output_path = f"/tmp/biopilot_run_{results['run_id']}_summary.csv"
summary_export.to_csv(output_path, index=False)
print(f"Summary exported to: {output_path}")

# ++ telemetry data
telemetry_path = f"/tmp/biopilot_run_{results['run_id']}_telemetry.csv"
observed_history = results['observed_history']
observed_history.to_csv(telemetry_path, index=False)
print(f"Telemetry exported to: {telemetry_path}")

# cleanup...
print(
    f"\nrun ID: {results['run_id']}",
    f"Final titer: {results['final_titer']:.2f} mg/mL",
    sep="\n",
)
