In [None]:
import pandas as pd
import numpy as np
from typing import Dict, Any
import os

# Create sample MOF synthesis dataset
def create_sample_mof_dataset() -> pd.DataFrame:
    """
    Build a small, hard-coded table of example MOF synthesis runs.

    The values are mock data for demonstration purposes. Every cell is
    stored as a string, including the temperature and time columns.

    Returns:
        A DataFrame with five rows (one per target framework) and ten
        columns, in the order given by ``columns`` below.
    """
    # Column headers, in the order they appear in the resulting DataFrame.
    columns = (
        "Target",
        "Metal source (mmol)",
        "Linker(s) (mmol)",
        "Solvent(s) (mL)",
        "Modulator / Additive",
        "Temp (°C)",
        "Time (h)",
        "Pressure",
        "Method",
        "Wash / Activation",
    )

    # One tuple per synthesis run; fields line up positionally with `columns`.
    # The "Modulator / Additive" field is an empty string for HKUST-1 and ZIF-8.
    # NOTE(review): the HKUST-1 row pairs a temperature of 85 with the method
    # "room temperature" -- kept exactly as given; confirm this is intended.
    runs = (
        (
            "MOF-5",
            "Zn(NO3)2·6H2O (0.5)",
            "H2BDC (0.5)",
            "DMF (10)",
            "H2O (0.5)",
            "120",
            "24",
            "solvothermal",
            "solvothermal",
            "DMF, methanol, chloroform",
        ),
        (
            "HKUST-1",
            "Cu(NO3)2·3H2O (0.3)",
            "H3BTC (0.2)",
            "DMF:H2O (8:2)",
            "",
            "85",
            "12",
            "ambient",
            "room temperature",
            "DMF, ethanol",
        ),
        (
            "UiO-66",
            "ZrCl4 (0.1)",
            "H2BDC (0.1)",
            "DMF (5)",
            "HCl (0.1)",
            "120",
            "48",
            "solvothermal",
            "solvothermal",
            "DMF, methanol, acetone",
        ),
        (
            "MIL-101",
            "Cr(NO3)3·9H2O (0.2)",
            "H2BDC (0.2)",
            "H2O (10)",
            "HF (0.1)",
            "220",
            "8",
            "solvothermal",
            "hydrothermal",
            "H2O, ethanol",
        ),
        (
            "ZIF-8",
            "Zn(NO3)2·6H2O (0.5)",
            "2-methylimidazole (2.0)",
            "methanol (20)",
            "",
            "25",
            "2",
            "ambient",
            "room temperature",
            "methanol",
        ),
    )

    # Re-associate each run with its headers so the DataFrame is built from
    # a list of per-row dicts keyed by column name.
    records = [dict(zip(columns, run)) for run in runs]
    return pd.DataFrame(records)

# Create the sample dataset, save it as CSV, and print a short summary.
if __name__ == "__main__":
    # Build the in-memory sample table.
    df = create_sample_mof_dataset()

    # Destination file; the path is relative to the current working directory.
    output_path = "../data/processed/mof_runs.csv"

    # Make sure the parent directory exists before writing. Deriving it from
    # output_path keeps the two from drifting apart; exist_ok=True makes
    # re-running harmless.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Write the table without the DataFrame index column.
    df.to_csv(output_path, index=False)

    print(f"Sample MOF dataset created with {len(df)} rows")
    print(f"Saved to: {output_path}")
    print("\nDataset preview:")
    print(df.head())

    print("\nDataset info:")
    # DataFrame.info() prints its summary itself and returns None, so it is
    # called directly; wrapping it in print() would emit a stray "None" line.
    df.info()

    print("\nUnique values in categorical columns:")
    for col in ["Pressure", "Method"]:
        print(f"{col}: {df[col].unique()}")
