In [1]:
# from pyDOE import lhs
# import numpy as np
# import pandas as pd
# import pickle

# def define_parameter_space():
#     return {
#         'param1': {'min': 20.0, 'max': 200.0, 'name': 'T1Celsius'},
#         'param2': {'min': 10.0, 'max': 60.0, 'name': 't1min'},
#         'param3': {'min': 20.0, 'max': 200.0, 'name': 'T2Celsius'},
#         'param4': {'min': 10.0, 'max': 60.0, 'name': 't2min'},
#         'param5': {'min': 1.0, 'max': 2.0, 'name': 'EquivalentsReagent1'},
#         'param6': {'min': 1.0, 'max': 5.0, 'name': 'EquivalentsBASE1'},
#         'param7': {'min': 0.1, 'max': 1.0, 'name': 'ConcentrationMolar'}
#     }
    
# def generate_initial_samples(param_space, n_samples,  method='lhs', criterion='center'):
#     """
#     Generate initial samples and calculate corresponding MMA and AA values
    
#     Parameters:
#     - param_space: Dictionary defining the parameter ranges
#     - n_samples: Number of samples to generate
#     - calculator: Instance of ScaledCalculator for computing MMA and AA
#     - method: Sampling method ('lhs' or 'random')
#     - criterion: Criterion for LHS sampling
#     """
#     valid_criteria = ['center', 'maximin', 'centermaximin', 'correlation']
#     column_names = [param_space['param1']['name'], param_space['param2']['name'], param_space['param3']['name']]
    
#     if method.lower() == 'lhs':
#         if criterion not in valid_criteria:
#             raise ValueError(f"Invalid criterion. Must be one of {valid_criteria}")
#         samples = lhs(len(param_space), samples=n_samples, criterion=criterion)
#     else:
#         samples = np.random.rand(n_samples, len(param_space))
    
#     X_init = np.zeros_like(samples)
#     for i, param in enumerate(param_space.keys()):
#         param_range = param_space[param]['max'] - param_space[param]['min']
#         param_min = param_space[param]['min']
#         X_init[:, i] = samples[:, i] * param_range + param_min
    
#     # Create initial DataFrame
#     df = pd.DataFrame(X_init, columns=column_names)
    
#     # # Calculate Yield, Impurity and ImpurityXRatio for each row
#     # Yield = []
#     # Impurity = []
#     # ImpurityXRatio = []
    
#     # for _, row in df.iterrows():
#     #     mma = calculator.calculate_mma(row['pH'], row['Time'], row['Temp'])
#     #     aa = calculator.calculate_aa(row['pH'], row['Time'], row['Temp'])
#     #     mma_values.append(mma)
#     #     aa_values.append(aa)
    
#     # # Add MMA and AA columns
#     # df['Yield'] = Yield
#     # df['Impurity'] = Impurity
#     # df['ImpurityXRatio'] = ImpurityXRatio
    
#     return df

# # Usage example:
# # Initialize the calculator

# # Generate the data
# param_space = define_parameter_space()
# n_samples = 10
# data = generate_initial_samples(param_space, n_samples)

# # Display the results
# print("Generated dataset with calculated MMA and AA values:")
# print(data)
# # data.to_csv('optimization.csv')


In [2]:
import numpy as np
import pandas as pd
from pyDOE import lhs
from scipy.integrate import solve_ivp

def define_parameter_space():
    """Return the sampling bounds for every process parameter.

    Returns
    -------
    dict
        Maps 'param1' .. 'param7' (in that insertion order) to a dict with
        keys 'min', 'max' and 'name'. 'name' is the column label used for
        the parameter; 'min'/'max' are the lower/upper sampling bounds.
    """
    # (name, lower bound, upper bound) -- order defines param1..param7
    bounds = (
        ('T1Celsius', 20.0, 200.0),
        ('t1min', 10.0, 60.0),
        ('T2Celsius', 20.0, 200.0),
        ('t2min', 10.0, 60.0),
        ('EquivalentsReagent1', 1.0, 2.0),
        ('EquivalentsBASE1', 1.0, 5.0),
        # Concentration is pinned (min == max); an alternative 0.1 - 1.0
        # range existed as a commented-out entry and is noted for reference.
        ('ConcentrationMolar', 0.82, 0.82),
    )
    return {
        f'param{position}': {'min': lower, 'max': upper, 'name': label}
        for position, (label, lower, upper) in enumerate(bounds, start=1)
    }

def reaction_rates(t, y, params, T):
    """Right-hand side of the kinetic ODE system, in solve_ivp's f(t, y) form.

    Parameters
    ----------
    t : float
        Current time. Not used: the rate laws have no explicit time term.
    y : sequence of 10 values
        Species amounts ordered as REAGENT1, BASE1, REAGENT1K, MEOH,
        SUBSTRATEX, SUBSTRATEXK, PRODUCTXKK, PRODUCTXK, IMP2, H2O.
    params : mapping
        Must provide 'k{i}_f' and 'Ea{i}_f' for i = 1..5, 'Keq1', 'Keq2',
        'Keq4' and 'T_ref'.
    T : float
        Temperature at which the rate constants are evaluated (same scale as
        params['T_ref']; it enters only through 1/T - 1/T_ref).

    Returns
    -------
    list
        The 10 time derivatives, in the same species order as ``y``.
    """
    gas_constant = 8.314e-3  # Gas constant in kJ/mol.K

    # IMP2 and H2O are pure products: they never appear in a rate law.
    (reagent1, base1, reagent1k, meoh, substratex,
     substratexk, productxkk, productxk, _imp2, _h2o) = y

    def arrhenius(k_ref, Ea, T_ref):
        # Rate constant at T, shifted from its reference value at T_ref.
        return k_ref * np.exp(-Ea / gas_constant * (1/T - 1/T_ref))

    # Forward rate constants for steps 1..5.
    forward = {
        step: arrhenius(params[f'k{step}_f'], params[f'Ea{step}_f'], params['T_ref'])
        for step in range(1, 6)
    }
    # Reverse constants follow from the equilibrium constant; steps 3 and 5
    # are treated as irreversible.
    reverse = {step: forward[step] / params[f'Keq{step}'] for step in (1, 2, 4)}

    # Net rate of each step.
    rate1 = forward[1] * reagent1 * base1 - reverse[1] * reagent1k * meoh
    rate2 = forward[2] * substratex * base1 - reverse[2] * substratexk * meoh
    rate3 = forward[3] * reagent1k * substratexk
    rate4 = forward[4] * productxkk * meoh - reverse[4] * productxk * base1
    rate5 = forward[5] * productxk * substratexk

    return [
        -rate1,                      # REAGENT1
        -rate1 - rate2 + rate4,      # BASE1
        rate1 - rate3,               # REAGENT1K
        rate1 + rate2 - rate3,       # MEOH
        -rate2,                      # SUBSTRATEX
        rate2 - rate3 - rate5,       # SUBSTRATEXK
        rate3 - rate4,               # PRODUCTXKK
        rate4 - rate5,               # PRODUCTXK
        rate5,                       # IMP2
        rate5,                       # H2O
    ]

def run_reaction(T1, t1, T2, t2, SUBSTRATEXK, BASE1, Reagent1, MeOH, V, params):
    """Integrate the kinetic model over two consecutive constant-temperature stages.

    Stage 1 runs from time 0 to ``t1`` at temperature ``T1``; stage 2 starts
    from the final state of stage 1 and runs from ``t1`` to ``t1 + t2`` at
    ``T2``.

    Parameters
    ----------
    T1, T2 : float
        Temperatures passed to ``reaction_rates`` for stage 1 and stage 2.
    t1, t2 : float
        Durations of stage 1 and stage 2.
    SUBSTRATEXK, BASE1, Reagent1, MeOH : float
        Initial amounts; each is divided by ``V`` to form the initial state.
    V : float
        Volume used to convert the amounts above into concentrations.
    params : mapping
        Kinetic parameters forwarded unchanged to ``reaction_rates``.

    Returns
    -------
    object or None
        An object with attributes ``t`` (stage-1 times followed by stage-2
        times) and ``y`` (matching states, one column per time point, rows
        ordered as in ``reaction_rates``). The stage boundary appears twice.
        ``None`` if either integration ends with a non-zero status or any
        exception is raised while integrating (the exception is printed).
    """
    # Only Reagent1, BASE1, MeOH and SUBSTRATEXK start non-zero.
    start_state = [Reagent1/V, BASE1/V, 0, MeOH/V, 0, SUBSTRATEXK/V, 0, 0, 0, 0]

    # (integration window, temperature) for each stage, in execution order.
    stages = (
        ((0, t1), T1),
        ((t1, t1 + t2), T2),
    )

    try:
        state = start_state
        time_parts = []
        state_parts = []
        for window, temperature in stages:
            stage_sol = solve_ivp(reaction_rates, window, state, args=(params, temperature), dense_output=True, max_step=1)
            if stage_sol.status != 0:
                # Solver did not reach the end of the window.
                return None
            time_parts.append(stage_sol.t)
            state_parts.append(stage_sol.y)
            # The next stage continues from where this one ended.
            state = stage_sol.y[:, -1]

        merged = {
            't': np.concatenate(time_parts),
            'y': np.concatenate(state_parts, axis=1),
        }
        return type('obj', (object,), merged)()

    except Exception as e:
        print(f"Error in run_reaction: {e}")
        return None

def calculate_reaction_outputs(params_dict):
    """Simulate one experiment and summarise its final composition.

    Parameters
    ----------
    params_dict : mapping (dict or pandas Series)
        Must provide 'T1Celsius', 't1min', 'T2Celsius', 't2min',
        'EquivalentsReagent1', 'EquivalentsBASE1' and 'ConcentrationMolar'.

    Returns
    -------
    dict or None
        ``None`` when ``run_reaction`` fails. Otherwise a dict with:

        - 'Yield': final PRODUCTXKK + PRODUCTXK divided by the initial
          substrate concentration (a fraction, not a percentage).
        - 'Impurity': final IMP2 divided by the initial substrate
          concentration, times 100.
        - 'ImpurityXRatio': (PRODUCTXKK + PRODUCTXK + REAGENT1 + REAGENT1K)
          divided by (PRODUCTXKK + PRODUCTXK), all at the final time point.
    """
    # Reaction parameters
    V = 1  # Volume (Liters)
    MeOH = 0.0161 * 1000  # Initial moles of solvent

    # Minutes -> seconds and Celsius -> Kelvin
    t1 = params_dict['t1min'] * 60
    t2 = params_dict['t2min'] * 60
    T1 = 273.15 + params_dict['T1Celsius']
    T2 = 273.15 + params_dict['T2Celsius']

    # Initial charges: reagent and base are set as equivalents of the substrate
    SUBSTRATEXK = params_dict['ConcentrationMolar']
    Reagent1 = SUBSTRATEXK * params_dict['EquivalentsReagent1']
    BASE1 = SUBSTRATEXK * params_dict['EquivalentsBASE1']

    # Kinetic parameters. The 'Ea*_r' entries are not read by reaction_rates,
    # which derives reverse rate constants from the 'Keq*' values instead.
    kinetic_params = {
        'k1_f': 0.0131867209533307, 'Ea1_f': 39.247, 'Keq1': 0.34305, 'Ea1_r': 35.0979,
        'k2_f': 0, 'Ea2_f': 60.001, 'Keq2': 0.7140, 'Ea2_r': 59.999,
        'k3_f': 0.0717498202, 'Ea3_f': 51.49215,
        'k4_f': 0.0000000177, 'Ea4_f': 61.37373, 'Keq4': 0.00004, 'Ea4_r': 0,
        'k5_f': 0.001715, 'Ea5_f': 107.16032,
        'T_ref': 395.15
    }

    # Run reaction
    sol = run_reaction(T1, t1, T2, t2, SUBSTRATEXK, BASE1, Reagent1, MeOH, V, kinetic_params)

    if sol is None:
        return None

    # Final state; indices follow the species order used by reaction_rates:
    # 0 REAGENT1, 2 REAGENT1K, 6 PRODUCTXKK, 7 PRODUCTXK, 8 IMP2.
    final = sol.y[:, -1]

    # run_reaction starts the substrate at SUBSTRATEXK / V, so every
    # normalisation must use that same quantity. Yield previously divided by
    # SUBSTRATEXK alone, which is only correct while V == 1.
    initial_substrate = SUBSTRATEXK / V
    product_total = final[6] + final[7]

    Yield = product_total / initial_substrate
    Impurity = final[8] / initial_substrate * 100
    ImpurityXRatio = (product_total + final[0] + final[2]) / product_total

    return {
        'Yield': Yield,
        'Impurity': Impurity,
        'ImpurityXRatio': ImpurityXRatio
    }

def generate_initial_samples(param_space, n_samples, method='lhs', criterion='center', seed=None):
    """Generate an initial design and simulate the outputs for every point.

    Parameters
    ----------
    param_space : dict
        Mapping whose values are dicts with 'min', 'max' and 'name'
        (as returned by ``define_parameter_space``). Iteration order fixes
        the column order of the result.
    n_samples : int
        Number of design points (rows) to generate.
    method : str, default 'lhs'
        'lhs' (case-insensitive) uses pyDOE Latin hypercube sampling; any
        other value falls back to uniform random sampling.
    criterion : str, default 'center'
        LHS criterion; only checked and used when ``method`` is 'lhs'.
    seed : int or None, default None
        When given, NumPy's global random generator is seeded with it before
        sampling so the design is reproducible. This resets the global RNG
        state for the whole process. ``None`` leaves the RNG untouched.

    Returns
    -------
    pandas.DataFrame
        One column per parameter (named by each entry's 'name') followed by
        'Yield', 'Impurity' and 'ImpurityXRatio'. Rows whose simulation
        failed hold NaN in the three output columns.

    Raises
    ------
    ValueError
        If ``method`` is 'lhs' and ``criterion`` is not a supported value.
    """
    valid_criteria = ['center', 'maximin', 'centermaximin', 'correlation']
    output_names = ('Yield', 'Impurity', 'ImpurityXRatio')

    if seed is not None:
        # pyDOE's lhs draws from NumPy's global RNG and has no seed argument
        # of its own, so seeding globally is the only way to pin the design.
        np.random.seed(seed)

    if method.lower() == 'lhs':
        if criterion not in valid_criteria:
            raise ValueError(f"Invalid criterion. Must be one of {valid_criteria}")
        samples = lhs(len(param_space), samples=n_samples, criterion=criterion)
    else:
        samples = np.random.rand(n_samples, len(param_space))

    # Scale unit-interval samples to each parameter's [min, max] range.
    specs = list(param_space.values())
    param_names = [spec['name'] for spec in specs]
    lower = np.array([spec['min'] for spec in specs], dtype=float)
    span = np.array([spec['max'] - spec['min'] for spec in specs], dtype=float)
    X_init = samples * span + lower

    # Create initial DataFrame
    df = pd.DataFrame(X_init, columns=param_names)

    # Simulate every design point; a failed simulation becomes a row of NaNs.
    results = []
    for idx, row in df.iterrows():
        print(f"Processing sample {idx + 1}/{n_samples}")
        outputs = calculate_reaction_outputs(row)

        if outputs is None:
            results.append([np.nan] * len(output_names))
        else:
            results.append([outputs[name] for name in output_names])

    # Add output columns
    for position, name in enumerate(output_names):
        df[name] = [result[position] for result in results]

    return df

# Example usage: build a 10-point initial design, simulate every point,
# print the resulting table and write it to 'optimization.csv'.
# The recorded cell output shows this guard is true when the cell is run.
if __name__ == "__main__":
    # Bounds and column names for the parameters to sample
    param_space = define_parameter_space()
    n_samples = 10
    # Defaults apply here: method='lhs', criterion='center'
    df = generate_initial_samples(param_space, n_samples)
    
    print("\nGenerated dataset with calculated outputs:")
    print(df)
    
    # Save to CSV (relative path; index column is omitted)
    df.to_csv('optimization.csv', index=False)

Processing sample 1/10
Processing sample 2/10
Processing sample 3/10
Processing sample 4/10
Processing sample 5/10
Processing sample 6/10
Processing sample 7/10
Processing sample 8/10
Processing sample 9/10
Processing sample 10/10

Generated dataset with calculated outputs:
   T1Celsius  t1min  T2Celsius  t2min  EquivalentsReagent1  EquivalentsBASE1  \
0      137.0   12.5      137.0   42.5                 1.05               2.8   
1      191.0   17.5      119.0   22.5                 1.65               2.0   
2       47.0   52.5      101.0   32.5                 1.35               2.4   
3      101.0   22.5       29.0   37.5                 1.15               4.8   
4       29.0   37.5      173.0   27.5                 1.95               1.6   
5       65.0   57.5       47.0   57.5                 1.55               4.0   
6      155.0   42.5      155.0   17.5                 1.25               4.4   
7       83.0   27.5       83.0   12.5                 1.85               3.2   
8    