Use this code to generate synthetic data for Lutein production. The inputs of the data-driven model would be concentration of biomass (C_X) in the reactor, concentration of nitrate (C_N) in the reactor, concentration of Lutein (C_L) in the reactor, influx of nitrate (F_in), inlet nitrate concentration (C_N_in), light intensity (I0), and time t hours. The output should be C_X, C_N, and C_L at t+1 hours. 

In [1]:
# Following are parameters of the model mostly taken from https://aiche.onlinelibrary.wiley.com/doi/epdf/10.1002/aic.15667 but changed slightly (after unit conversion)
# Do not change this... 

u_m = 0.152 # 1/h
u_d = 5.95*1e-3 # 1/h
K_N = 30.0*1e-3 # g/L

Y_NX = 0.305 # g/g

k_m = 0.350*1e-3*2 #g/g-h

k_d = 3.71*0.05/90 # L/g-h

K_NL = 10.0*1e-3 # g/L

k_s = 142.8 # umol/m2-s

k_i = 214.2 # umol/m2-s

k_sL = 320.6 # umol/m2-s

k_iL = 480.9 # umol/m2-s

tau = 0.120 #m2/g

Ka = 0.0 #1/m

OG Loop

In [None]:
import numpy as np
import random

#To generate data, just change these values in this block (perhaps in a loop), In my opinion, 
#a good range for C_x0 (which is the initial concnentration of biomass in the reactor C_X) is 0.2 - 2 g/L
# a good range for C_N0 (which is the initial concnetraiton of nitrate in the reactor C_N) is 0.2 - 2 g/L
# a good range for F_in (the inlet flow rate of nitrate into the reactor) is 1e-3 1.5e-2 L/h
# a good range for C_N_in (the inlet concentration of nitrate feed to the reactor) is 5 - 15 g/L
# a good range for intensity of light is 100 - 200 umol/m2-s

C_x0 = 0.5 # g/L

C_N0 = 1 #g/L

F_in = 8e-3 #L/h

C_N_in = 10 #g/L

I0 = 150 # umol/m2-s

C_x0_r = (0.2, 2)
C_N0_r = (0.2, 2)
F_in_r = (1e-3, 1.5e-2)
C_N_in_r = (5, 15)
I0_r = (100, 200)

num_values = 100

synth_data = []




#for x in range(num_values):
    #C_x0 = round(random.uniform(C_x0_r[0], C_x0_r[1]), 1)
    #C_N0 = round(random.uniform(C_N0_r[0], C_N0_r[1]), 1)
    #F_in = round(random.uniform(F_in_r[0], F_in_r[1]), 3)
    #C_N_in = round(random.uniform(C_N_in_r[0], C_N_in_r[1]), 0)
    #I0 = round(random.uniform(I0_r[0], I0_r[1]), 0)
    
    #synth_data.append((C_x0, C_N0, F_in, C_N_in, I0))

for i, value in enumerate(synth_data):
    print(f"Value {i+1}: {value}")

#print(synth_data)

Combination Loop

In [8]:
import numpy as np

#Create array of test values for each variable
C_x0_r = np.linspace(0.2, 2, 20)
C_N0_r = np.linspace(0.2, 2, 20)
F_in_r = np.linspace(1e-3, 1.5e-2, 20)
C_N_in_r = np.linspace(5, 15, 20)
I0_r = np.linspace(100, 200, 20)

synth_data = []

num = 19
for x in range(num):
    for i in range(num):
        #I0 constant
        for index in range(num):
            C_x0 = C_x0_r[index]
            C_N0 = C_N0_r[index]
            F_in = F_in_r[index]
            C_N_in = C_N_in_r[index]
            I0 = I0_r[i]
            synth_data.append((C_x0, C_N0, F_in, C_N_in, I0))

## Combinations using Itertools

In [19]:
import itertools
import numpy as np

#Create array of test values for each variable
C_x0_r = np.linspace(0.2, 2, 3)
C_N0_r = np.linspace(0.2, 2, 3)
F_in_r = np.linspace(1e-3, 1.5e-2, 3)
C_N_in_r = np.linspace(5, 15, 3)
I0_r = np.linspace(100, 200, 3)

synth_data = []

synth_data = list(itertools.product(C_x0_r, C_N0_r, F_in_r, C_N_in_r, I0_r))

# Print the combinations
for combination in synth_data:
    print(combination)

(0.2, 0.2, 0.001, 5.0, 100.0)
(0.2, 0.2, 0.001, 5.0, 150.0)
(0.2, 0.2, 0.001, 5.0, 200.0)
(0.2, 0.2, 0.001, 10.0, 100.0)
(0.2, 0.2, 0.001, 10.0, 150.0)
(0.2, 0.2, 0.001, 10.0, 200.0)
(0.2, 0.2, 0.001, 15.0, 100.0)
(0.2, 0.2, 0.001, 15.0, 150.0)
(0.2, 0.2, 0.001, 15.0, 200.0)
(0.2, 0.2, 0.008, 5.0, 100.0)
(0.2, 0.2, 0.008, 5.0, 150.0)
(0.2, 0.2, 0.008, 5.0, 200.0)
(0.2, 0.2, 0.008, 10.0, 100.0)
(0.2, 0.2, 0.008, 10.0, 150.0)
(0.2, 0.2, 0.008, 10.0, 200.0)
(0.2, 0.2, 0.008, 15.0, 100.0)
(0.2, 0.2, 0.008, 15.0, 150.0)
(0.2, 0.2, 0.008, 15.0, 200.0)
(0.2, 0.2, 0.015, 5.0, 100.0)
(0.2, 0.2, 0.015, 5.0, 150.0)
(0.2, 0.2, 0.015, 5.0, 200.0)
(0.2, 0.2, 0.015, 10.0, 100.0)
(0.2, 0.2, 0.015, 10.0, 150.0)
(0.2, 0.2, 0.015, 10.0, 200.0)
(0.2, 0.2, 0.015, 15.0, 100.0)
(0.2, 0.2, 0.015, 15.0, 150.0)
(0.2, 0.2, 0.015, 15.0, 200.0)
(0.2, 1.1, 0.001, 5.0, 100.0)
(0.2, 1.1, 0.001, 5.0, 150.0)
(0.2, 1.1, 0.001, 5.0, 200.0)
(0.2, 1.1, 0.001, 10.0, 100.0)
(0.2, 1.1, 0.001, 10.0, 150.0)
(0.2, 1.1, 0.001, 10

In [20]:
synth_data

[(0.2, 0.2, 0.001, 5.0, 100.0),
 (0.2, 0.2, 0.001, 5.0, 150.0),
 (0.2, 0.2, 0.001, 5.0, 200.0),
 (0.2, 0.2, 0.001, 10.0, 100.0),
 (0.2, 0.2, 0.001, 10.0, 150.0),
 (0.2, 0.2, 0.001, 10.0, 200.0),
 (0.2, 0.2, 0.001, 15.0, 100.0),
 (0.2, 0.2, 0.001, 15.0, 150.0),
 (0.2, 0.2, 0.001, 15.0, 200.0),
 (0.2, 0.2, 0.008, 5.0, 100.0),
 (0.2, 0.2, 0.008, 5.0, 150.0),
 (0.2, 0.2, 0.008, 5.0, 200.0),
 (0.2, 0.2, 0.008, 10.0, 100.0),
 (0.2, 0.2, 0.008, 10.0, 150.0),
 (0.2, 0.2, 0.008, 10.0, 200.0),
 (0.2, 0.2, 0.008, 15.0, 100.0),
 (0.2, 0.2, 0.008, 15.0, 150.0),
 (0.2, 0.2, 0.008, 15.0, 200.0),
 (0.2, 0.2, 0.015, 5.0, 100.0),
 (0.2, 0.2, 0.015, 5.0, 150.0),
 (0.2, 0.2, 0.015, 5.0, 200.0),
 (0.2, 0.2, 0.015, 10.0, 100.0),
 (0.2, 0.2, 0.015, 10.0, 150.0),
 (0.2, 0.2, 0.015, 10.0, 200.0),
 (0.2, 0.2, 0.015, 15.0, 100.0),
 (0.2, 0.2, 0.015, 15.0, 150.0),
 (0.2, 0.2, 0.015, 15.0, 200.0),
 (0.2, 1.1, 0.001, 5.0, 100.0),
 (0.2, 1.1, 0.001, 5.0, 150.0),
 (0.2, 1.1, 0.001, 5.0, 200.0),
 (0.2, 1.1, 0.001, 10.

The code below formulates and solves the ODE model in the paper https://aiche.onlinelibrary.wiley.com/doi/epdf/10.1002/aic.15667

We specifically use equations 1, 2, and 3. We approximate 3 for some intermediate value of Z to avoid using the averaging shown in equation 4. 

In [9]:
def pbr(t,C): # returns the RHS of the ODE model
    C_X = C[0] # concentration of biomass
    C_N = C[1] # concentration of nitrate
    C_L = C[2] # concentration of lutein
    
    I = 2*I0*(np.exp(-(tau*0.01*1000*C_X))) # computing attenuated intensity within the reactor. 
    
    Iscaling_u = I/(I+k_s + I**2/k_i)
    Iscaling_k = I/(I + k_sL + I**2/k_iL)
    
    u0 = u_m*Iscaling_u
    k0 = k_m*Iscaling_k
    
        
    #print(u_m, k_m, u0, k0,u_d)
    

    
    dCxdt = u0*C_N*C_X/(C_N + K_N) - u_d*C_X
    
    #print(C_N, C_X, (C_N + K_N), dCxdt, C_N*C_X/(C_N + K_N), u_d*C_X, C_N/(C_N + K_N))
    
    dCndt = -Y_NX*u0*C_N*C_X/(C_N + K_N) + F_in * C_N_in
    
    dCldt = k0*C_N*C_X/(C_N + K_NL) - k_d*C_L*C_X
    
    #print(k0, k_d, k0/k_d )
    
    return np.array([dCxdt, dCndt, dCldt])

In [10]:
from scipy.integrate import solve_ivp
import matplotlib.pyplot as plt
import pandas as pd

mainData = pd.DataFrame()
for i, data in enumerate(synth_data):
    C_x0, C_N0, F_in, C_N_in, I0 = data
    
    ta = np.linspace(0,150,200)
    
   
    y = solve_ivp(pbr, [0, 150], np.array([C_x0, C_N0, 0.0]), t_eval=ta) # solves the ODE model for a given set of initial conditions and simulation time
    

    t = y.t
    C = y.y
    #print(C[2]*1000)
    save = pd.DataFrame()
    save['Time'] = t
    save['C_X'] = C[0]
    save['C_N'] = C[1]
    save['C_L'] = C[2]
    save['C_x0'] = C_x0
    save['C_N0'] = C_N0
    save['F_in'] = F_in
    save['C_N_in'] = C_N_in
    save['I0'] = I0
    mainData = pd.concat([mainData, save])

#from pathlib import Path  

#filepath = Path('Documents\STEMVisualsSynthData_Opt.csv')  

#filepath.parent.mkdir(parents=True, exist_ok=True)  

#mainData.to_csv(filepath) 

In [11]:
mainData

Unnamed: 0,Time,C_X,C_N,C_L,C_x0,C_N0,F_in,C_N_in,I0
0,0.000000,0.200000,0.200000,0.000000,0.200000,0.200000,0.001000,5.000000,100.000000
1,0.753769,0.206758,0.201430,0.000030,0.200000,0.200000,0.001000,5.000000,100.000000
2,1.507538,0.213746,0.202779,0.000062,0.200000,0.200000,0.001000,5.000000,100.000000
3,2.261307,0.220972,0.204047,0.000094,0.200000,0.200000,0.001000,5.000000,100.000000
4,3.015075,0.228439,0.205231,0.000127,0.200000,0.200000,0.001000,5.000000,100.000000
...,...,...,...,...,...,...,...,...,...
195,146.984925,3.466120,30.939736,0.005380,1.905263,1.905263,0.014263,14.473684,194.736842
196,147.738693,3.466759,31.090408,0.005385,1.905263,1.905263,0.014263,14.473684,194.736842
197,148.492462,3.467387,31.241082,0.005390,1.905263,1.905263,0.014263,14.473684,194.736842
198,149.246231,3.468003,31.391759,0.005395,1.905263,1.905263,0.014263,14.473684,194.736842


In [12]:
endpoint_df = mainData[mainData["Time"] == 150]

In [13]:
endpoint_df

Unnamed: 0,Time,C_X,C_N,C_L,C_x0,C_N0,F_in,C_N_in,I0
199,150.0,2.006485,0.027028,0.007875,0.200000,0.200000,0.001000,5.000000,100.000000
199,150.0,2.726654,0.484138,0.007007,0.294737,0.294737,0.001737,5.526316,100.000000
199,150.0,2.793620,1.354379,0.006676,0.389474,0.389474,0.002474,6.052632,100.000000
199,150.0,2.818233,2.368360,0.006454,0.484211,0.484211,0.003211,6.578947,100.000000
199,150.0,2.832517,3.507861,0.006268,0.578947,0.578947,0.003947,7.105263,100.000000
...,...,...,...,...,...,...,...,...,...
199,150.0,3.462164,21.100699,0.005805,1.526316,1.526316,0.011316,12.368421,194.736842
199,150.0,3.463594,23.536625,0.005700,1.621053,1.621053,0.012053,12.894737,194.736842
199,150.0,3.465019,26.089030,0.005598,1.715789,1.715789,0.012789,13.421053,194.736842
199,150.0,3.465976,28.758322,0.005501,1.810526,1.810526,0.013526,13.947368,194.736842
