In [1]:
import r3.adapters as adapters
from sklearn.model_selection import train_test_split
import r3.schema as schema
import r3.adapters as adapters
from r3.models.assimulo import assimulo_model as ivt_model
import torch
import gpytorch
from r3.models.assimulo.gp_assimulo_mean_model import AssimuloMeanGaussianProcessModel


# Input feature columns, referenced through `schema.IVTReactionSchema`.
X_columns = [
    schema.IVTReactionSchema.NTP_M.value,
    schema.IVTReactionSchema.T7RNAP_u_uL.value,
    schema.IVTReactionSchema.DNA_ug_mL.value,
    schema.IVTReactionSchema.Mg2_M.value,
    schema.IVTReactionSchema.Spd_M.value,
    schema.IVTReactionSchema.TIME_min.value,
]

# Single regression target column.
y_columns = [schema.IVTReactionSchema.RNA_g_L.value]

# Rows are kept only when (measured - mechanistic prediction) is strictly
# above this bound; the value is in the units of the RNA_g_L column.
RESIDUAL_LOWER_BOUND = -4

# Hold-out fraction and seed for the train/test split.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

# Use all the LHS data for the model
experimental_conditions, response = adapters.DataPipelineAdapter(
    "egfp_lhs", verbose=False
).get(
    X_columns=X_columns,
    y_columns=y_columns,
    paths_to_merge=[["csp_lhs", "Definitive Screening Design IVT"]],
)

# Mechanistic baseline. It gets its own name because `model` is bound to the
# Gaussian-process model later in this cell; sharing one name for both made it
# easy to call the wrong object.
mechanistic_model = ivt_model.AssimuloIVTModel(template=schema.DNATemplates.ANY)
predictions = mechanistic_model.predict_rna_yield(
    experimental_conditions=experimental_conditions,
    verbose=False,
)

# use the residuals of the model to filter out outliers
residuals = (
    response[schema.IVTReactionSchema.RNA_g_L.value]
    - predictions[schema.IVTReactionSchema.RNA_g_L.value]
)
# One-sided filter: only large negative residuals are rejected. Rows whose
# residual is NaN also fail the comparison and are therefore dropped.
filtered_indices = residuals[residuals > RESIDUAL_LOWER_BOUND].index
filtered_experimental_conditions = experimental_conditions.loc[filtered_indices]
filtered_response = response.loc[filtered_indices]


# Convert to plain arrays; selecting with the `y_columns` list keeps Y 2-D.
X = filtered_experimental_conditions[X_columns].values
Y = filtered_response[y_columns].values
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Optimiser / logging settings for the hyper-parameter fit.
LEARNING_RATE = 0.1
N_EPOCHS = 1_000
LOG_EVERY = 100

# -------- load data ----------------
tx = torch.tensor(X_train, dtype=torch.float32)
# Drop only the trailing target axis. A bare squeeze() removes every size-1
# axis, so a training split with a single row would collapse to a 0-d tensor.
ty = torch.tensor(y_train, dtype=torch.float32).squeeze(-1)

# -------- create model -------------
likelihood = gpytorch.likelihoods.GaussianLikelihood()
model = AssimuloMeanGaussianProcessModel(
    tx,
    ty,
    X_columns,
    likelihood,
    is_sequence=True,
    autoscaling=True,
)

# -------- fit ----------------------
model.train()
likelihood.train()
opt = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model)

for epoch in range(N_EPOCHS):
    opt.zero_grad()
    output = model(tx)
    # Minimise the negative marginal log-likelihood.
    loss = -mll(output, ty)
    loss.backward()
    opt.step()
    # epoch 0 already satisfies the modulo test, so no separate check is needed.
    if epoch % LOG_EVERY == 0:
        print(f"Epoch {epoch:4d} | Loss: {loss.item():.4f}")
        # Gradients are still those of this epoch's backward pass; they are
        # only cleared by zero_grad() at the top of the next iteration.
        for name, param in model.named_parameters():
            if param.grad is not None:
                print(f"  {name}: grad norm = {param.grad.norm().item():.4f}")

# -------- predict ------------------
# Put both modules into eval mode, mirroring the paired train() calls made
# before fitting.
model.eval()
likelihood.eval()

# Keep original scale for inputs/outputs; the model handles kernel input scaling internally.
# This call stays the last expression so the notebook displays its return value.
adapters.evaluate_model(
    model=model,
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

Could not find cannot import name 'dopri5' from 'assimulo.lib' (c:\Users\Uchek\anaconda3\envs\assimulo\Lib\site-packages\assimulo\lib\__init__.py)
Could not find cannot import name 'rodas' from 'assimulo.lib' (c:\Users\Uchek\anaconda3\envs\assimulo\Lib\site-packages\assimulo\lib\__init__.py)
Could not find cannot import name 'odassl' from 'assimulo.lib' (c:\Users\Uchek\anaconda3\envs\assimulo\Lib\site-packages\assimulo\lib\__init__.py)
Could not find ODEPACK functions.
Could not find RADAR5
Could not find GLIMDA.
Could not find RADAR5
Could not find GLIMDA.


Epoch    0 | Loss: 1.8501
  likelihood.noise_covar.raw_noise: grad norm = 0.0755
  covar_module.raw_outputscale: grad norm = 0.1239
  covar_module.base_kernel.raw_lengthscale: grad norm = 0.1677
Epoch  100 | Loss: 1.6715
  likelihood.noise_covar.raw_noise: grad norm = 0.0005
  covar_module.raw_outputscale: grad norm = 0.0004
  covar_module.base_kernel.raw_lengthscale: grad norm = 0.0011
Epoch  100 | Loss: 1.6715
  likelihood.noise_covar.raw_noise: grad norm = 0.0005
  covar_module.raw_outputscale: grad norm = 0.0004
  covar_module.base_kernel.raw_lengthscale: grad norm = 0.0011
Epoch  200 | Loss: 1.6715
  likelihood.noise_covar.raw_noise: grad norm = 0.0000
  covar_module.raw_outputscale: grad norm = 0.0000
  covar_module.base_kernel.raw_lengthscale: grad norm = 0.0000
Epoch  200 | Loss: 1.6715
  likelihood.noise_covar.raw_noise: grad norm = 0.0000
  covar_module.raw_outputscale: grad norm = 0.0000
  covar_module.base_kernel.raw_lengthscale: grad norm = 0.0000
Epoch  300 | Loss: 1.6715

{'r2': {'train': 0.9965724817743717, 'test': 0.7704413362167052},
 'mae': {'train': 0.14715219242659863, 'test': 0.8307916654162936},
 'rmse': {'train': np.float64(0.19579341441766562),
  'test': np.float64(1.1848731933907368)}}