In [15]:
import r3.adapters as adapters
import r3.schema as schema


X_columns = [
    schema.IVTReactionSchema.pH.value,
    schema.IVTReactionSchema.TIME_min.value,
]
y_columns = [schema.IVTReactionSchema.RNA_g_L.value]
experimental_conditions, response = adapters.DataPipelineAdapter(
    "kate_ph_repeat", verbose=False
).get_all(
    X_columns=X_columns,
    y_columns=y_columns,
    paths=[
        ["charlie_ph", "Sheet1"],
        ["ricardo_ph", "eGFP HEPES"],
        ["ricardo_ph", "eGFP TRIS"],
    ],
)
response

[(['kate_ph_repeat'],
     RNA [g/L]
  0   0.000000
  1   1.322399
  2   2.844903
  3   5.138938
  4   7.869962
  5   9.969641
  6  11.647534
  7  12.824160
  8  13.605147
  9  14.279864),
 (['charlie_ph', 'Sheet1'],
     RNA [g/L]
  0   0.000000
  1   2.051094
  2   4.190807
  3   8.688998
  4  13.452314
  5  14.172919
  6  13.440914
  7  14.536508
  8  12.251665),
 (['ricardo_ph', 'eGFP HEPES'],
      RNA [g/L]
  0    0.094000
  1    0.164000
  2    0.369600
  3    0.816267
  4    1.842400
  5    2.635467
  6    4.327467
  7    6.281867
  8    7.022133
  9    7.073333
  10  10.997300),
 (['ricardo_ph', 'eGFP TRIS'],
      RNA [g/L]
  0    0.239200
  1    0.634933
  2    1.089333
  3    2.428000
  4    3.341333
  5    5.464267
  6    8.053333
  7    8.027733
  8   10.294400
  9    9.910400
  10  10.351000)]

In [25]:
import xgboost
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C

for i in range(len(experimental_conditions)):
    current_path, X_df = experimental_conditions[i]
    X_train = X_df.values
    y_train = response[i][1].values
    X_test_list, y_test_list = [], []
    for j in range(len(experimental_conditions)):
        if current_path == experimental_conditions[j][0]:
            continue
        _, X_test_df = experimental_conditions[j]
        _, y_test_df = response[j]
        X_test_list.append(X_test_df.values)
        y_test_list.append(y_test_df.values)

    xgb_model = xgboost.XGBRegressor(
        n_estimators=50,
        learning_rate=0.1,
        max_depth=3,
        random_state=42,
    )

    gp_model = GaussianProcessRegressor(
        kernel=C(1.0, constant_value_bounds="fixed")
        * RBF(length_scale=1.0, length_scale_bounds="fixed"),
        n_restarts_optimizer=10,
    )

    for X_test, y_test in zip(X_test_list, y_test_list):
        xgb_model.fit(X_train, y_train)
        print(
            f"Evaluating XGboost for {current_path} against {experimental_conditions[j][0]}"
        )
        print(
            adapters.evaluate_model(
                xgb_model,
                X_train,
                y_train,
                X_test,
                y_test,
            )[
                "r2"
            ]["test"]
        )

        print(
            f"Evaluating Gaussian Process for {current_path} against {experimental_conditions[j][0]}"
        )
        print(
            adapters.evaluate_model(
                gp_model,
                X_train,
                y_train,
                X_test,
                y_test,
            )[
                "r2"
            ]["test"]
        )
        print("\n")

Evaluating XGboost for ['kate_ph_repeat'] against ['ricardo_ph', 'eGFP TRIS']
0.5150371010070478
Evaluating Gaussian Process for ['kate_ph_repeat'] against ['ricardo_ph', 'eGFP TRIS']
-2.9323146482311393


Evaluating XGboost for ['kate_ph_repeat'] against ['ricardo_ph', 'eGFP TRIS']
-2.377209294683336
Evaluating Gaussian Process for ['kate_ph_repeat'] against ['ricardo_ph', 'eGFP TRIS']
-1.1918879638520323


Evaluating XGboost for ['kate_ph_repeat'] against ['ricardo_ph', 'eGFP TRIS']
0.19046880587447435
Evaluating Gaussian Process for ['kate_ph_repeat'] against ['ricardo_ph', 'eGFP TRIS']
-1.9877351047592082


Evaluating XGboost for ['charlie_ph', 'Sheet1'] against ['ricardo_ph', 'eGFP TRIS']
-0.1648113204930406
Evaluating Gaussian Process for ['charlie_ph', 'Sheet1'] against ['ricardo_ph', 'eGFP TRIS']
-2.482055242973995


Evaluating XGboost for ['charlie_ph', 'Sheet1'] against ['ricardo_ph', 'eGFP TRIS']
-7.492857788886095
Evaluating Gaussian Process for ['charlie_ph', 'Sheet1'] aga