# Fetch data for a single model

In [1]:
# Inputs left out of the feature matrix
excludeFeats = [
    "acure_anth_so2",
    "acure_carb_bb_ems",
    "acure_carb_ff_ems",
    "acure_carb_res_ems",
]
# One length scale per retained feature.
# NOTE(review): 59 is presumably the total number of simulation parameters -- confirm.
kernelLen = 59 - len(excludeFeats)

# Matern kernel with unit initial length scales, built and upserted to C3
GPR_kernel = (
    c3.SklearnGPRKernelMatern(
        lengthScale=kernelLen * [1.0],
        nu=0.5,
        coefficient=1.0,
    )
    .build()
    .kernel
    .upsert()
)

# Regression technique that wraps the kernel above
GPR_technique = c3.GaussianProcessRegressionTechnique(randomState=42, kernel=GPR_kernel).upsert()

# Data source: features come from SimulationModelParameters (minus the excluded
# ones); the target comes from Simulation3HourlyAODOutput, restricted by the
# filter to a single geoSurfaceTimePoint id.
GPR_dataspec = c3.GPRDataSourceSpec(
    featuresType=c3.TypeRef(typeName="SimulationModelParameters"),
    featuresSpec=c3.FetchSpec(limit=-1),
    excludeFeatures=excludeFeats,
    targetType=c3.TypeRef(typeName="Simulation3HourlyAODOutput"),
    targetSpec=c3.FetchSpec(
        filter="geoSurfaceTimePoint.id == '0.625_0.938_2017-07-01T00:20:00'"
    ),
    targetName="all",
).upsert()

# Pipe tying the technique to the data source (constructed only, not upserted)
GPR_pipe = c3.GaussianProcessRegressionPipe(technique=GPR_technique, dataSourceSpec=GPR_dataspec)

In [2]:
# Fetch the feature dataset through the pipe, then convert it to both a NumPy
# array and a pandas DataFrame; both forms are fed to sklearn further down.
X = GPR_pipe.getFeatures()
arX = c3.Dataset.toNumpy(dataset=X)
dfX = c3.Dataset.toPandas(dataset=X)
dfX

Unnamed: 0,acure_bl_nuc,acure_ait_width,acure_cloud_ph,acure_carb_ff_ems_eur,acure_carb_ff_ems_nam,acure_carb_ff_ems_chi,acure_carb_ff_ems_asi,acure_carb_ff_ems_mar,acure_carb_ff_ems_r,acure_carb_bb_ems_sam,...,acure_oxidants_o3,bparam,two_d_fsd_factor,c_r_correl,acure_autoconv_exp_lwp,acure_autoconv_exp_nd,dbsdtbs_turb_0,ai,m_ci,a_ent_1_rp
0,0.500000,0.650000,0.396000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,...,0.576175,0.500000,0.400000,0.900000,0.275862,0.605000,0.150000,0.514000,0.333333,0.460000
1,0.470000,0.500000,0.500000,0.530000,0.470000,0.530000,0.470000,0.530000,0.470000,0.530000,...,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000,0.500000
2,0.969888,0.083081,0.478474,0.577525,0.231615,0.283145,0.050178,0.419984,0.914346,0.716752,...,0.017104,0.927093,0.833905,0.610920,0.993935,0.755788,0.774187,0.960911,0.988952,0.508725
3,0.132847,0.445265,0.390414,0.925921,0.630498,0.187251,0.817525,0.147787,0.462419,0.542292,...,0.010731,0.950732,0.902536,0.780157,0.267910,0.018570,0.106893,0.218308,0.163327,0.936031
4,0.058261,0.630422,0.132292,0.009463,0.338064,0.913479,0.575490,0.412795,0.271141,0.370521,...,0.779013,0.129769,0.712185,0.552866,0.328090,0.651008,0.613814,0.101666,0.254514,0.089525
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,0.591530,0.996801,0.170201,0.677187,0.865209,0.194226,0.526719,0.355815,0.187145,0.553831,...,0.836703,0.515917,0.891746,0.418227,0.732156,0.900471,0.035621,0.175431,0.482366,0.400788
217,0.774235,0.165151,0.881014,0.038024,0.157743,0.035123,0.946019,0.699902,0.097885,0.200393,...,0.787844,0.786932,0.231686,0.840574,0.349984,0.433408,0.930975,0.119901,0.777433,0.116056
218,0.227072,0.231834,0.185796,0.997232,0.609053,0.753345,0.219389,0.438737,0.928641,0.861326,...,0.709293,0.204519,0.096402,0.147632,0.313749,0.741739,0.144573,0.488763,0.245905,0.686731
219,0.047377,0.633909,0.721278,0.751703,0.047339,0.692896,0.994552,0.155710,0.762626,0.072590,...,0.214451,0.303059,0.898960,0.114170,0.725976,0.766313,0.259877,0.872825,0.966951,0.826032


In [3]:
# Fetch the target dataset through the pipe and convert it the same way as the
# features (NumPy array and pandas DataFrame).
y = GPR_pipe.getTarget()
ary = c3.Dataset.toNumpy(dataset=y)
dfy = c3.Dataset.toPandas(dataset=y)
dfy

Unnamed: 0,all
0,0.276185
1,0.297315
2,0.424212
3,0.249577
4,0.244986
...,...
216,0.362796
217,0.374669
218,0.230886
219,0.168842


## Send data to file

In [4]:
# Write both DataFrames as pickle files, relative to the current working directory.
dfX.to_pickle("./dfX.pkl")
dfy.to_pickle("./dfy.pkl")

# Train the model in the typical way with the C3 method

In [4]:
# Train through the C3 pipe. X and y must still be the C3 datasets returned by
# getFeatures()/getTarget(); a later cell rebinds both names to NumPy arrays.
GPR_trained = GPR_pipe.train(input=X, targetOutput=y)

In [51]:
# Deserialize the model stored on the trained pipe and keep its `kernel_`
# attribute (presumably the fitted sklearn kernel -- confirm) for inspection.
c3_kernel = c3.PythonSerialization.deserialize(serialized=GPR_trained.trainedModel.model).kernel_

In [52]:
# Show the parameters of the kernel obtained from the C3 training run.
c3_kernel.get_params()

{'k1': 0.492**2,
 'k2': Matern(length_scale=[1e+05, 1e+05, 1e+05, 1e+05, 8.94e+04, 1e+05, 9.71e+04, 1e+05, 422, 1e+05, 1e+05, 273, 9.85e+04, 8.33e+04, 1e+05, 1e+05, 1e+05, 7.69e+04, 5.93e+04, 246, 6.55e+04, 234, 1e+05, 25.3, 62.9, 1e+05, 235, 1e+05, 1e+05, 282, 158, 67.9, 120, 1e+05, 240, 44.1, 150, 64.8, 1.85e+04, 1e+05, 4.15e+04, 133, 1e+05, 124, 205, 450, 366, 90.7, 162, 118, 455, 1e+05, 145, 262, 132], nu=0.5),
 'k1__constant_value': 0.24202518386767827,
 'k1__constant_value_bounds': (1e-05, 100000.0),
 'k2__length_scale': array([1.00000000e+05, 1.00000000e+05, 1.00000000e+05, 1.00000000e+05,
        8.93866091e+04, 1.00000000e+05, 9.71425199e+04, 1.00000000e+05,
        4.21894116e+02, 1.00000000e+05, 1.00000000e+05, 2.72693118e+02,
        9.85156795e+04, 8.33056169e+04, 1.00000000e+05, 1.00000000e+05,
        1.00000000e+05, 7.68703164e+04, 5.93470010e+04, 2.45861701e+02,
        6.54778048e+04, 2.33684626e+02, 1.00000000e+05, 2.52510450e+01,
        6.29253115e+01, 1.00000000e+

# Use the code inside the train method

Include the serialization/deserialization between the train and process methods.

In [41]:
from sklearn.gaussian_process import GaussianProcessRegressor

# Re-fetch the technique by the id held on the pipe, then fetch the kernel
# record that the technique references.
technique = c3.GaussianProcessRegressionTechnique.get(GPR_pipe.technique.id)
serializedKernel = c3.SklearnGPRKernel.get(technique.kernel.id)

In [42]:
# get data
# NOTE(review): this rebinds X and y from C3 datasets to NumPy arrays. Re-running
# this cell (or the GPR_pipe.train cell) afterwards would pass arrays where
# datasets were passed before. arX / ary already hold the same conversions.
X = c3.Dataset.toNumpy(dataset=X)
y = c3.Dataset.toNumpy(dataset=y)

In [43]:
# Turn the pickled kernel stored on the C3 kernel record back into a Python object.
kernel = c3.PythonSerialization.deserialize(serialized=serializedKernel.pickledKernel)

In [44]:
# build and train GPR
# NOTE(review): no random_state is passed here, whereas the direct sklearn cells
# further down use random_state=42 -- confirm whether that matters for this comparison.
gp = GaussianProcessRegressor(kernel=kernel)
gp.fit(X, y)

  / np.sqrt(D.sum(2))[:, :, np.newaxis]


GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], nu=0.5))

In [47]:
# Kernel parameters of the regressor right after fitting.
gp.kernel_.get_params()

{'k1': 0.492**2,
 'k2': Matern(length_scale=[8.93e+04, 8.48e+04, 9.56e+04, 8.98e+04, 7.33e+04, 1e+05, 1e+05, 8.97e+04, 421, 1e+05, 9.33e+04, 273, 1e+05, 6.6e+04, 1e+05, 1e+05, 1e+05, 6.23e+04, 5.26e+04, 246, 5.48e+04, 234, 8.16e+04, 25.3, 63, 1e+05, 235, 1e+05, 7.75e+04, 282, 159, 68, 120, 1e+05, 240, 44.2, 150, 64.9, 1.88e+04, 9.7e+04, 4.09e+04, 133, 9.18e+04, 124, 206, 451, 367, 90.8, 162, 119, 455, 9.45e+04, 145, 263, 132], nu=0.5),
 'k1__constant_value': 0.24241139430604702,
 'k1__constant_value_bounds': (1e-05, 100000.0),
 'k2__length_scale': array([8.92602387e+04, 8.48081159e+04, 9.55874714e+04, 8.98031859e+04,
        7.33395716e+04, 1.00000000e+05, 1.00000000e+05, 8.97385042e+04,
        4.20930634e+02, 1.00000000e+05, 9.33074096e+04, 2.73144564e+02,
        1.00000000e+05, 6.59677407e+04, 1.00000000e+05, 1.00000000e+05,
        1.00000000e+05, 6.23428878e+04, 5.26115280e+04, 2.46272223e+02,
        5.48312319e+04, 2.34155814e+02, 8.16188421e+04, 2.52879949e+01,
        6.30099

In [48]:
# Serialize the fitted regressor and wrap it in a trained-model artifact.
trainedModel = c3.MLTrainedModelArtifact(model=c3.PythonSerialization.serialize(obj=gp))

In [49]:
 # Restore the regressor from the artifact; this rebinds gp to the deserialized copy.
 gp = c3.PythonSerialization.deserialize(serialized=trainedModel.model)

In [50]:
# Kernel parameters after the serialize/deserialize round trip.
gp.kernel_.get_params()

{'k1': 0.492**2,
 'k2': Matern(length_scale=[8.93e+04, 8.48e+04, 9.56e+04, 8.98e+04, 7.33e+04, 1e+05, 1e+05, 8.97e+04, 421, 1e+05, 9.33e+04, 273, 1e+05, 6.6e+04, 1e+05, 1e+05, 1e+05, 6.23e+04, 5.26e+04, 246, 5.48e+04, 234, 8.16e+04, 25.3, 63, 1e+05, 235, 1e+05, 7.75e+04, 282, 159, 68, 120, 1e+05, 240, 44.2, 150, 64.9, 1.88e+04, 9.7e+04, 4.09e+04, 133, 9.18e+04, 124, 206, 451, 367, 90.8, 162, 119, 455, 9.45e+04, 145, 263, 132], nu=0.5),
 'k1__constant_value': 0.24241139430604702,
 'k1__constant_value_bounds': (1e-05, 100000.0),
 'k2__length_scale': array([8.92602387e+04, 8.48081159e+04, 9.55874714e+04, 8.98031859e+04,
        7.33395716e+04, 1.00000000e+05, 1.00000000e+05, 8.97385042e+04,
        4.20930634e+02, 1.00000000e+05, 9.33074096e+04, 2.73144564e+02,
        1.00000000e+05, 6.59677407e+04, 1.00000000e+05, 1.00000000e+05,
        1.00000000e+05, 6.23428878e+04, 5.26115280e+04, 2.46272223e+02,
        5.48312319e+04, 2.34155814e+02, 8.16188421e+04, 2.52879949e+01,
        6.30099

## Use Sklearn directly on the fetched datasets

In [13]:
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern

import numpy as np

In [14]:
# Anisotropic Matern kernel with one unit length scale per feature column.
# The dimension is taken from the data rather than hard-coded as 55, so the cell
# keeps working if the excluded-feature list changes.
kernel = 1.0 * Matern(length_scale=[1.0] * dfX.shape[1], nu=0.5)
gaussian_process = GaussianProcessRegressor(kernel=kernel, random_state=42)
# Fit directly on the pandas objects fetched above.
gaussian_process.fit(dfX, dfy)

  / np.sqrt(D.sum(2))[:, :, np.newaxis]


GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], nu=0.5),
                         random_state=42)

In [15]:
# Keep the fitted kernel of the direct sklearn model (DataFrame inputs).
skl_kernel = gaussian_process.kernel_

In [55]:
# Same fit as with the DataFrames, but on the NumPy arrays.
# The kernel dimension follows the array's column count rather than a hard-coded 55.
kernel = 1.0 * Matern(length_scale=[1.0] * arX.shape[1], nu=0.5)
gaussian_process = GaussianProcessRegressor(kernel=kernel, random_state=42)
gaussian_process.fit(arX, ary)

  / np.sqrt(D.sum(2))[:, :, np.newaxis]


GaussianProcessRegressor(kernel=1**2 * Matern(length_scale=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], nu=0.5),
                         random_state=42)

In [56]:
# Keep the fitted kernel of the direct sklearn model (NumPy inputs); rebinds skl_kernel.
skl_kernel = gaussian_process.kernel_

In [57]:
# Show the fitted sklearn kernel parameters.
skl_kernel.get_params()

{'k1': 0.492**2,
 'k2': Matern(length_scale=[8.93e+04, 8.48e+04, 9.56e+04, 8.98e+04, 7.33e+04, 1e+05, 1e+05, 8.97e+04, 421, 1e+05, 9.33e+04, 273, 1e+05, 6.6e+04, 1e+05, 1e+05, 1e+05, 6.23e+04, 5.26e+04, 246, 5.48e+04, 234, 8.16e+04, 25.3, 63, 1e+05, 235, 1e+05, 7.75e+04, 282, 159, 68, 120, 1e+05, 240, 44.2, 150, 64.9, 1.88e+04, 9.7e+04, 4.09e+04, 133, 9.18e+04, 124, 206, 451, 367, 90.8, 162, 119, 455, 9.45e+04, 145, 263, 132], nu=0.5),
 'k1__constant_value': 0.24241139430604702,
 'k1__constant_value_bounds': (1e-05, 100000.0),
 'k2__length_scale': array([8.92602387e+04, 8.48081159e+04, 9.55874714e+04, 8.98031859e+04,
        7.33395716e+04, 1.00000000e+05, 1.00000000e+05, 8.97385042e+04,
        4.20930634e+02, 1.00000000e+05, 9.33074096e+04, 2.73144564e+02,
        1.00000000e+05, 6.59677407e+04, 1.00000000e+05, 1.00000000e+05,
        1.00000000e+05, 6.23428878e+04, 5.26115280e+04, 2.46272223e+02,
        5.48312319e+04, 2.34155814e+02, 8.16188421e+04, 2.52879949e+01,
        6.30099

# Comparing length scales

In [18]:
# Length scales of the kernel fitted with plain sklearn.
skl_kernel.get_params()["k2__length_scale"]

array([8.92602387e+04, 8.48081159e+04, 9.55874714e+04, 8.98031859e+04,
       7.33395716e+04, 1.00000000e+05, 1.00000000e+05, 8.97385042e+04,
       4.20930634e+02, 1.00000000e+05, 9.33074096e+04, 2.73144564e+02,
       1.00000000e+05, 6.59677407e+04, 1.00000000e+05, 1.00000000e+05,
       1.00000000e+05, 6.23428878e+04, 5.26115280e+04, 2.46272223e+02,
       5.48312319e+04, 2.34155814e+02, 8.16188421e+04, 2.52879949e+01,
       6.30099640e+01, 1.00000000e+05, 2.35499509e+02, 1.00000000e+05,
       7.74961847e+04, 2.82258133e+02, 1.58755593e+02, 6.80320821e+01,
       1.20101179e+02, 1.00000000e+05, 2.39968789e+02, 4.42092890e+01,
       1.50247915e+02, 6.49495185e+01, 1.88363512e+04, 9.69664419e+04,
       4.08606621e+04, 1.33463623e+02, 9.17655349e+04, 1.23828595e+02,
       2.05785243e+02, 4.51354837e+02, 3.66762868e+02, 9.08449471e+01,
       1.62026051e+02, 1.18548983e+02, 4.55001880e+02, 9.45202150e+04,
       1.45189940e+02, 2.63193806e+02, 1.31986038e+02])

In [19]:
# Length scales of the kernel fitted through the C3 pipe.
c3_kernel.get_params()["k2__length_scale"]

array([1.00000000e+05, 1.00000000e+05, 1.00000000e+05, 1.00000000e+05,
       8.93866091e+04, 1.00000000e+05, 9.71425199e+04, 1.00000000e+05,
       4.21894116e+02, 1.00000000e+05, 1.00000000e+05, 2.72693118e+02,
       9.85156795e+04, 8.33056169e+04, 1.00000000e+05, 1.00000000e+05,
       1.00000000e+05, 7.68703164e+04, 5.93470010e+04, 2.45861701e+02,
       6.54778048e+04, 2.33684626e+02, 1.00000000e+05, 2.52510450e+01,
       6.29253115e+01, 1.00000000e+05, 2.35160626e+02, 1.00000000e+05,
       1.00000000e+05, 2.81800400e+02, 1.58436791e+02, 6.79126200e+01,
       1.19928354e+02, 1.00000000e+05, 2.39573564e+02, 4.41460243e+01,
       1.49983987e+02, 6.48382843e+01, 1.85362991e+04, 1.00000000e+05,
       4.14920396e+04, 1.33276630e+02, 1.00000000e+05, 1.23639522e+02,
       2.05476604e+02, 4.50127849e+02, 3.65834605e+02, 9.06995846e+01,
       1.61739643e+02, 1.18341370e+02, 4.54691383e+02, 1.00000000e+05,
       1.44938241e+02, 2.62276237e+02, 1.31792530e+02])

In [20]:
# Relative difference of the C3 length scales with respect to the sklearn ones:
# (sklearn - c3) / sklearn, element by element.
(
    skl_kernel.get_params()["k2__length_scale"]
    - c3_kernel.get_params()["k2__length_scale"]
) / skl_kernel.get_params()["k2__length_scale"]

array([-0.12031966, -0.17913243, -0.04616221, -0.11354624, -0.21880463,
        0.        ,  0.0285748 , -0.11434886, -0.00228893,  0.        ,
       -0.07172625,  0.00165277,  0.0148432 , -0.26282356,  0.        ,
        0.        ,  0.        , -0.23302463, -0.12802276,  0.00166694,
       -0.19416987,  0.00201229, -0.22520729,  0.00146116,  0.00134348,
        0.        ,  0.001439  ,  0.        , -0.29038611,  0.00162168,
        0.00200813,  0.00175597,  0.00143899,  0.        ,  0.00164699,
        0.00143103,  0.00175662,  0.00171262,  0.01592942, -0.03128462,
       -0.01545196,  0.00140107, -0.08973375,  0.00152689,  0.00149981,
        0.00271845,  0.00253096,  0.00160012,  0.00176767,  0.00175128,
        0.00068241, -0.05797474,  0.00173359,  0.00348629,  0.00146613])

# Version checks

In [21]:
# Report the versions of the numerical stack loaded in this kernel
# (printed in the order sklearn, numpy, scipy).
import numpy
import scipy
import sklearn

print(sklearn.__version__)
print(numpy.__version__)
print(scipy.__version__)

0.23.1
1.18.1
1.4.1


In [25]:
# NOTE(review): `!python` is resolved by the shell, so it need not be the
# interpreter running this kernel -- the recorded output here (3.8.10) differs
# from python_version() in the cell below (3.6.13).
!python --version

Python 3.8.10


In [26]:
from platform import python_version

In [27]:
# Version of the Python interpreter that is executing this notebook.
python_version()

'3.6.13'

# Using py-gordon-ML_1_0_0

## Sklearn loop

In [28]:
# Fit the same sklearn model ten times from a fresh kernel and collect the
# fitted length scales, to check whether plain sklearn is repeatable.
parameters = []

for _ in range(10):
    # Kernel dimension follows the data's column count instead of a hard-coded 55.
    kernel = 1.0 * Matern(length_scale=[1.0] * dfX.shape[1], nu=0.5)
    gaussian_process = GaussianProcessRegressor(kernel=kernel, random_state=42)
    gaussian_process.fit(dfX, dfy)
    skl_kernel = gaussian_process.kernel_
    parameters.append(skl_kernel.get_params()["k2__length_scale"])

  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]
  / np.sqrt(D.sum(2))[:, :, np.newaxis]


In [29]:
# Length-scale arrays collected from the ten fits.
parameters

[array([8.92602387e+04, 8.48081159e+04, 9.55874714e+04, 8.98031859e+04,
        7.33395716e+04, 1.00000000e+05, 1.00000000e+05, 8.97385042e+04,
        4.20930634e+02, 1.00000000e+05, 9.33074096e+04, 2.73144564e+02,
        1.00000000e+05, 6.59677407e+04, 1.00000000e+05, 1.00000000e+05,
        1.00000000e+05, 6.23428878e+04, 5.26115280e+04, 2.46272223e+02,
        5.48312319e+04, 2.34155814e+02, 8.16188421e+04, 2.52879949e+01,
        6.30099640e+01, 1.00000000e+05, 2.35499509e+02, 1.00000000e+05,
        7.74961847e+04, 2.82258133e+02, 1.58755593e+02, 6.80320821e+01,
        1.20101179e+02, 1.00000000e+05, 2.39968789e+02, 4.42092890e+01,
        1.50247915e+02, 6.49495185e+01, 1.88363512e+04, 9.69664419e+04,
        4.08606621e+04, 1.33463623e+02, 9.17655349e+04, 1.23828595e+02,
        2.05785243e+02, 4.51354837e+02, 3.66762868e+02, 9.08449471e+01,
        1.62026051e+02, 1.18548983e+02, 4.55001880e+02, 9.45202150e+04,
        1.45189940e+02, 2.63193806e+02, 1.31986038e+02]),
 array

Sklearn generates exactly the same results at each iteration.

## Dataset / Dataframe serialization

In [34]:
# serialize dfX
X_1 = c3.Dataset.fromPython(dfX)

# deserialize it
dfX_1 = c3.Dataset.toPandas(X_1)

# deserialize it to numpy
array_X_1 = c3.Dataset.toNumpy(X_1)

In [33]:
# Element-wise difference between the original frame and its C3 round trip;
# describe() summarises that difference per column.
# NOTE(review): dfX_1 comes from the cell above, whose execution count (In [34])
# is later than this cell's (In [33]) -- the cells were run out of order.
diff = dfX - dfX_1
diff.describe()

Unnamed: 0,acure_bl_nuc,acure_ait_width,acure_cloud_ph,acure_carb_ff_ems_eur,acure_carb_ff_ems_nam,acure_carb_ff_ems_chi,acure_carb_ff_ems_asi,acure_carb_ff_ems_mar,acure_carb_ff_ems_r,acure_carb_bb_ems_sam,...,acure_oxidants_o3,bparam,two_d_fsd_factor,c_r_correl,acure_autoconv_exp_lwp,acure_autoconv_exp_nd,dbsdtbs_turb_0,ai,m_ci,a_ent_1_rp
count,221.0,221.0,221.0,221.0,221.0,221.0,221.0,221.0,221.0,221.0,...,221.0,221.0,221.0,221.0,221.0,221.0,221.0,221.0,221.0,221.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
std,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


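That seems okay too: the DataFrame → `c3.Dataset` → DataFrame round trip reproduces `dfX` exactly (every element-wise difference is 0.0).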

In [35]:
# NumPy array obtained from the round-tripped dataset.
array_X_1

array([[0.5       , 0.65      , 0.396     , ..., 0.514     , 0.33333333,
        0.46      ],
       [0.47      , 0.5       , 0.5       , ..., 0.5       , 0.5       ,
        0.5       ],
       [0.96988759, 0.08308147, 0.47847368, ..., 0.96091054, 0.98895154,
        0.50872524],
       ...,
       [0.22707157, 0.23183416, 0.18579586, ..., 0.48876258, 0.24590541,
        0.68673073],
       [0.04737704, 0.63390885, 0.72127844, ..., 0.87282541, 0.96695102,
        0.82603208],
       [0.91227063, 0.27720145, 0.81453913, ..., 0.88457225, 0.72461078,
        0.87985117]])

# Dispatch code to server as Lambda

In [1]:
def train_model():
    """Build a GPR pipe, train it through C3 and return the fitted length scales.

    Everything the run needs (kernel, technique, data source spec, pipe) is
    created inside the function, which only relies on the ambient ``c3``
    object, so the function can be handed to ``c3.Lambda.fromPython`` as is.

    Returns:
        The ``k2__length_scale`` entry of the fitted kernel's parameters.
    """
    # Features left out of the inputs; the kernel gets one length scale per remaining feature.
    excluded = ["acure_anth_so2", "acure_carb_bb_ems", "acure_carb_ff_ems", "acure_carb_res_ems"]
    n_dims = 59 - len(excluded)

    # Kernel and technique are upserted before the pipe is assembled.
    matern_kernel = (
        c3.SklearnGPRKernelMatern(lengthScale=n_dims * [1.0], nu=0.5, coefficient=1.0)
        .build()
        .kernel
        .upsert()
    )
    gpr_technique = c3.GaussianProcessRegressionTechnique(randomState=42, kernel=matern_kernel).upsert()

    # Features: SimulationModelParameters; target: one geoSurfaceTimePoint of the AOD output.
    data_spec = c3.GPRDataSourceSpec(
        featuresType=c3.TypeRef(typeName="SimulationModelParameters"),
        featuresSpec=c3.FetchSpec(limit=-1),
        excludeFeatures=excluded,
        targetType=c3.TypeRef(typeName="Simulation3HourlyAODOutput"),
        targetSpec=c3.FetchSpec(filter="geoSurfaceTimePoint.id == '0.625_0.938_2017-07-01T00:20:00'"),
        targetName="all",
    ).upsert()
    pipe = c3.GaussianProcessRegressionPipe(technique=gpr_technique, dataSourceSpec=data_spec)

    # Fetch the training data through the pipe, then train on it.
    features = pipe.getFeatures()
    target = pipe.getTarget()
    trained_pipe = pipe.train(input=features, targetOutput=target)

    # Recover the fitted kernel from the serialized trained model.
    fitted_kernel = c3.PythonSerialization.deserialize(serialized=trained_pipe.trainedModel.model).kernel_
    return fitted_kernel.get_params()['k2__length_scale']

In [2]:
# Wrap train_model as a C3 Lambda for the gordon-ML_1_0_0 runtime and apply it.
# NOTE(review): this rebinds c3_kernel, which earlier cells use for a kernel
# object; here it holds train_model's return value (the length scales) instead.
c3_kernel = c3.Lambda.fromPython(train_model, runtime="gordon-ML_1_0_0").apply()

In [66]:
# Length scales returned by the Lambda run.
print(c3_kernel)

[100000.00000000001, 100000.00000000001, 100000.00000000001, 100000.00000000001, 89386.60911083498, 100000.00000000001, 97142.51992393423, 100000.00000000001, 421.8941158666919, 100000.00000000001, 100000.00000000001, 272.69311791970944, 98515.67953991563, 83305.61686288506, 100000.00000000001, 100000.00000000001, 100000.00000000001, 76870.31637186119, 59347.00097049436, 245.86170107823517, 65477.8048298974, 233.68462556863327, 100000.00000000001, 25.251044971756432, 62.92531145124077, 100000.00000000001, 235.16062551230507, 100000.00000000001, 100000.00000000001, 281.8003996785048, 158.4367912708153, 67.91262003293444, 119.92835439134923, 100000.00000000001, 239.57356396200376, 44.14602433970742, 149.98398658841808, 64.83828433222116, 18536.29905894243, 100000.00000000001, 41492.0396089717, 133.27663049407585, 100000.00000000001, 123.6395219431534, 205.476604239791, 450.12784905581645, 365.8346051939183, 90.69958464202328, 161.7396428006731, 118.34136984805768, 454.69138310153573, 100

In [63]:
# Call the same function directly in this kernel, for comparison with the Lambda result.
c3_kernel2 = train_model()

In [65]:
# Length scales from the direct (in-notebook) call.
print(c3_kernel2)

[1.00000000e+05 1.00000000e+05 1.00000000e+05 1.00000000e+05
 1.00000000e+05 1.00000000e+05 1.00000000e+05 1.00000000e+05
 4.21668536e+02 1.00000000e+05 1.00000000e+05 2.73249423e+02
 1.00000000e+05 1.00000000e+05 1.00000000e+05 1.00000000e+05
 1.00000000e+05 1.00000000e+05 1.00000000e+05 2.46722463e+02
 1.00000000e+05 2.34168219e+02 1.00000000e+05 2.53016454e+01
 6.30416915e+01 1.00000000e+05 2.35593522e+02 1.00000000e+05
 1.00000000e+05 2.82373141e+02 1.58804916e+02 6.80547504e+01
 1.20154609e+02 1.00000000e+05 2.40010728e+02 4.42328247e+01
 1.50318848e+02 6.49750355e+01 6.86067502e+04 1.00000000e+05
 1.00000000e+05 1.33525551e+02 1.00000000e+05 1.23914263e+02
 2.05901751e+02 4.52394565e+02 3.66761143e+02 9.08776812e+01
 1.62096188e+02 1.18616164e+02 4.54392120e+02 1.00000000e+05
 1.45282857e+02 2.62994647e+02 1.32077327e+02]


# Send data to file