# Get a list of GSTPs to consider

My preferred way is to grab the targetSpec filters directly from the parameter table, so all the machinery below is copied in order to collect a list of these strings from that table.

In [34]:
lat1 = -30.0
lat2 = 10.0
lon1 = -45.0
lon2 = 40.0
time1 = "2017-07-01T00:00:00"
time2 = "2017-07-01T02:59:59"
gstpFilter = c3.Filter() \
    .ge("latitude", lat1) \
    .and_().le("latitude", lat2) \
    .and_().ge("longitude", lon1) \
    .and_().le("longitude", lon2) \
    .and_().ge("time", time1) \
    .and_().le("time", time2)

excludeFeats = ["acure_anth_so2", "acure_carb_bb_ems", "acure_carb_ff_ems", "acure_carb_res_ems"]
kernelLen = 59 - len(excludeFeats)

GPR_kernel = c3.SklearnGPRKernelMatern(lengthScale=[1.0]*kernelLen, nu=0.5, coefficient=1.0).build().kernel

GPR_technique = c3.GaussianProcessRegressionTechnique(
                    randomState=42,
                    kernel = GPR_kernel
)

In [35]:
job = c3.AODGPRModelFinder.extractLearnedParametersJob(excludeFeats, gstpFilter, "all", GPR_technique, 10)

In [41]:
job.status()

c3.MapReduceStatus(
 started=datetime.datetime(2022, 8, 15, 21, 10, 39, tzinfo=datetime.timezone.utc),
 startedby='jcarzon@andrew.cmu.edu',
 completed=datetime.datetime(2022, 8, 15, 21, 14, 25, tzinfo=datetime.timezone.utc),
 status='completed')

In [42]:
df = c3.AODGPRModelFinder.getDataframeFromJob(job)

In [43]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,49,50,51,52,53,54,modelId,latitude,longitude,time
0,477.589894,26389.964348,17830.306158,13025.238330,37326.271992,23790.255157,38716.632380,36114.558567,16616.476091,23587.198932,...,92.528024,350.771145,38750.571208,154.860874,16389.621089,109.744419,199ccfa9-b72e-42e3-b17d-9acf01bd8e1e,-0.625,-0.9375,2017-07-01T00:20:00
1,477.589894,26389.964348,17830.306158,13025.238330,37326.271992,23790.255157,38716.632380,36114.558567,16616.476091,23587.198932,...,92.528024,350.771145,38750.571208,154.860874,16389.621089,109.744419,1ae2b3dc-f62e-4a60-8510-5b36166bc47b,-0.625,-0.9375,2017-07-01T00:20:00
2,477.589894,26389.964348,17830.306158,13025.238330,37326.271992,23790.255157,38716.632380,36114.558567,16616.476091,23587.198932,...,92.528024,350.771145,38750.571208,154.860874,16389.621089,109.744419,5dd01dd5-10e2-486a-9136-61bfcefdfd4a,-0.625,-0.9375,2017-07-01T00:20:00
3,477.589894,26389.964348,17830.306158,13025.238330,37326.271992,23790.255157,38716.632380,36114.558567,16616.476091,23587.198932,...,92.528024,350.771145,38750.571208,154.860874,16389.621089,109.744419,6e0c728d-e3bb-4afe-9bc2-d821abcddaa0,-0.625,-0.9375,2017-07-01T00:20:00
4,477.589894,26389.964348,17830.306158,13025.238330,37326.271992,23790.255157,38716.632380,36114.558567,16616.476091,23587.198932,...,92.528024,350.771145,38750.571208,154.860874,16389.621089,109.744419,73ac586a-e28e-40c2-9e79-961ebae9f5e9,-0.625,-0.9375,2017-07-01T00:20:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3886,280.316120,976.252696,37710.786663,27074.369314,35238.437805,37950.002471,32050.342123,21711.169740,247.747471,22290.277151,...,167.941213,375.408663,41837.952440,100000.000000,41500.159802,100.964739,0bd2056f-8493-4d46-abce-a085771407be,-6.875,8.4375,2017-07-01T00:20:00
3887,280.316120,976.252696,37710.786663,27074.369314,35238.437805,37950.002471,32050.342123,21711.169740,247.747471,22290.277151,...,167.941213,375.408663,41837.952440,100000.000000,41500.159802,100.964739,2ff4abf8-d3e6-4926-aecb-e6aa945a8b35,-6.875,8.4375,2017-07-01T00:20:00
3888,280.316120,976.252696,37710.786663,27074.369314,35238.437805,37950.002471,32050.342123,21711.169740,247.747471,22290.277151,...,167.941213,375.408663,41837.952440,100000.000000,41500.159802,100.964739,514410fb-b8ea-4232-a169-140373f822e7,-6.875,8.4375,2017-07-01T00:20:00
3889,280.316120,976.252696,37710.786663,27074.369314,35238.437805,37950.002471,32050.342123,21711.169740,247.747471,22290.277151,...,167.941213,375.408663,41837.952440,100000.000000,41500.159802,100.964739,8caebfdc-0eec-4b88-b2c2-5d559eb87e57,-6.875,8.4375,2017-07-01T00:20:00


Below I'm just making up examples of filter strings.

In [48]:
def get_GSTP_filter(
    k
):
    """
    k : int
        Index for a list of model IDs
    """
    pipe = c3.GaussianProcessRegressionPipe.get(df.modelId[k])
    sourceSpec = c3.GPRDataSourceSpec.get(pipe.dataSourceSpec.id)
    return sourceSpec.targetSpec.filter

geoSurfaceTimePoints = list(set([get_GSTP_filter(k) for k in range(200)]))

gstpFilterShort = ' || '.join(geoSurfaceTimePoints[0:5])
gstpFilterLong = ' || '.join(geoSurfaceTimePoints)

In [50]:
len(gstpFilterShort)

325

In [51]:
len(gstpFilterLong)

4261

In [79]:
geoSurfaceTimePoints[0:5]

['geoSurfaceTimePoint.id == "-8.125_-34.688_2017-07-01T00:20:00"',
 'geoSurfaceTimePoint.id == "-8.125_-44.062_2017-07-01T00:20:00"',
 'geoSurfaceTimePoint.id == "-0.625_-17.812_2017-07-01T00:20:00"',
 'geoSurfaceTimePoint.id == "-8.125_25.312_2017-07-01T00:20:00"',
 'geoSurfaceTimePoint.id == "-8.125_-12.188_2017-07-01T00:20:00"']

# Train a single model using these GSTPs

## Short example

This should run just fine.

In [62]:
# features to ignore
excludeFeats = ['acure_bl_nuc', 'acure_ait_width', 'acure_cloud_ph',
       'acure_carb_ff_ems', 'acure_carb_ff_ems_eur', 'acure_carb_ff_ems_nam',
       'acure_carb_ff_ems_chi', 'acure_carb_ff_ems_asi',
       'acure_carb_ff_ems_mar', 'acure_carb_ff_ems_r', 'acure_carb_bb_ems',
       'acure_carb_bb_ems_sam', 'acure_carb_bb_ems_naf',
       'acure_carb_bb_ems_saf', 'acure_carb_bb_ems_bnh',
       'acure_carb_bb_ems_rnh', 'acure_carb_bb_ems_rsh', 'acure_carb_res_ems',
       'acure_carb_res_ems_chi', 'acure_carb_res_ems_asi',
       'acure_carb_res_ems_afr', 'acure_carb_res_ems_lat',
       'acure_carb_res_ems_r', 'acure_carb_ff_diam', 'acure_carb_bb_diam',
       'acure_carb_res_diam', 'acure_prim_so4_diam', 'acure_sea_spray',
       'acure_anth_so2', 'acure_anth_so2_chi', 'acure_anth_so2_asi',
       'acure_anth_so2_eur', 'acure_anth_so2_nam', 'acure_anth_so2_r',
       'acure_volc_so2', 'acure_bvoc_soa', 'acure_dms', 'acure_prim_moc',
       'acure_dry_dep_ait', 'acure_dry_dep_so2',
       'acure_kappa_oc', 'acure_sig_w', 'acure_rain_frac',
       'acure_cloud_ice_thresh', 'acure_convective_plume_scavenging',
       'acure_scav_diam', 'acure_bc_ri', 'acure_oxidants_oh',
       'acure_oxidants_o3', 'bparam', 'two_d_fsd_factor', 'c_r_correl',
       'acure_autoconv_exp_lwp', 'acure_autoconv_exp_nd', 'dbsdtbs_turb_0',
       'ai', 'm_ci', 'a_ent_1_rp']
kernelLen = 59 - len(excludeFeats)

# create kernel
GPR_kernel = c3.SklearnGPRKernelMatern(lengthScale=[1.0]*kernelLen, nu=0.5, coefficient=1.0).build().kernel.upsert()

# define technique
GPR_technique = c3.GaussianProcessRegressionTechnique(
                    randomState=42,
                    kernel = GPR_kernel
).upsert()

# define data source spec
GPR_dataspec = c3.GPRDataSourceSpec(
    featuresType = c3.TypeRef(
        typeName="SimulationModelParameters"
    ),
    featuresSpec=c3.FetchSpec(
        limit=-1
    ),
    excludeFeatures=excludeFeats,
    targetType=c3.TypeRef(
        typeName="Simulation3HourlyAODOutput"
    ),
    targetSpec=c3.FetchSpec(
        filter=gstpFilterShort ###
    ),
    targetName="all"
).upsert()

# create pipe
GPR_pipe = c3.GaussianProcessRegressionPipe(
    technique=GPR_technique,
    dataSourceSpec=GPR_dataspec
)

In [63]:
import pandas as pd

X = GPR_pipe.getFeatures()
dfX = c3.Dataset.toPandas(dataset=X)
X = c3.Dataset.fromPython(pd.concat([dfX]*5, axis=0, ignore_index=True))
c3.Dataset.toPandas(dataset=X)

Unnamed: 0,acure_dry_dep_acc
0,0.500000
1,0.470000
2,0.618559
3,0.407896
4,0.746683
...,...
1100,0.853673
1101,0.511722
1102,0.833222
1103,0.910051


In [64]:
y = GPR_pipe.getTarget()
dfy = c3.Dataset.toPandas(dataset=y)
dfy

Unnamed: 0,all
0,0.180022
1,0.188939
2,0.582351
3,0.197222
4,0.144252
...,...
1100,0.145601
1101,0.099397
1102,0.091157
1103,0.101554


In [65]:
GPR_trained = GPR_pipe.train(input=X, targetOutput=y)

## Long example

This example should return a StringOverflow error.

In [66]:
# features to ignore
excludeFeats = ['acure_bl_nuc', 'acure_ait_width', 'acure_cloud_ph',
       'acure_carb_ff_ems', 'acure_carb_ff_ems_eur', 'acure_carb_ff_ems_nam',
       'acure_carb_ff_ems_chi', 'acure_carb_ff_ems_asi',
       'acure_carb_ff_ems_mar', 'acure_carb_ff_ems_r', 'acure_carb_bb_ems',
       'acure_carb_bb_ems_sam', 'acure_carb_bb_ems_naf',
       'acure_carb_bb_ems_saf', 'acure_carb_bb_ems_bnh',
       'acure_carb_bb_ems_rnh', 'acure_carb_bb_ems_rsh', 'acure_carb_res_ems',
       'acure_carb_res_ems_chi', 'acure_carb_res_ems_asi',
       'acure_carb_res_ems_afr', 'acure_carb_res_ems_lat',
       'acure_carb_res_ems_r', 'acure_carb_ff_diam', 'acure_carb_bb_diam',
       'acure_carb_res_diam', 'acure_prim_so4_diam', 'acure_sea_spray',
       'acure_anth_so2', 'acure_anth_so2_chi', 'acure_anth_so2_asi',
       'acure_anth_so2_eur', 'acure_anth_so2_nam', 'acure_anth_so2_r',
       'acure_volc_so2', 'acure_bvoc_soa', 'acure_dms', 'acure_prim_moc',
       'acure_dry_dep_ait', 'acure_dry_dep_so2',
       'acure_kappa_oc', 'acure_sig_w', 'acure_rain_frac',
       'acure_cloud_ice_thresh', 'acure_convective_plume_scavenging',
       'acure_scav_diam', 'acure_bc_ri', 'acure_oxidants_oh',
       'acure_oxidants_o3', 'bparam', 'two_d_fsd_factor', 'c_r_correl',
       'acure_autoconv_exp_lwp', 'acure_autoconv_exp_nd', 'dbsdtbs_turb_0',
       'ai', 'm_ci', 'a_ent_1_rp']
kernelLen = 59 - len(excludeFeats)

# create kernel
GPR_kernel = c3.SklearnGPRKernelMatern(lengthScale=[1.0]*kernelLen, nu=0.5, coefficient=1.0).build().kernel.upsert()

# define technique
GPR_technique = c3.GaussianProcessRegressionTechnique(
                    randomState=42,
                    kernel = GPR_kernel
).upsert()

# define data source spec
GPR_dataspec = c3.GPRDataSourceSpec(
    featuresType = c3.TypeRef(
        typeName="SimulationModelParameters"
    ),
    featuresSpec=c3.FetchSpec(
        limit=-1
    ),
    excludeFeatures=excludeFeats,
    targetType=c3.TypeRef(
        typeName="Simulation3HourlyAODOutput"
    ),
    targetSpec=c3.FetchSpec(
        filter=gstpFilterLong ###
    ),
    targetName="all"
).upsert()

# create pipe
GPR_pipe = c3.GaussianProcessRegressionPipe(
    technique=GPR_technique,
    dataSourceSpec=GPR_dataspec
)

500 - StringOverflow - c3.engine.database.DbException_validationErrors [8226.11971]
message: "Write failed: Attempt to insert/update a value of length 4261 for field filter with column length 4000: geoSurfaceTimePoint.id == "-8.125_-34.688_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-44.062_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-0.625_-17.812_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_25.312_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-12.188_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-4.688_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_14.062_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_2.812_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-0.625_-15.938_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-29.062_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-42.188_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-10.625_-12.188_2017-07-01T00:20:00" || geoSurfaceTime

C3RuntimeException: 500 - StringOverflow - c3.engine.database.DbException_validationErrors [8226.11971]
message: "Write failed: Attempt to insert/update a value of length 4261 for field filter with column length 4000: geoSurfaceTimePoint.id == "-8.125_-34.688_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-44.062_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-0.625_-17.812_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_25.312_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-12.188_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-4.688_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_14.062_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_2.812_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-0.625_-15.938_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-29.062_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-42.188_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-10.625_-12.188_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_-30.938_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_36.562_2017-07-01T00:20:00" || geoSurfaceTimePoint.id == "-8.125_4.688_2017-07-01T00:20:00" || geoSurfaceTimePoi..."
JSON: {"this": {"type": "GPRDataSourceSpec", "featuresType": {"type": "TypeRef", "typeName": "SimulationModelParameters"}, "featuresSpec": {"type": "FetchSpec", "offset": 0, "limit": -1}, "excludeFeatures": ["acure_bl_nuc", "acure_ait_width", "acure_cloud_ph", "acure_carb_ff_ems", "acure_carb_ff_ems_eur", "acure_carb_ff_ems_nam", "acure_carb_ff_ems_chi", "acure_carb_ff_ems_asi", "acure_carb_ff_ems_mar", "acure_carb_ff_ems_r", "acure_carb_bb_ems", "acure_carb_bb_ems_sam", "acure_carb_bb_ems_naf", "acure_carb_bb_ems_saf", "acure_carb_bb_ems_bnh", "acure_carb_bb_ems_rnh", "acure_carb_bb_ems_rsh", "acure_carb_res_ems", "acure_carb_res_ems_chi", "acure_carb_res_ems_asi", "acure_carb_res_ems_afr", "acure_carb_res_ems_lat", "acure_carb_res_ems_r", "acure_carb_ff_diam", "acure_carb_bb_diam", "acure_carb_res_diam", "acure_prim_so4_diam", "acure_sea_spray", "acure_anth_so2", "acure_anth_so2_chi", "acure_anth_so2_asi", "acure_anth_so2_eur", "acure_anth_so2_nam", "acure_anth_so2_r", "acure_volc_so2", "acure_bvoc_soa", "acure_dms", "acure_prim_moc", "acure_dry_dep_ait", "acure_dry_dep_so2", "acure_kappa_oc", "acure_sig_w", "acure_rain_frac", "acure_cloud_ice_thresh", "acure_convective_plume_scavenging", "acure_scav_diam", "acure_bc_ri", "acure_oxidants_oh", "acure_oxidants_o3", "bparam", "two_d_fsd_factor", "c_r_correl", "acure_autoconv_exp_lwp", "acure_autoconv_exp_nd", "dbsdtbs_turb_0", "ai", "m_ci", "a_ent_1_rp"], "targetType": {"type": "TypeRef", "typeName": "Simulation3HourlyAODOutput"}, "targetSpec": {"type": "FetchSpec", "filter": "geoSurfaceTimePoint.id == \"-8.125_-34.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-44.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-17.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_25.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-12.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-4.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_14.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_2.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-15.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-29.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-42.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-12.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-30.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_36.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_4.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-0.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-21.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-15.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_19.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_12.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_30.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-40.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-14.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-21.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-0.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-0.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-8.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_15.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-23.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-10.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-32.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_10.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-23.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-12.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-2.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-21.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-10.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-19.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-2.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_17.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_6.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-14.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-25.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-17.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-19.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-17.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-19.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-36.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-15.938_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-2.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_34.688_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-27.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_29.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-6.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_32.812_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_21.562_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_38.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_23.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_27.188_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_8.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-10.625_-14.062_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-10.312_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-0.625_-23.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_-38.438_2017-07-01T00:20:00\" || geoSurfaceTimePoint.id == \"-8.125_0.938_2017-07-01T00:20:00\"", "offset": 0, "limit": 2000}, "targetName": "all"}, "srcObj": null, "spec": null}

## Try with intersecting filter

In [86]:
# features to ignore
excludeFeats = ['acure_bl_nuc', 'acure_ait_width', 'acure_cloud_ph',
       'acure_carb_ff_ems', 'acure_carb_ff_ems_eur', 'acure_carb_ff_ems_nam',
       'acure_carb_ff_ems_chi', 'acure_carb_ff_ems_asi',
       'acure_carb_ff_ems_mar', 'acure_carb_ff_ems_r', 'acure_carb_bb_ems',
       'acure_carb_bb_ems_sam', 'acure_carb_bb_ems_naf',
       'acure_carb_bb_ems_saf', 'acure_carb_bb_ems_bnh',
       'acure_carb_bb_ems_rnh', 'acure_carb_bb_ems_rsh', 'acure_carb_res_ems',
       'acure_carb_res_ems_chi', 'acure_carb_res_ems_asi',
       'acure_carb_res_ems_afr', 'acure_carb_res_ems_lat',
       'acure_carb_res_ems_r', 'acure_carb_ff_diam', 'acure_carb_bb_diam',
       'acure_carb_res_diam', 'acure_prim_so4_diam', 'acure_sea_spray',
       'acure_anth_so2', 'acure_anth_so2_chi', 'acure_anth_so2_asi',
       'acure_anth_so2_eur', 'acure_anth_so2_nam', 'acure_anth_so2_r',
       'acure_volc_so2', 'acure_bvoc_soa', 'acure_dms', 'acure_prim_moc',
       'acure_dry_dep_ait', 'acure_dry_dep_so2',
       'acure_kappa_oc', 'acure_sig_w', 'acure_rain_frac',
       'acure_cloud_ice_thresh', 'acure_convective_plume_scavenging',
       'acure_scav_diam', 'acure_bc_ri', 'acure_oxidants_oh',
       'acure_oxidants_o3', 'bparam', 'two_d_fsd_factor', 'c_r_correl',
       'acure_autoconv_exp_lwp', 'acure_autoconv_exp_nd', 'dbsdtbs_turb_0',
       'ai', 'm_ci', 'a_ent_1_rp']
kernelLen = 59 - len(excludeFeats)

# create kernel
GPR_kernel = c3.SklearnGPRKernelMatern(lengthScale=[1.0]*kernelLen, nu=0.5, coefficient=1.0).build().kernel.upsert()

# define technique
GPR_technique = c3.GaussianProcessRegressionTechnique(
                    randomState=42,
                    kernel = GPR_kernel
).upsert()

# define data source spec
GPR_dataspec = c3.GPRDataSourceSpec(
    featuresType = c3.TypeRef(
        typeName="SimulationModelParameters"
    ),
    featuresSpec=c3.FetchSpec(
        limit=-1
    ),
    excludeFeatures=excludeFeats,
    targetType=c3.TypeRef(
        typeName="Simulation3HourlyAODOutput"
    ),
    targetSpec=c3.FetchSpec(
        filter=c3.Filter().intersects("geoSurfaceTimePoint.id", geoSurfaceTimePoints[0:100])
    ),
    targetName="all"
).upsert()

# create pipe
GPR_pipe = c3.GaussianProcessRegressionPipe(
    technique=GPR_technique,
    dataSourceSpec=GPR_dataspec
)

500 - StringOverflow - c3.engine.database.DbException_validationErrors [5244.12227]
message: "Write failed: Attempt to insert/update a value of length 4367 for field filter with column length 4000: intersects(geoSurfaceTimePoint.id, ["geoSurfaceTimePoint.id == \"-8.125_-34.688_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-44.062_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-0.625_-17.812_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_25.312_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-12.188_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-4.688_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_14.062_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_2.812_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-0.625_-15.938_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-29.062_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-42.188_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-10.6

C3RuntimeException: 500 - StringOverflow - c3.engine.database.DbException_validationErrors [5244.12227]
message: "Write failed: Attempt to insert/update a value of length 4367 for field filter with column length 4000: intersects(geoSurfaceTimePoint.id, ["geoSurfaceTimePoint.id == \"-8.125_-34.688_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-44.062_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-0.625_-17.812_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_25.312_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-12.188_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-4.688_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_14.062_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_2.812_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-0.625_-15.938_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-29.062_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-42.188_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-10.625_-12.188_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_-30.938_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8.125_36.562_2017-07-01T00:20:00\"","geoSurfaceTimePoint.id == \"-8..."
JSON: {"this": {"type": "GPRDataSourceSpec", "featuresType": {"type": "TypeRef", "typeName": "SimulationModelParameters"}, "featuresSpec": {"type": "FetchSpec", "offset": 0, "limit": -1}, "excludeFeatures": ["acure_bl_nuc", "acure_ait_width", "acure_cloud_ph", "acure_carb_ff_ems", "acure_carb_ff_ems_eur", "acure_carb_ff_ems_nam", "acure_carb_ff_ems_chi", "acure_carb_ff_ems_asi", "acure_carb_ff_ems_mar", "acure_carb_ff_ems_r", "acure_carb_bb_ems", "acure_carb_bb_ems_sam", "acure_carb_bb_ems_naf", "acure_carb_bb_ems_saf", "acure_carb_bb_ems_bnh", "acure_carb_bb_ems_rnh", "acure_carb_bb_ems_rsh", "acure_carb_res_ems", "acure_carb_res_ems_chi", "acure_carb_res_ems_asi", "acure_carb_res_ems_afr", "acure_carb_res_ems_lat", "acure_carb_res_ems_r", "acure_carb_ff_diam", "acure_carb_bb_diam", "acure_carb_res_diam", "acure_prim_so4_diam", "acure_sea_spray", "acure_anth_so2", "acure_anth_so2_chi", "acure_anth_so2_asi", "acure_anth_so2_eur", "acure_anth_so2_nam", "acure_anth_so2_r", "acure_volc_so2", "acure_bvoc_soa", "acure_dms", "acure_prim_moc", "acure_dry_dep_ait", "acure_dry_dep_so2", "acure_kappa_oc", "acure_sig_w", "acure_rain_frac", "acure_cloud_ice_thresh", "acure_convective_plume_scavenging", "acure_scav_diam", "acure_bc_ri", "acure_oxidants_oh", "acure_oxidants_o3", "bparam", "two_d_fsd_factor", "c_r_correl", "acure_autoconv_exp_lwp", "acure_autoconv_exp_nd", "dbsdtbs_turb_0", "ai", "m_ci", "a_ent_1_rp"], "targetType": {"type": "TypeRef", "typeName": "Simulation3HourlyAODOutput"}, "targetSpec": {"type": "FetchSpec", "filter": "intersects(geoSurfaceTimePoint.id, [\"geoSurfaceTimePoint.id == \\\"-8.125_-34.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-44.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-17.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_25.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-12.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-4.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_14.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_2.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-15.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-29.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-42.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-12.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-30.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_36.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_4.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-0.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-21.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-15.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_19.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_12.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_30.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-40.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-14.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-21.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-0.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-0.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-8.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_15.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-23.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-10.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-32.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_10.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-23.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-12.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-2.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-21.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-10.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-19.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-2.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_17.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_6.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-14.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-25.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-17.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-19.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-17.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-19.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-36.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-15.938_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-2.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_34.688_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-27.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_29.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-6.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_32.812_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_21.562_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_38.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_23.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_27.188_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_8.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-10.625_-14.062_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-10.312_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-0.625_-23.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_-38.438_2017-07-01T00:20:00\\\"\",\"geoSurfaceTimePoint.id == \\\"-8.125_0.938_2017-07-01T00:20:00\\\"\"])", "offset": 0, "limit": 2000}, "targetName": "all"}, "srcObj": null, "spec": null}