# Set up

Borrowing from GPRforAOD, we submit two batch jobs, each training multiple GPR pipes: one job with the target centered (`centerTarget=True`) and one without.

In [1]:
# Feature names left out of training; passed as `excludeFeatures` to the job
# options and to the parameter-extraction jobs below.
excludeFeats = [
    "acure_carb_ff_ems",
    "acure_carb_bb_ems",
    "acure_carb_res_ems",
    "acure_anth_so2",
    "acure_carb_ff_ems_eur",
    "acure_carb_ff_ems_nam",
    "acure_carb_ff_ems_chi",
    "acure_carb_ff_ems_asi",
    "acure_carb_ff_ems_mar",
    "acure_carb_ff_ems_r",
    "acure_carb_bb_ems_sam",
    "acure_carb_bb_ems_naf",
    "acure_carb_bb_ems_saf",
    "acure_carb_bb_ems_bnh",
    "acure_carb_bb_ems_rnh",
    "acure_carb_bb_ems_rsh",
    "acure_carb_res_ems_chi",
    "acure_carb_res_ems_asi",
    "acure_carb_res_ems_afr",
    "acure_carb_res_ems_lat",
    "acure_carb_res_ems_r",
    "acure_carb_ff_diam",
    "acure_carb_res_diam",
    "acure_prim_so4_diam",
    "acure_anth_so2_chi",
    "acure_anth_so2_asi",
    "acure_anth_so2_eur",
    "acure_anth_so2_nam",
    "acure_volc_so2",
    "acure_prim_moc",
    "acure_kappa_oc",
    "acure_sig_w",
    "acure_rain_frac",
    "acure_cloud_ice_thresh",
    "acure_convective_plume_scavenging",
    "acure_scav_diam",
    "acure_oxidants_oh",
    "acure_oxidants_o3",
    "two_d_fsd_factor",
    "c_r_correl",
    "acure_autoconv_exp_lwp",
    "ai",
    "m_ci",
]

# Size of the full feature set that the exclusion list is subtracted from.
# NOTE(review): 59 is carried over from the original hard-coded value; it is
# presumably the total number of candidate input features — confirm.
totalFeatureCount = 59

# The kernel gets one length scale per feature that is NOT excluded.
kernelLen = totalFeatureCount - len(excludeFeats)
if kernelLen <= 0:
    # Without this guard `[1.0] * kernelLen` would silently become an empty
    # list and the kernel would be built with no length scales at all.
    raise ValueError(
        "excludeFeats has %d entries, leaving no features out of %d for the kernel"
        % (len(excludeFeats), totalFeatureCount)
    )

# create kernel: Matern with nu=0.5, unit coefficient and unit initial length scales
GPR_kernel = c3.SklearnGPRKernelMatern(
    lengthScale=[1.0] * kernelLen,
    nu=0.5,
    coefficient=1.0,
).build().kernel.upsert()

In [2]:
# Bounds of the latitude / longitude / time window used to select data.
lat1, lat2 = 0.0, 1.0
lon1, lon2 = 0.0, 1.0
time1, time2 = "2017-07-01T00:00:00", "2017-07-01T03:59:59"

# Chain ge/le conditions on each of the three fields, joined with and_().
gstpFilter = (
    c3.Filter()
    .ge("latitude", lat1)
    .and_().le("latitude", lat2)
    .and_().ge("longitude", lon1)
    .and_().le("longitude", lon2)
    .and_().ge("time", time1)
    .and_().le("time", time2)
)

## Two different jobs

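Objects with a "2" appended to their name (`GPR_technique2`, `jobOptions2`, `job2`) have the `centerTarget` flag set to True. The parameter-collection jobs do not follow this suffix convention: `job3` uses the uncentered technique and `job4` uses the centered one.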

### Uncentered training

In [3]:
# Technique for the uncentered run: seeded with 42, built on GPR_kernel,
# with centerTarget switched off.
GPR_technique = c3.GaussianProcessRegressionTechnique(
    randomState=42, kernel=GPR_kernel, centerTarget=False
).upsert()

In [4]:
# Options for the uncentered training job: data window, excluded features,
# target name and the uncentered technique.
jobOptions = c3.AODGaussianMLTrainingJobOptions(
    gstpFilter=gstpFilter,
    excludeFeatures=excludeFeats,
    targetName="all",
    gprTechnique=GPR_technique,
    batchSize=2,
)

In [5]:
# Upsert the uncentered training job, then start it; the value returned by
# start() is what the cell displays.
job = c3.AODGaussianMLTrainingJob(options=jobOptions).upsert()
job.start()

c3.BatchJobStatus(
 started=datetime.datetime(2022, 9, 19, 14, 55, 39, tzinfo=datetime.timezone.utc),
 startedby='jcarzon@andrew.cmu.edu',
 status='running')

In [8]:
# Check the uncentered training job's status; re-run this cell until it reports 'completed'.
job.status()

c3.BatchJobStatus(
 started=datetime.datetime(2022, 9, 19, 14, 55, 39, tzinfo=datetime.timezone.utc),
 startedby='jcarzon@andrew.cmu.edu',
 completed=datetime.datetime(2022, 9, 19, 14, 56, 3, tzinfo=datetime.timezone.utc),
 status='completed',
 newBatchSubmitted=False)

### Uncentered parameter collection

In [13]:
# Create the learned-parameter extraction job for the uncentered technique,
# reusing the excluded features, filter and target name ("all") from training.
# NOTE(review): the trailing positional 10 is unnamed — presumably a batch size;
# confirm against the extractLearnedParametersJob signature.
job3 = c3.AODGPRModelFinder.extractLearnedParametersJob(excludeFeats, gstpFilter, "all", GPR_technique, 10)

In [43]:
# Check the uncentered parameter-extraction job's status (including any errors).
job3.status()

c3.MapReduceStatus(
 started=datetime.datetime(2022, 9, 19, 16, 56, 14, tzinfo=datetime.timezone.utc),
 startedby='jcarzon@andrew.cmu.edu',
 completed=datetime.datetime(2022, 9, 19, 16, 56, 30, tzinfo=datetime.timezone.utc),
 status='failed',
 errors=c3.Arry<JobRunErrorDetail>([c3.JobRunErrorDetail(
           failedActionId='6179.125104560',
           errorMsg='Error executing command: '
                     '/usr/local/share/c3/condaEnvs/dev/tc02d/py-client_1_0_0/bin/python '
                     '/tmp/pythonActionSourceCache7751211527805788949/Lambda<function(batch: '
                     'any, objs: any, job: any): any>_applyPython.py\n'
                     'Process exited with 3 exit code.\n'
                     'a_id=6179.125105580 a_implementation=python t_tenant=dev '
                     't_tag=tc02d t_type=Lambda<function(batch: any, objs: '
                     'any, job: any): any> t_action=applyPython '
                     'p_logger=action url=http://dev-dti-app-w-006:

In [45]:
# Print the full error log of the first reported error; the status repr truncates it.
# Assumes the status has at least one entry in `errors`.
print(job3.status().errors[0].errorLog)

c3.love.exceptions.C3RuntimeException: Error executing command: /usr/local/share/c3/condaEnvs/dev/tc02d/py-client_1_0_0/bin/python /tmp/pythonActionSourceCache7751211527805788949/Lambda<function(batch: any, objs: any, job: any): any>_applyPython.py
Process exited with 3 exit code.
a_id=6179.125105580 a_implementation=python t_tenant=dev t_tag=tc02d t_type=Lambda<function(batch: any, objs: any, job: any): any> t_action=applyPython p_logger=action url=http://dev-dti-app-w-006:8080 connector=null mode="thick" Action failed!
Traceback (most recent call last):
  File "/tmp/pythonActionSourceCache7751211527805788949/Lambda<function(batch: any, objs: any, job: any): any>_applyPython.py", line 406, in _c3_remote_bootstrap__run_c3_action
    _c3_result = _action()
  File "/tmp/pythonActionSourceCache7751211527805788949/Lambda<function(batch: any, objs: any, job: any): any>_applyPython.py", line 530, in <lambda>
    action=lambda: applyPython(this = _c3_inputs.get('this'),actuals = _c3_inputs.ge

### Centered training

In [None]:
# Technique for the centered run: same seed and kernel as GPR_technique,
# but with centerTarget switched on.
GPR_technique2 = c3.GaussianProcessRegressionTechnique(
    randomState=42, kernel=GPR_kernel, centerTarget=True
).upsert()

In [None]:
# Options for the centered training job: same data window, excluded features
# and target name as the uncentered job, with the centered technique.
# NOTE(review): batchSize is 1 here but 2 in the uncentered jobOptions —
# confirm whether the difference is intentional.
jobOptions2 = c3.AODGaussianMLTrainingJobOptions(
    gstpFilter=gstpFilter,
    excludeFeatures=excludeFeats,
    targetName="all",
    gprTechnique=GPR_technique2,
    batchSize=1,
)

In [19]:
# Upsert the centered training job, then start it; the value returned by
# start() is what the cell displays.
job2 = c3.AODGaussianMLTrainingJob(options=jobOptions2).upsert()
job2.start()

c3.BatchJobStatus(
 started=datetime.datetime(2022, 9, 19, 14, 47, 20, tzinfo=datetime.timezone.utc),
 startedby='jcarzon@andrew.cmu.edu',
 status='running')

In [23]:
# Check the centered training job's status; re-run this cell until it reports 'completed'.
job2.status()

c3.BatchJobStatus(
 started=datetime.datetime(2022, 9, 19, 14, 47, 20, tzinfo=datetime.timezone.utc),
 startedby='jcarzon@andrew.cmu.edu',
 completed=datetime.datetime(2022, 9, 19, 14, 47, 38, tzinfo=datetime.timezone.utc),
 status='completed',
 newBatchSubmitted=False)

### Centered parameter collection

In [44]:
# Create the learned-parameter extraction job for the centered technique
# (GPR_technique2), with the same excluded features, filter and target name.
# NOTE(review): the trailing positional 10 is unnamed — presumably a batch size;
# confirm against the extractLearnedParametersJob signature.
job4 = c3.AODGPRModelFinder.extractLearnedParametersJob(excludeFeats, gstpFilter, "all", GPR_technique2, 10)

In [None]:
# Check the centered parameter-extraction job's status (including any errors).
job4.status()

## Cast results into a dataframe

In [None]:
# Collect the uncentered extraction job's results into a dataframe.
# NOTE(review): the saved output of job3.status() in this notebook shows
# status='failed' — confirm job3 has been rerun successfully before relying on `df`.
df = c3.AODGPRModelFinder.getDataframeFromJob(job3)

In [None]:
# Collect the centered extraction job's results into a dataframe.
# NOTE(review): no status output for job4 is saved in this notebook — check
# job4.status() reports 'completed' before relying on `df2`.
df2 = c3.AODGPRModelFinder.getDataframeFromJob(job4)