/
ml_experiment_demo.dig
67 lines (59 loc) · 2.88 KB
/
ml_experiment_demo.dig
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Timezone used for this workflow's session times.
timezone: Asia/Tokyo
# Alternative kept for reference:
# timezone: PST

# Variables made available to every task in this workflow.
_export:
  # Shared parameters. The tasks below reference ${input_database},
  # ${output_database} and ${expr_tracking_table}; presumably they are
  # defined in this file -- TODO confirm.
  !include : config/params.yaml
  # Defaults for the Treasure Data operators.
  td:
    engine: presto
    database: ${output_database}
# Creates the output database and the experiment-tracking table up front,
# so that later tasks have somewhere to write their results.
+create_db_tbl_if_not_exists:
  td_ddl>:
  create_databases:
    - "${output_database}"
  create_tables:
    - "${expr_tracking_table}"
# Runs the ml_datasets notebook, writing its output to ${input_database}.
+load_datasets:
  ipynb>:
    notebook: ml_datasets
    output_database: ${input_database}
    # NOTE(review): "dummy" looks like a placeholder table name that the
    # notebook does not actually read -- confirm.
    input_table: ${input_database}.dummy
    # Multiple datasets may be listed, e.g.:
    # datasets: gluon, bank_marketing
    datasets: gluon
# Trains a model from ${input_database}.gluon_train with the gluon_train
# notebook and stores it under a per-session model name.
+gluon_train:
  ml_train>:
    notebook: gluon_train
    model_name: gluon_model_${session_id}
    # Expected format: database_name.table_name
    input_table: ${input_database}.gluon_train
    target_column: class

    # ---- Optional settings ----
    # problem_type: 'binary', 'multiclass', 'regression', or 'quantile'.
    # AutoGluon detects the problem type automatically when omitted.
    # problem_type: binary
    #
    # eval_metric: AutoGluon selects a suitable metric for the given
    # setting when not specified.
    # eval_metric: roc_auc
    #
    # Note: the time column is ignored by default.
    ignore_columns: time,rowid
    # Fit timeout; here 3 minutes, covering training time only.
    # Default: 60 * 60 (1 hr). 1 hr or more is recommended for production
    # purposes (24 hours at most). This is a soft limit, not a hard limit.
    time_limit: 60 * 3
    # timeout: hard limit for notebook execution, applied per cell.
    # No timeout if not specified.
    # timeout: 60 * 3
    export_leaderboard: ${output_database}.leaderboard_gluon_train
    export_feature_importance: ${output_database}.feature_importance_gluon_train
    # hide_table_contents: true
# Prints the name of the notebook that was executed most recently
# (the training notebook at this point in the workflow).
+print_train_result:
  echo>: 'executed ${automl.last_executed_notebook}.ipynb'
# Records the training run by executing queries/track_experiment.sql and
# inserting the query result into the experiment-tracking table.
+track_experiment:
  td>: queries/track_experiment.sql
  # Insert into the table that +create_db_tbl_if_not_exists creates
  # (${expr_tracking_table}) instead of a hard-coded name, so the two tasks
  # cannot drift apart when the parameter is changed.
  insert_into: ${expr_tracking_table}
  # Values describing the run; presumably referenced as ${...} placeholders
  # inside the SQL file -- TODO confirm against queries/track_experiment.sql.
  last_executed_notebook: ${automl.last_executed_notebook}
  user_id: ${automl.last_executed_user_id}
  user_email: ${automl.last_executed_user_email}
  model_name: gluon_model_${session_id}
  task_attempt_id: ${attempt_id}
  session_time: ${session_local_time}
  engine: presto
# Runs the gluon_predict notebook with the model trained in this session
# and writes the predictions to ${output_database}.gluon_predicted.
+gluon_predict:
  ml_predict>:
    notebook: gluon_predict
    model_name: gluon_model_${session_id}
    # Expected format: database_name.table_name
    input_table: ${input_database}.gluon_test
    # Expected format: database_name.table_name. The database is created
    # if it does not exist; the table is overwritten.
    output_table: ${output_database}.gluon_predicted

    # ---- Optional settings ----
    # rowid_column: when specified, the output table contains only the
    # rowid column plus the prediction result columns.
    # rowid_column: rowid
    #
    # ignore_columns: the target column should not be in the test data.
    # ignore_columns: time
    export_leaderboard: ${output_database}.leaderboard_gluon_predict
    export_feature_importance: ${output_database}.feature_importance_gluon_predict
    # hide_table_contents: true
# Prints the name of the notebook that was executed most recently
# (the prediction notebook at this point in the workflow).
+print_predict_result:
  echo>: 'executed ${automl.last_executed_notebook}.ipynb'