Commit 98fd6a9

tonyyang-svail authored and wangkuiyi committed
Rename TRAIN/PREDICT to TO TRAIN/TO PREDICT (#1128)
* Rename TRAIN/PREDICT to TO TRAIN/TO PREDICT
* merge develop
* fix tests
1 parent 67b7d7b · commit 98fd6a9

65 files changed: +364 -315 lines
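
At a glance, the rename only changes the clause keywords of SQLFlow's extended statements. A minimal before/after sketch based on the iris example in the README diff below (the trailing INTO clause falls outside the changed hunk and is assumed here):

```sql
-- Old syntax, before this commit:
SELECT * FROM iris.train
TRAIN DNNClassifier
WITH model.n_classes = 3, model.hidden_units = [10, 20]
COLUMN sepal_length, sepal_width, petal_length, petal_width
LABEL class
INTO sqlflow_models.my_dnn_model;  -- INTO target assumed from the predict example's USING clause

-- New syntax, after this commit: TRAIN becomes TO TRAIN
SELECT * FROM iris.train
TO TRAIN DNNClassifier
WITH model.n_classes = 3, model.hidden_units = [10, 20]
COLUMN sepal_length, sepal_width, petal_length, petal_width
LABEL class
INTO sqlflow_models.my_dnn_model;

-- Prediction statements change the same way: PREDICT becomes TO PREDICT
SELECT * FROM iris.test
TO PREDICT iris.predict.class
USING sqlflow_models.my_dnn_model;
```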

README.md

Lines changed: 2 additions & 2 deletions

@@ -31,7 +31,7 @@ Here are examples for training a Tensorflow [DNNClassifer](https://www.tensorflo
 ```sql
 sqlflow> SELECT *
 FROM iris.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -45,7 +45,7 @@ Done training
 ```sql
 sqlflow> SELECT *
 FROM iris.test
-PREDICT iris.predict.class
+TO PREDICT iris.predict.class
 USING sqlflow_models.my_dnn_model;

 ...

cmd/sqlflowserver/main_test.go

Lines changed: 24 additions & 24 deletions

@@ -337,7 +337,7 @@ func CaseTrainTextClassificationIR(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT news_title, class_id
 FROM text_cn.train_processed
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 17, model.hidden_units = [10, 20]
 COLUMN EMBEDDING(CATEGORY_ID(SPARSE(news_title,16000,COMMA), 16000),128,mean)
 LABEL class_id
@@ -352,7 +352,7 @@ func CaseTrainTextClassificationFeatureDerivation(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT news_title, class_id
 FROM text_cn.train_processed
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 17, model.hidden_units = [10, 20]
 COLUMN EMBEDDING(SPARSE(news_title,16000,COMMA),128,mean)
 LABEL class_id
@@ -576,7 +576,7 @@ func CaseTrainSQL(t *testing.T) {
 trainSQL := fmt.Sprintf(`
 SELECT *
 FROM %s.%s
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
 model.n_classes = 3,
 model.hidden_units = [10, 20],
@@ -592,7 +592,7 @@ func CaseTrainSQL(t *testing.T) {

 predSQL := fmt.Sprintf(`SELECT *
 FROM %s.%s
-PREDICT %s.%s.class
+TO PREDICT %s.%s.class
 USING sqlflow_models.my_dnn_model;`, caseDB, caseTestTable, caseDB, casePredictTable)
 _, _, err = connectAndRunSQL(predSQL)
 if err != nil {
@@ -624,7 +624,7 @@ func CaseTrainFeatureDerevation(t *testing.T) {
 a := assert.New(t)
 trainSQL := fmt.Sprintf(`SELECT *
 FROM %s.%s
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 LABEL class
 INTO sqlflow_models.my_dnn_model;`, caseDB, caseTrainTable)
@@ -633,7 +633,7 @@ INTO sqlflow_models.my_dnn_model;`, caseDB, caseTrainTable)

 // TODO(typhoonzero): also support string column type for training and prediction (column c6)
 trainVaryColumnTypes := `SELECT c1, c2, c3, c4, c5, class from feature_derivation_case.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes=3, model.hidden_units=[10,10]
 COLUMN EMBEDDING(c3, 128, sum), EMBEDDING(SPARSE(c5, 10000, COMMA), 128, sum)
 LABEL class
@@ -646,7 +646,7 @@ func CaseTrainCustomModel(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT *
 FROM iris.train
-TRAIN sqlflow_models.DNNClassifier
+TO TRAIN sqlflow_models.DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -658,7 +658,7 @@ INTO sqlflow_models.my_dnn_model_custom;`

 predSQL := `SELECT *
 FROM iris.test
-PREDICT iris.predict.class
+TO PREDICT iris.predict.class
 USING sqlflow_models.my_dnn_model_custom;`
 _, _, err = connectAndRunSQL(predSQL)
 if err != nil {
@@ -684,7 +684,7 @@ func CaseTrainTextClassification(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT news_title, class_id
 FROM text_cn.train_processed
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 17, model.hidden_units = [10, 20]
 COLUMN EMBEDDING(CATEGORY_ID(news_title,16000,COMMA),128,mean)
 LABEL class_id
@@ -699,7 +699,7 @@ func CaseTrainTextClassificationCustomLSTM(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT news_title, class_id
 FROM text_cn.train_processed
-TRAIN sqlflow_models.StackedBiLSTMClassifier
+TO TRAIN sqlflow_models.StackedBiLSTMClassifier
 WITH model.n_classes = 17, model.stack_units = [16], train.epoch = 1, train.batch_size = 32
 COLUMN EMBEDDING(SEQ_CATEGORY_ID(news_title,1600,COMMA),128,mean)
 LABEL class_id
@@ -714,7 +714,7 @@ func CaseTrainSQLWithHyperParams(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT *
 FROM iris.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20], train.batch_size = 10, train.epoch = 2
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -729,7 +729,7 @@ func CaseTrainDeepWideModel(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT *
 FROM iris.train
-TRAIN DNNLinearCombinedClassifier
+TO TRAIN DNNLinearCombinedClassifier
 WITH model.n_classes = 3, model.dnn_hidden_units = [10, 20], train.batch_size = 10, train.epoch = 2
 COLUMN sepal_length, sepal_width FOR linear_feature_columns
 COLUMN petal_length, petal_width FOR dnn_feature_columns
@@ -746,7 +746,7 @@ func CaseTrainCustomModelWithHyperParams(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT *
 FROM iris.train
-TRAIN sqlflow_models.DNNClassifier
+TO TRAIN sqlflow_models.DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20], train.batch_size = 10, train.epoch=2
 COLUMN sepal_length, sepal_width, petal_length, petal_width
 LABEL class
@@ -761,7 +761,7 @@ func CaseSparseFeature(t *testing.T) {
 a := assert.New(t)
 trainSQL := `SELECT news_title, class_id
 FROM text_cn.train
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH model.n_classes = 3, model.hidden_units = [10, 20]
 COLUMN EMBEDDING(CATEGORY_ID(news_title,16000,COMMA),128,mean)
 LABEL class_id
@@ -777,7 +777,7 @@ func CaseTrainElasticDL(t *testing.T) {
 a := assert.New(t)
 trainSQL := fmt.Sprintf(`SELECT sepal_length, sepal_width, petal_length, petal_width, class
 FROM %s.%s
-TRAIN ElasticDLDNNClassifier
+TO TRAIN ElasticDLDNNClassifier
 WITH
 model.optimizer = "optimizer",
 model.loss = "loss",
@@ -830,7 +830,7 @@ func CaseTrainALPS(t *testing.T) {
 SELECT deep_id, user_space_stat, user_behavior_stat, space_stat, l
 FROM %s.sparse_column_test
 LIMIT 100
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
 model.n_classes = 2,
 model.hidden_units = [10, 20],
@@ -863,8 +863,8 @@ func CaseTrainALPSRemoteModel(t *testing.T) {
 trainSQL := fmt.Sprintf(`SELECT deep_id, user_space_stat, user_behavior_stat, space_stat, l
 FROM %s.sparse_column_test
 LIMIT 100
-TRAIN models.estimator.dnn_classifier.DNNClassifier
-WITH
+TO TRAIN models.estimator.dnn_classifier.DNNClassifier
+WITH
 model.n_classes = 2, model.hidden_units = [10, 20], train.batch_size = 10, engine.ps_num=0, engine.worker_num=0, engine.type=local,
 gitlab.project = "Alps/sqlflow-models",
 gitlab.source_root = python,
@@ -891,7 +891,7 @@ func CaseTrainALPSFeatureMap(t *testing.T) {
 trainSQL := fmt.Sprintf(`SELECT dense, deep, item, test_sparse_with_fm.label
 FROM %s.test_sparse_with_fm
 LIMIT 32
-TRAIN alipay.SoftmaxClassifier
+TO TRAIN alipay.SoftmaxClassifier
 WITH train.max_steps = 32, eval.steps=32, train.batch_size=8, engine.ps_num=0, engine.worker_num=0, engine.type = local
 COLUMN DENSE(dense, none, comma),
 DENSE(item, 1, comma, int)
@@ -931,7 +931,7 @@ func CaseTrainRegression(t *testing.T) {
 a := assert.New(t)
 trainSQL := fmt.Sprintf(`SELECT *
 FROM housing.train
-TRAIN LinearRegressor
+TO TRAIN LinearRegressor
 WITH model.label_dimension=1
 COLUMN f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13
 LABEL target
@@ -943,7 +943,7 @@ INTO sqlflow_models.my_regression_model;`)

 predSQL := fmt.Sprintf(`SELECT *
 FROM housing.test
-PREDICT housing.predict.target
+TO PREDICT housing.predict.target
 USING sqlflow_models.my_regression_model;`)
 _, _, err = connectAndRunSQL(predSQL)
 if err != nil {
@@ -977,7 +977,7 @@ func CaseTrainXGBoostRegression(t *testing.T) {
 trainSQL := fmt.Sprintf(`
 SELECT *
 FROM housing.train
-TRAIN xgboost.gbtree
+TO TRAIN xgboost.gbtree
 WITH
 objective="reg:squarederror",
 train.num_boost_round = 30
@@ -998,7 +998,7 @@ func CaseTrainAndAnalyzeXGBoostModel(t *testing.T) {
 trainStmt := `
 SELECT *
 FROM housing.train
-TRAIN xgboost.gbtree
+TO TRAIN xgboost.gbtree
 WITH
 objective="reg:squarederror",
 train.num_boost_round = 30
@@ -1040,7 +1040,7 @@ func CasePredictXGBoostRegression(t *testing.T) {
 a := assert.New(t)
 predSQL := fmt.Sprintf(`SELECT *
 FROM housing.test
-PREDICT housing.xgb_predict.target
+TO PREDICT housing.xgb_predict.target
 USING sqlflow_models.my_xgb_regression_model;`)
 _, _, err := connectAndRunSQL(predSQL)
 if err != nil {

doc/design/alps_submitter.md

Lines changed: 6 additions & 6 deletions

@@ -89,7 +89,7 @@ The column `c1` is dense encoded and `c2` is sparse encoded, `c3` is label colum
 select
 c1, c2, c3 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
 ...
 COLUMN
@@ -148,7 +148,7 @@ Here is an example which do `BUCKETIZED` on `c2` then `CROSS` with `c1`.
 select
 c1, c2, c3 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
 ...
 COLUMN
@@ -162,7 +162,7 @@ Feature Expressions except for Tensorflow Feature Column API should raise an err
 ```sql
 /* Not supported */
 select * from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
 ...
 COLUMN
@@ -206,7 +206,7 @@ Let's create a DNNClassifier example, the minimum parameters of the constructor
 select
 c1, c2, c3 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNClassifier
+TO TRAIN DNNClassifier
 WITH
 estimator.hidden_units = [10, 20],
 train_spec.max_steps = 2000,
@@ -223,7 +223,7 @@ For now, we will pass the result of snippet code as `feature_columns` parameters
 select
 c1, c2, c3, c4, c5 as class
 from kaggle_credit_fraud_training_data
-TRAIN DNNLinearCombinedClassifier
+TO TRAIN DNNLinearCombinedClassifier
 WITH
 linear_feature_columns = [fc1, fc2]
 dnn_feature_columns = [fc3]
@@ -234,4 +234,4 @@ COLUMN
 CROSS([fc1, fc2, f3]) as fc3
 LABEL class
 ...
-```
+```

doc/design/analyzer.md

Lines changed: 2 additions & 2 deletions

@@ -8,13 +8,13 @@ This design doc introduces how to support the `Analyze SQL` in SQLFlow with SHAP

 ## User Interface

-Users usually use a **TRAIN SQL** to train a model and then analyze the model using an **ANALYZE SQL**, the simple pipeline like:
+Users usually use a **TO TRAIN SQL** to train a model and then analyze the model using an **ANALYZE SQL**, the simple pipeline like:

 Train SQL:

 ``` sql
 SELECT * FROM train_table
-TRAIN xgboost.Estimator
+TO TRAIN xgboost.Estimator
 WITH
 train.objective = "reg:linear"
 COLUMN x
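
The hunk above stops before the ANALYZE statement of that pipeline. For context, a rough sketch of what such a follow-up statement looks like in this design (the model name and explainer here are assumptions, not part of this diff):

```sql
-- Hypothetical Analyze SQL paired with the Train SQL above; the exact
-- clause attributes come from the analyzer design and are not shown in this hunk.
SELECT * FROM train_table
ANALYZE my_xgb_model
USING TreeExplainer;
```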

doc/design/ant_xgboost.md

Lines changed: 3 additions & 3 deletions

@@ -33,7 +33,7 @@ Comparing to python API provided by `xgboost`, it is easier to build a python co
 ### User Experience

 In terms of sqlflow users, xgboost is an alternative `Estimator` like `TensorFlow Estimators`.
-Working with xgboost is quite similar to working with TensorFlow Estimators; just change `TRAIN DNNClassifier` into `TRAIN XGBoostEstimator`.
+Working with xgboost is quite similar to working with TensorFlow Estimators; just change `TO TRAIN DNNClassifier` into `TO TRAIN XGBoostEstimator`.

 In addition, xgboost specific parameters can be configured in the same way as TensorFlow parameters.

@@ -44,7 +44,7 @@ Below is a demo about training/predicting via xgboost :
 select
 c1, c2, c3, c4, c5 as class
 from kaggle_credit_fraud_training_data
-TRAIN XGBoostEstimator
+TO TRAIN XGBoostEstimator
 WITH
 booster = "gbtree"
 objective = "logistic:binary"
@@ -62,7 +62,7 @@ INTO sqlflow_models.xgboost_model_table;
 select
 c1, c2, c3, c4
 from kaggle_credit_fraud_development_data
-PREDICT kaggle_credit_fraud_development_data.class
+TO PREDICT kaggle_credit_fraud_development_data.class
 USING sqlflow_models.xgboost_model_table;
 ```


doc/design/clustermodel.md

Lines changed: 6 additions & 6 deletions

@@ -22,13 +22,13 @@ The figure below demonstrates the overall workflow for cluster model training, w

 In this scenario, we focus on the extraction of data patterns in unsupervised learning.

-So, the user can use `TRAIN` keyword to training a model. The user can also specify the training hyper-parameters with the keyword `WITH` and determine whether to use pre-trained model by `USING`. The training and predicting syntax looks like:
+So, the user can use `TO TRAIN` keyword to training a model. The user can also specify the training hyper-parameters with the keyword `WITH` and determine whether to use pre-trained model by `USING`. The training and predicting syntax looks like:

-TRAIN SQL:
+TO TRAIN SQL:

 ``` sql
 SELECT * FROM input_table
-TRAIN clusterModel
+TO TRAIN clusterModel
 WITH
 model.encode_units = [100, 7]
 model.n_clusters = 5
@@ -38,12 +38,12 @@ USING existed_pretrain_model
 INTO my_cluster_model;
 ```

-PREDICT SQL:
+TO PREDICT SQL:

 ``` sql
 SELECT *
 FROM input_table
-PREDICT output_table.group_id
+TO PREDICT output_table.group_id
 USING my_cluster_model;
 ```

@@ -108,7 +108,7 @@ Therefore, there are four cases in total:

 - In the first stage of the clustering model on SQLFlow, we plan to achieve the `first case`. We will achieve the other cases in the later.

-- Users can use the trained cluster model in ` PREDICT SQL` to predict the group of input_table to get output_table.
+- Users can use the trained cluster model in ` TO PREDICT SQL` to predict the group of input_table to get output_table.

 - Finally, the user can perform a combined aggregation operation on the output_table based on the SQL statement to obtain a result_table, which can be saved to the local dataframe and then analyzed according to his own needs.
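
The aggregation step in the last bullet is plain SQL over the prediction output; a minimal sketch, reusing the output_table and group_id names from the TO PREDICT example above (the aggregation itself is illustrative):

```sql
-- Count how many rows fall into each predicted cluster.
SELECT group_id, COUNT(*) AS group_size
FROM output_table
GROUP BY group_id
ORDER BY group_size DESC;
```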

doc/design/database_abstraction_layer.md

Lines changed: 2 additions & 2 deletions

@@ -8,7 +8,7 @@ SQLFlow calls Go's [standard database API](https://golang.org/pkg/database/sql/)

 ### Data Retrieval

-The basic idea of SQLFlow is to extend the SELECT statement of SQL to have the TRAIN and PREDICT clauses. For more discussion, please refer to the [syntax design](syntax.md). SQLFlow translates such "extended SQL statements" into submitter programs, which forward the part from SELECT to TRAIN or PREDICT, which we call the "standard part", to the SQL engine. SQLFlow also accepts the SELECT statement without TRAIN or PREDICT clauses and would forward such "standard statements" to the engine. It is noticeable that the "standard part" or "standard statements" are not standardized. For example, various engines use different syntax for `FULL OUTER JOIN`.
+The basic idea of SQLFlow is to extend the SELECT statement of SQL to have the TRAIN and PREDICT clauses. For more discussion, please refer to the [syntax design](syntax.md). SQLFlow translates such "extended SQL statements" into submitter programs, which forward the part from SELECT to TO TRAIN or TO PREDICT, which we call the "standard part", to the SQL engine. SQLFlow also accepts the SELECT statement without TO TRAIN or TO PREDICT clauses and would forward such "standard statements" to the engine. It is noticeable that the "standard part" or "standard statements" are not standardized. For example, various engines use different syntax for `FULL OUTER JOIN`.

 - Hive supports `FULL OUTER JOIN` directly.
 - MySQL doesn't have `FULL OUTER JOIN`. However, a user can emulates `FULL OUTER JOIN` using `LEFT JOIN`, `UNION` and `RIGHT JOIN`.
@@ -24,7 +24,7 @@ SELECT
 name,
 age,
 income
-FROM employee TRAIN DNNRegressor
+FROM employee TO TRAIN DNNRegressor
 WITH hidden_layers=[10,50,10]
 COLUMN name, agee LABEL income;
 ```
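
To make the "standard part" versus extended-clause split described above concrete, here is the same statement from the hunk above with the split marked in comments (the comments are annotations for illustration, not part of the diff):

```sql
-- Standard part: forwarded verbatim to the underlying SQL engine.
SELECT
  name,
  age,
  income
FROM employee
-- Extended part: parsed by SQLFlow itself, beginning at TO TRAIN.
TO TRAIN DNNRegressor
WITH hidden_layers=[10,50,10]
COLUMN name, agee LABEL income;
```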
