Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Commit

Permalink
Merge #584, #600, #609 for OD 1.8 (#658)
Browse files Browse the repository at this point in the history
* Issue 584 Fix object/nested field select issue

* Issue 600 Fix CAST bool to integer issue

* Issue 609 Support queries end with semi colon
  • Loading branch information
dai-chen committed Aug 4, 2020
1 parent a9538cb commit a8ae9e0
Show file tree
Hide file tree
Showing 18 changed files with 359 additions and 41 deletions.
2 changes: 1 addition & 1 deletion src/main/antlr/OpenDistroSqlParser.g4
Expand Up @@ -32,7 +32,7 @@ options { tokenVocab=OpenDistroSqlLexer; }

// Root rule
root
: sqlStatement? EOF
: sqlStatement? SEMI? EOF
;

// Only SELECT, DELETE, SHOW and DSCRIBE are supported for now
Expand Down
Expand Up @@ -16,6 +16,7 @@
package com.amazon.opendistroforelasticsearch.sql.esdomain.mapping;

import com.amazon.opendistroforelasticsearch.sql.domain.Field;
import com.amazon.opendistroforelasticsearch.sql.executor.format.DescribeResultSet;
import com.amazon.opendistroforelasticsearch.sql.utils.StringUtils;

import java.util.Map;
Expand Down Expand Up @@ -119,24 +120,26 @@ public String path() {
}

/**
* Used to retrieve the type of fields from metaData map structures for both regular and nested fields
* Find field type in ES Get Field Mapping API response. Note that Get Field Mapping API does NOT return
* the type for object or nested field. In this case, object type is used as default under the assumption
* that the field queried here must exist (which is true if semantic analyzer is enabled).
*
* @return field type if found in mapping, otherwise "object" type returned
*/
@SuppressWarnings("unchecked")
public String type() {
FieldMappingMetaData metaData = typeMappings.get(fieldName);
FieldMappingMetaData metaData = typeMappings.getOrDefault(fieldName, FieldMappingMetaData.NULL);
if (metaData.isNull()) {
return DescribeResultSet.DEFAULT_OBJECT_DATATYPE;
}

Map<String, Object> source = metaData.sourceAsMap();
String[] fieldPath = fieldName.split("\\.");

/*
* When field is not nested the metaData source is fieldName -> type
* When it is nested or contains "." in general (ex. fieldName.nestedName) the source is nestedName -> type
*/
String root = (fieldPath.length == 1) ? fieldName : fieldPath[1];
Map<String, Object> fieldMapping = (Map<String, Object>) source.get(root);
for (int i = 2; i < fieldPath.length; i++) {
fieldMapping = (Map<String, Object>) fieldMapping.get(fieldPath[i]);
}

// For object/nested field, fieldName is full path though only innermost field name present in mapping
// For example, fieldName='employee.location.city', metaData='{"city":{"type":"text"}}'
String innermostFieldName = (fieldPath.length == 1) ? fieldName : fieldPath[fieldPath.length - 1];
Map<String, Object> fieldMapping = (Map<String, Object>) source.get(innermostFieldName);
return (String) fieldMapping.get("type");
}

Expand Down
Expand Up @@ -40,7 +40,7 @@ public class DescribeResultSet extends ResultSet {
* You are not required to set the field type to object explicitly, as this is the default value.
* https://www.elastic.co/guide/en/elasticsearch/reference/current/object.html
*/
private static final String DEFAULT_OBJECT_DATATYPE = "object";
public static final String DEFAULT_OBJECT_DATATYPE = "object";

private IndexStatement statement;
private Object queryResult;
Expand Down
Expand Up @@ -114,6 +114,8 @@ public enum Type {
DATE, // Date types
BOOLEAN, // Boolean types
BINARY, // Binary types
OBJECT,
NESTED,
INTEGER_RANGE, FLOAT_RANGE, LONG_RANGE, DOUBLE_RANGE, DATE_RANGE; // Range types

public String nameLowerCase() {
Expand Down
Expand Up @@ -437,6 +437,7 @@ private List<Schema.Column> populateColumns(Query query, String[] fieldNames, Ma
// _score is a special case since it is not included in typeMappings, so it is checked for here
if (fieldName.equals(SCORE)) {
columns.add(new Schema.Column(fieldName, fetchAlias(fieldName, fieldMap), Schema.Type.FLOAT));
continue;
}
/*
* Methods are also a special case as their type cannot be determined from typeMappings, so it is checked
Expand Down Expand Up @@ -465,6 +466,7 @@ private List<Schema.Column> populateColumns(Query query, String[] fieldNames, Ma
fetchMethodReturnType(fieldIndex, methodField)
)
);
continue;
}

/*
Expand All @@ -473,7 +475,7 @@ private List<Schema.Column> populateColumns(Query query, String[] fieldNames, Ma
* explicitly selected.
*/
FieldMapping field = new FieldMapping(fieldName, typeMappings, fieldMap);
if (typeMappings.containsKey(fieldName) && !field.isMetaField()) {
if (!field.isMetaField()) {

if (field.isMultiField() && !field.isSpecified()) {
continue;
Expand Down
Expand Up @@ -87,8 +87,11 @@ public static QueryAction create(Client client, String sql)
public static QueryAction create(Client client, QueryActionRequest request)
throws SqlParseException, SQLFeatureNotSupportedException {
String sql = request.getSql();
// Linebreak matcher
// Remove line breaker anywhere and semicolon at the end
sql = sql.replaceAll("\\R", " ").trim();
if (sql.endsWith(";")) {
sql = sql.substring(0, sql.length() - 1);
}

switch (getFirstWord(sql)) {
case "SELECT":
Expand Down
Expand Up @@ -973,13 +973,10 @@ public String getCastScriptStatement(String name, String castType, List<KVValue>
String castFieldName = String.format("doc['%s'].value", paramers.get(0).toString());
switch (StringUtils.toUpper(castType)) {
case "INT":
return String.format("def %s = Double.parseDouble(%s.toString()).intValue()", name, castFieldName);
case "LONG":
return String.format("def %s = Double.parseDouble(%s.toString()).longValue()", name, castFieldName);
case "FLOAT":
return String.format("def %s = Double.parseDouble(%s.toString()).floatValue()", name, castFieldName);
case "DOUBLE":
return String.format("def %s = Double.parseDouble(%s.toString()).doubleValue()", name, castFieldName);
return getCastToNumericValueScript(name, castFieldName, StringUtils.toLower(castType));
case "STRING":
return String.format("def %s = %s.toString()", name, castFieldName);
case "DATETIME":
Expand All @@ -990,6 +987,14 @@ public String getCastScriptStatement(String name, String castType, List<KVValue>
}
}

private String getCastToNumericValueScript(String varName, String docValue, String targetType) {
String script =
"def %1$s = (%2$s instanceof boolean) "
+ "? (%2$s ? 1 : 0) "
+ ": Double.parseDouble(%2$s.toString()).%3$sValue()";
return StringUtils.format(script, varName, docValue, targetType);
}

/**
* Returns return type of script function. This is simple approach, that might be not the best solution in the long
* term. For example - for JDBC, if the column type in index is INTEGER, and the query is "select column+5", current
Expand Down
Expand Up @@ -79,7 +79,7 @@ public void missingWhereKeywordShouldThrowException() {
expectValidationFailWithErrorMessage(
"SELECT * FROM accounts age = 1",
"offending symbol [=]", // parser thought 'age' is alias of 'accounts' and failed at '='
"Expecting", "'WHERE'" // "Expecting tokens in {<EOF>, 'INNER', 'JOIN', ... 'WHERE', ','}"
"Expecting", ";" // "Expecting tokens in {<EOF>, ';'}"
);
}

Expand Down Expand Up @@ -130,6 +130,11 @@ public void arithmeticExpressionInWhereClauseShouldPass() {
validate("SELECT * FROM accounts WHERE age + 1 = 10");
}

@Test
public void queryEndWithSemiColonShouldPass() {
validate("SELECT * FROM accounts;");
}

private void expectValidationFailWithErrorMessage(String query, String... messages) {
exception.expect(SyntaxAnalysisException.class);
exception.expectMessage(allOf(Arrays.stream(messages).
Expand Down
Expand Up @@ -15,18 +15,20 @@

package com.amazon.opendistroforelasticsearch.sql.esdomain.mapping;

import static java.util.Collections.emptyMap;
import static org.elasticsearch.action.admin.indices.mapping.get.GetFieldMappingsResponse.FieldMappingMetaData;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;

import com.amazon.opendistroforelasticsearch.sql.domain.Field;
import com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils;
import org.hamcrest.Matcher;
import org.junit.Test;

import com.google.common.collect.ImmutableMap;
import java.util.Arrays;
import java.util.Map;
import java.util.stream.Collectors;

import static java.util.Collections.emptyMap;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
import org.elasticsearch.common.bytes.BytesArray;
import org.hamcrest.Matcher;
import org.junit.Test;

/**
* Unit test for {@code FieldMapping} with trivial methods ignored such as isSpecified, isMetaField etc.
Expand Down Expand Up @@ -81,6 +83,35 @@ public void testMultiFieldIsNotProperty() {
);
}

@Test
public void testUnknownFieldTreatedAsObject() {
assertThat(
new FieldMapping("employee"),
hasType("object")
);
}

@Test
public void testDeepNestedField() {
assertThat(
new FieldMapping(
"employee.location.city",
ImmutableMap.of(
"employee.location.city",
new FieldMappingMetaData("employee.location.city", new BytesArray(
"{\n" +
" \"city\" : {\n" +
" \"type\" : \"text\"\n" +
" }\n" +
"}")
)
),
emptyMap()
),
hasType("text")
);
}

private Matcher<FieldMapping> isWildcardSpecified(boolean isMatched) {
return MatcherUtils.featureValueOf("is field match wildcard specified in query",
is(isMatched),
Expand All @@ -93,6 +124,12 @@ private Matcher<FieldMapping> isPropertyField(boolean isProperty) {
FieldMapping::isPropertyField);
}

private Matcher<FieldMapping> hasType(String expected) {
return MatcherUtils.featureValueOf("type",
is(expected),
FieldMapping::type);
}

private Map<String, Field> fieldsSpecifiedInQuery(String...fieldNames) {
return Arrays.stream(fieldNames).
collect(Collectors.toMap(name -> name,
Expand Down
@@ -0,0 +1,127 @@
/*
* Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*
*/

package com.amazon.opendistroforelasticsearch.sql.esintgtest;

import static com.amazon.opendistroforelasticsearch.sql.esintgtest.TestsConstants.TEST_INDEX_DEEP_NESTED;
import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.rows;
import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.schema;
import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.verifyDataRows;
import static com.amazon.opendistroforelasticsearch.sql.util.MatcherUtils.verifySchema;

import com.amazon.opendistroforelasticsearch.sql.utils.StringUtils;
import org.json.JSONArray;
import org.json.JSONObject;
import org.junit.Test;

/**
* Integration test for Elasticsearch object field (and nested field).
* This class is focused on simple SELECT-FROM query to ensure right column
* number and value is returned.
*/
public class ObjectFieldSelectIT extends SQLIntegTestCase {

@Override
protected void init() throws Exception {
loadIndex(Index.DEEP_NESTED);
}

@Test
public void testSelectObjectFieldItself() {
JSONObject response = new JSONObject(query("SELECT city FROM %s"));

verifySchema(response, schema("city", null, "object"));

// Expect object field itself is returned in a single cell
verifyDataRows(response,
rows(new JSONObject(
"{\n"
+ " \"name\": \"Seattle\",\n"
+ " \"location\": {\"latitude\": 10.5}\n"
+ "}")
)
);
}

@Test
public void testSelectObjectInnerFields() {
JSONObject response = new JSONObject(query(
"SELECT city.location, city.location.latitude FROM %s"));

verifySchema(response,
schema("city.location", null, "object"),
schema("city.location.latitude", null, "double")
);

// Expect inner regular or object field returned in its single cell
verifyDataRows(response,
rows(
new JSONObject("{\"latitude\": 10.5}"),
10.5
)
);
}

@Test
public void testSelectNestedFieldItself() {
JSONObject response = new JSONObject(query("SELECT projects FROM %s"));

// Nested field is absent in ES Get Field Mapping response either hence "object" used
verifySchema(response, schema("projects", null, "object"));

// Expect nested field itself is returned in a single cell
verifyDataRows(response,
rows(new JSONArray(
"[\n"
+ " {\"name\": \"AWS Redshift Spectrum querying\"},\n"
+ " {\"name\": \"AWS Redshift security\"},\n"
+ " {\"name\": \"AWS Aurora security\"}\n"
+ "]")
)
);
}

@Test
public void testSelectObjectFieldOfArrayValuesItself() {
JSONObject response = new JSONObject(query("SELECT accounts FROM %s"));

// Expect the entire list of values is returned just like a nested field
verifyDataRows(response,
rows(new JSONArray(
"[\n"
+ " {\"id\": 1},\n"
+ " {\"id\": 2}\n"
+ "]")
)
);
}

@Test
public void testSelectObjectFieldOfArrayValuesInnerFields() {
JSONObject response = new JSONObject(query("SELECT accounts.id FROM %s"));

// We don't support flatten object field of list value so expect null returned
verifyDataRows(response, rows(JSONObject.NULL));
}

private String query(String sql) {
return executeQuery(
StringUtils.format(sql, TEST_INDEX_DEEP_NESTED),
"jdbc"
);
}

}
Expand Up @@ -90,6 +90,11 @@ protected void init() throws Exception {
loadIndex(Index.BANK_WITH_NULL_VALUES);
}

@Test
public void queryEndWithSemiColonTest() {
executeQuery(StringUtils.format("SELECT * FROM %s;", TEST_INDEX_BANK), "jdbc");
}

@Test
public void searchTypeTest() throws IOException {
JSONObject response = executeQuery(String.format(Locale.ROOT, "SELECT * FROM %s LIMIT 1000",
Expand Down

0 comments on commit a8ae9e0

Please sign in to comment.