-
Notifications
You must be signed in to change notification settings - Fork 102
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add batch query capabilites to elastic io #462
Changes from all commits
cfefc2e
a20fee8
136eb35
237f726
8253fe8
8c61819
c8a22b9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -33,17 +33,13 @@ | |
import org.elasticsearch.client.Client; | ||
import org.elasticsearch.common.xcontent.XContentBuilder; | ||
import org.elasticsearch.common.xcontent.XContentFactory; | ||
import org.elasticsearch.discovery.Discovery; | ||
import org.elasticsearch.index.query.BoolQueryBuilder; | ||
import org.elasticsearch.search.SearchHit; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.*; | ||
|
||
import static com.rackspacecloud.blueflood.io.ElasticIO.ESFieldLabel.*; | ||
import static org.elasticsearch.index.query.QueryBuilders.boolQuery; | ||
|
@@ -68,6 +64,7 @@ static enum ESFieldLabel { | |
private final Timer writeTimer = Metrics.timer(ElasticIO.class, "Write Duration"); | ||
private final Histogram batchHistogram = Metrics.histogram(ElasticIO.class, "Batch Sizes"); | ||
private Meter classCastExceptionMeter = Metrics.meter(ElasticIO.class, "Failed Cast to IMetric"); | ||
private Histogram queryBatchHistogram = Metrics.histogram(ElasticIO.class, "Query Batch Size"); | ||
|
||
public ElasticIO() { | ||
this(RemoteElasticSearchServer.getInstance()); | ||
|
@@ -143,34 +140,38 @@ private IndexRequestBuilder createSingleRequest(Discovery md) throws IOException | |
} | ||
|
||
public List<SearchResult> search(String tenant, String query) throws Exception { | ||
// complain if someone is trying to search specifically on any tenant. | ||
if (query.indexOf(tenantId.name()) >= 0) { | ||
throw new Exception("Illegal query: " + query); | ||
} | ||
|
||
return search(tenant, Arrays.asList(query)); | ||
} | ||
|
||
public List<SearchResult> search(String tenant, List<String> queries) throws Exception { | ||
List<SearchResult> results = new ArrayList<SearchResult>(); | ||
Timer.Context searchTimerCtx = searchTimer.time(); | ||
|
||
// todo: we'll want to change this once we decide and use a query syntax in the query string. | ||
BoolQueryBuilder qb = boolQuery() | ||
.must(termQuery(tenantId.toString(), tenant)) | ||
.must( | ||
query.contains("*") ? | ||
wildcardQuery(metric_name.name(), query) : | ||
termQuery(metric_name.name(), query) | ||
); | ||
Timer.Context multiSearchCtx = searchTimer.time(); | ||
queryBatchHistogram.update(queries.size()); | ||
BoolQueryBuilder qb = boolQuery(); | ||
|
||
for (String query : queries) { | ||
qb.should(boolQuery() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What does should do? Does it expect always all queries to return some results? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tilogaat |
||
.must(termQuery(tenantId.toString(), tenant)) | ||
.must( | ||
query.contains("*") ? | ||
wildcardQuery(metric_name.name(), query) : | ||
termQuery(metric_name.name(), query) | ||
)); | ||
} | ||
|
||
SearchResponse response = client.prepareSearch(INDEX_NAME) | ||
.setRouting(tenant) | ||
.setSize(100000) | ||
.setVersion(true) | ||
.setQuery(qb) | ||
.execute() | ||
.actionGet(); | ||
searchTimerCtx.stop(); | ||
multiSearchCtx.stop(); | ||
for (SearchHit hit : response.getHits().getHits()) { | ||
SearchResult result = convertHitToMetricDiscoveryResult(hit); | ||
results.add(result); | ||
} | ||
|
||
return results; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -41,7 +41,6 @@ public class ElasticIOTest { | |
private static final int NUM_DOCS = NUM_PARENT_ELEMENTS * CHILD_ELEMENTS.size() * NUM_GRANDCHILD_ELEMENTS; | ||
private static final String TENANT_A = "ratanasv"; | ||
private static final String TENANT_B = "someotherguy"; | ||
|
||
private static final String TENANT_C = "someothergal"; | ||
private static final String UNIT = "horse length"; | ||
private static final Map<String, List<Locator>> locatorMap = new HashMap<String, List<Locator>>(); | ||
|
@@ -138,6 +137,48 @@ public void testWildcardForPreaggregatedMetric() throws Exception { | |
testWildcard(TENANT_C, null); | ||
} | ||
|
||
@Test | ||
public void testBatchQueryWithNoWildCards() throws Exception { | ||
String tenantId = TENANT_A; | ||
String query1 = "one.two.three00.fourA.five1"; | ||
String query2 = "one.two.three01.fourA.five2"; | ||
List<SearchResult> results; | ||
ArrayList<String> queries = new ArrayList<String>(); | ||
queries.add(query1); | ||
queries.add(query2); | ||
results = elasticIO.search(tenantId, queries); | ||
Assert.assertEquals(results.size(), 2); //we searched for 2 unique metrics | ||
results.contains(new SearchResult(TENANT_A, query1, UNIT)); | ||
results.contains(new SearchResult(TENANT_A, query2, UNIT)); | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if query2 = "foo.bar" Since it does not exist will it return the results from query1 or not return any results? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @tilogaat If the query2 = "foo.bar" i.e we are querying for something which does not exist, then it will match query1 and return the results of just that query. |
||
@Test | ||
public void testBatchQueryWithWildCards() throws Exception { | ||
String tenantId = TENANT_A; | ||
String query1 = "one.two.three00.fourA.*"; | ||
String query2 = "one.two.*.fourA.five2"; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we add a test with the wildcard in the beginning *.two ... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
List<SearchResult> results; | ||
ArrayList<String> queries = new ArrayList<String>(); | ||
queries.add(query1); | ||
queries.add(query2); | ||
results = elasticIO.search(tenantId, queries); | ||
// query1 will return 3 results, query2 will return 30 results, but we get back 32 because of intersection | ||
Assert.assertEquals(results.size(), 32); | ||
} | ||
|
||
@Test | ||
public void testBatchQueryWithWildCards2() throws Exception { | ||
String tenantId = TENANT_A; | ||
String query1 = "*.two.three00.fourA.five1"; | ||
String query2 = "*.two.three01.fourA.five2"; | ||
List<SearchResult> results; | ||
ArrayList<String> queries = new ArrayList<String>(); | ||
queries.add(query1); | ||
queries.add(query2); | ||
results = elasticIO.search(tenantId, queries); | ||
Assert.assertEquals(results.size(), 2); | ||
} | ||
|
||
public void testWildcard(String tenantId, String unit) throws Exception { | ||
SearchResult entry; | ||
List<SearchResult> results; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
do we not need this anymore?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tilogaat I am not sure what this line of code was doing. The original intent seemed to not allow a query which is cross-tenant, like tenant A cannot search for metrics belonging to tenant B. But all it was doing was searching to see if the string
tenantId
does not exist in the query.For what its worth, I checked whether cross-tenant discovery is possible, and it seems like its not. We index
tenantId
along withmetricName
andunit
as terms, and while searching we pass the tenantId from the uri and use it as amust
contain match, so its impossible to search for cross-tenant metrics.