Skip to content

Commit

Permalink
TRUNK-4375 Remove stop words from query
Browse files Browse the repository at this point in the history
  • Loading branch information
rkorytkowski committed Jun 23, 2014
1 parent 51e2dae commit b17c387
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
Expand Down Expand Up @@ -600,7 +601,7 @@ public List<Concept> getConcepts(final String name, final Locale loc, final bool

final List<ConceptName> names = LuceneQuery.newQuery(query.toString(), sessionFactory.getCurrentSession(),
ConceptName.class).include("concept.datatype.conceptDatatypeId", transformToIds(datatypes)).include(
"concept.conceptClass.conceptClassId", transformToIds(classes)).include("concept.retired", "false").skipSame(
"concept.conceptClass.conceptClassId", transformToIds(classes)).include("concept.retired", false).skipSame(
"concept.conceptId").list();

final List<Concept> concepts = Lists.transform(names, transformNameToConcept);
Expand All @@ -609,22 +610,24 @@ public List<Concept> getConcepts(final String name, final Locale loc, final bool
}

private String newNamesQuery(final Set<Locale> locales, final String name, final boolean keywords) {
final StringBuilder query = new StringBuilder();
final String escapedName = LuceneQuery.escapeQuery(name);

final StringBuilder query = new StringBuilder();
query.append("(");
if (keywords) {
query.append(" name:(" + name + ")^0.2");
List<String> words = tokenizeName(escapedName, locales);

query.append(" name:(" + StringUtils.join(words, " ") + ")^0.2");
//Put exact phrase higher
query.append(" OR name:(\"" + name + "\")^0.6");
query.append(" OR name:(\"" + escapedName + "\")^0.6");

//Include partial
String[] words = name.trim().split(" ");
query.append(" OR name:(" + StringUtils.join(words, "* ") + "*)^0.1");

//Include similar
query.append(" OR name:(" + StringUtils.join(words, "~0.8 ") + "~0.8)^0.1");
} else {
query.append(" name:\"" + LuceneQuery.escapeQuery(name) + "\"");
query.append(" name:\"" + escapedName + "\"");
}
query.append(")");

Expand All @@ -643,6 +646,24 @@ private String newNamesQuery(final Set<Locale> locales, final String name, final

return query.toString();
}

/**
 * Splits an already-escaped search phrase into individual words and removes concept stop words.
 * <p>
 * The phrase is split on runs of whitespace and empty tokens are discarded. The caller appends
 * wildcard ({@code *}) and fuzzy ({@code ~0.8}) suffixes to every returned word, so an empty
 * token (produced e.g. by two consecutive spaces) would otherwise end up in the query as a bare
 * {@code *} or {@code ~0.8} term.
 *
 * @param escapedName the search phrase, expected to be escaped for Lucene by the caller
 * @param locales the locales whose stop words should be removed from the phrase
 * @return a new mutable list with the remaining words in their original order and case; empty if
 *         the phrase is blank or consists only of stop words
 */
private List<String> tokenizeName(final String escapedName, final Set<Locale> locales) {
	// Collect the stop words of every requested locale into one lookup set.
	Set<String> stopWords = new HashSet<String>();
	for (Locale locale : locales) {
		stopWords.addAll(Context.getConceptService().getConceptStopWords(locale));
	}

	List<String> words = new ArrayList<String>();
	for (String word : escapedName.trim().split("\\s+")) {
		// split() yields a single empty token for a blank phrase; never pass that on.
		if (word.length() == 0) {
			continue;
		}
		// NOTE(review): the comparison assumes stop words are stored in upper case - confirm.
		if (!stopWords.contains(word.toUpperCase())) {
			words.add(word);
		}
	}
	return words;
}

/**
* gets questions for the given answer concept
Expand Down Expand Up @@ -1364,7 +1385,7 @@ private LuceneQuery<ConceptName> createConceptNameQuery(final String phrase, Lis
}

if (!includeRetired) {
luceneQuery.include("concept.retired", "false");
luceneQuery.include("concept.retired", false);
}

luceneQuery.skipSame("concept.conceptId");
Expand Down
2 changes: 2 additions & 0 deletions api/src/test/java/org/openmrs/api/ConceptServiceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2501,6 +2501,8 @@ public void getConcepts_shouldReturnASearchResultWhoseConceptNameContainsAllWord
@Verifies(value = "should return a search result for phrase with stop words", method = "getConcepts(String,List<QLocale;>,null,List<QConceptClass;>,List<QConceptClass;>,List<QConceptDatatype;>,List<QConceptDatatype;>,Concept,Integer,Integer)")
public void getConcepts_shouldReturnASearchResultForPhraseWithStopWords() throws Exception {
executeDataSet("org/openmrs/api/include/ConceptServiceTest-names.xml");
conceptService.saveConceptStopWord(new ConceptStopWord("OF", Locale.US));

List<ConceptSearchResult> searchResults = conceptService.getConcepts("tuberculosis of knee", Collections
.singletonList(new Locale("en", "US")), false, null, null, null, null, null, null, null);

Expand Down

0 comments on commit b17c387

Please sign in to comment.