Skip to content

Commit

Permalink
Add a MultiTermAwareComponent marker interface to analysis factories. e…
Browse files Browse the repository at this point in the history
…lastic#19028

This is the same as what Lucene does for its analysis factories, and we have
tests that make sure that the elasticsearch factories are in sync with
Lucene's. This is a first step to move forward on elastic#9978 and elastic#18064.
  • Loading branch information
jpountz committed Jun 23, 2016
1 parent 6c8744e commit 7ba5bce
Show file tree
Hide file tree
Showing 28 changed files with 690 additions and 207 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
/**
* Factory for ASCIIFoldingFilter.
*/
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory {
public class ASCIIFoldingTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {
public static ParseField PRESERVE_ORIGINAL = new ParseField("preserve_original");
public static boolean DEFAULT_PRESERVE_ORIGINAL = false;

Expand All @@ -44,4 +44,9 @@ public ASCIIFoldingTokenFilterFactory(IndexSettings indexSettings, Environment e
public TokenStream create(TokenStream tokenStream) {
return new ASCIIFoldingFilter(tokenStream, preserveOriginal);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
*
*/
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class ArabicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public ArabicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -37,4 +37,9 @@ public ArabicNormalizationFilterFactory(IndexSettings indexSettings, Environment
public TokenStream create(TokenStream tokenStream) {
return new ArabicNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory {
public final class CJKWidthFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public CJKWidthFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -36,4 +36,9 @@ public TokenStream create(TokenStream tokenStream) {
return new CJKWidthFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
/**
* Factory for {@link DecimalDigitFilter}
*/
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory {
public final class DecimalDigitFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public DecimalDigitFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,9 @@ public DecimalDigitFilterFactory(IndexSettings indexSettings, Environment env, S
public TokenStream create(TokenStream tokenStream) {
return new DecimalDigitFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
/**
*
*/
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory {
public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

private final CharArraySet articles;

Expand All @@ -42,4 +42,9 @@ public ElisionTokenFilterFactory(IndexSettings indexSettings, Environment env, S
public TokenStream create(TokenStream tokenStream) {
return new ElisionFilter(tokenStream, articles);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
* Factory for {@link GermanNormalizationFilter}
*/
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class GermanNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public GermanNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,8 @@ public TokenStream create(TokenStream tokenStream) {
return new GermanNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
* Factory for {@link HindiNormalizationFilter}
*/
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class HindiNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public HindiNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,8 @@ public TokenStream create(TokenStream tokenStream) {
return new HindiNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
* Factory for {@link IndicNormalizationFilter}
*/
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class IndicNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public IndicNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,8 @@ public TokenStream create(TokenStream tokenStream) {
return new IndicNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
* <li>turkish: {@link TurkishLowerCaseFilter}
* </ul>
*/
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory {
public class LowerCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

private final String lang;

Expand All @@ -60,6 +60,11 @@ public TokenStream create(TokenStream tokenStream) {
throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
}

@Override
public Object getMultiTermComponent() {
return this;
}
}


Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
/**
*
*/
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory {
public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implements MultiTermAwareComponent {

public LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,9 @@ public LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment enviro
public Tokenizer create() {
return new LowerCaseTokenizer();
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class MappingCharFilterFactory extends AbstractCharFilterFactory {
public class MappingCharFilterFactory extends AbstractCharFilterFactory implements MultiTermAwareComponent {

private final NormalizeCharMap normMap;

Expand Down Expand Up @@ -114,4 +114,9 @@ private String parseString(String s) {
}
return new String(out, 0, writePos);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.analysis;

/** Elasticsearch counterpart of {@link org.apache.lucene.analysis.util.MultiTermAwareComponent}. */
public interface MultiTermAwareComponent {

/** Returns an analysis component to handle analysis if multi-term queries.
* The returned component must be a TokenizerFactory, TokenFilterFactory or CharFilterFactory.
*/
public Object getMultiTermComponent();

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
*
*/
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class PersianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public PersianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,8 @@ public TokenStream create(TokenStream tokenStream) {
return new PersianNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
*
*/
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class SerbianNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public SerbianNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -37,4 +37,9 @@ public SerbianNormalizationFilterFactory(IndexSettings indexSettings, Environmen
public TokenStream create(TokenStream tokenStream) {
return new SerbianNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
/**
* Factory for {@link SoraniNormalizationFilter}
*/
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory {
public class SoraniNormalizationFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public SoraniNormalizationFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,4 +38,9 @@ public TokenStream create(TokenStream tokenStream) {
return new SoraniNormalizationFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
/**
*
*/
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory {
public class UpperCaseTokenFilterFactory extends AbstractTokenFilterFactory implements MultiTermAwareComponent {

public UpperCaseTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
super(indexSettings, name, settings);
Expand All @@ -38,6 +38,11 @@ public UpperCaseTokenFilterFactory(IndexSettings indexSettings, Environment envi
public TokenStream create(TokenStream tokenStream) {
return new UpperCaseFilter(tokenStream);
}

@Override
public Object getMultiTermComponent() {
return this;
}
}


0 comments on commit 7ba5bce

Please sign in to comment.