Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@ This project provides three analyzers that are intended for different contexts.
* The `phone-email` analyzer extends the `phone` analyzer with additional tokenization for email addresses (e.g. generating tokens for the user part and the domain part of an email address).
* The `phone-search` analyzer is intended to be used as a `search_analyzer` with one of the other two analyzers used for indexing. It does minimal tokenization: If a term starts with `sip:` or `tel:` it strips this part and generates a token for it. The analyzer also strips a leading `+` from phone numbers.

All three analyzers remove non-unique tokens and transform terms to lowercase.


## Example inputs

Expand Down Expand Up @@ -48,6 +46,7 @@ Input (with country code): `sip:+13169410766;ext=2233@172.17.10.117:8060`
Tokens:

```
sip:+13169410766;ext=2233@172.17.10.117:8060
sip:
13169410766;ext=2233@172.17.10.117:8060
13169410766;ext=2233
Expand Down Expand Up @@ -81,6 +80,7 @@ Input (without a country code): `tel:8177148350`
Tokens:

```
tel:8177148350
tel:
8177148350
8
Expand Down
46 changes: 35 additions & 11 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
<groupId>com.inin.analytics</groupId>
<artifactId>elasticsearch-phone</artifactId>
<packaging>jar</packaging>
<version>1.0.2</version>
<version>1.0.2-SNAPSHOT</version>
<name>elasticsearch-phone</name>
<description>Elasticsearch Plugin for Phone and SIP Analysis</description>
<url>https://github.com/MyPureCloud/elasticsearch-phone</url>
Expand Down Expand Up @@ -47,6 +47,28 @@
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>inin-release</id>
<name>ININ Release Repository</name>
<url>https://purecloud.artifactoryonline.com/purecloud/inin-release</url>
<releases>
<enabled>true</enabled>
</releases>
<snapshots>
<enabled>false</enabled>
</snapshots>
</repository>
<repository>
<id>inin-snapshot</id>
<name>ININ Snapshot Repository</name>
<url>https://purecloud.artifactoryonline.com/purecloud/inin-snapshot</url>
<releases>
<enabled>false</enabled>
</releases>
<snapshots>
<enabled>true</enabled>
</snapshots>
</repository>
</repositories>

<dependencies>
Expand Down Expand Up @@ -126,16 +148,18 @@
</developer>
</developers>

<distributionManagement>
<snapshotRepository>
<id>ossrh</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
</snapshotRepository>
<repository>
<id>ossrh</id>
<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
</repository>
</distributionManagement>
<distributionManagement>
<repository>
<id>inin-release</id>
<name>ININ Release Repository</name>
<url>https://purecloud.artifactoryonline.com/purecloud/inin-release</url>
</repository>
<snapshotRepository>
<id>inin-snapshot</id>
<name>ININ Snapshot Repository</name>
<url>https://purecloud.artifactoryonline.com/purecloud/inin-snapshot</url>
</snapshotRepository>
</distributionManagement>

<build>
<resources>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,13 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;

/**
 * Analyzer whose token stream is produced by a {@link TermExtractorTokenizer}
 * configured with a single {@link PhoneTermExtractor}.
 */
public class PhoneAnalyzer extends Analyzer {

/**
 * Builds the tokenizer and filter chain for this analyzer.
 *
 * @param field field name; not referenced by this method
 * @param reader source of the text handed to the tokenizer
 * @return components pairing the tokenizer with the filter chain built on top of it
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
Tokenizer tokenizer = new TermExtractorTokenizer(reader, new PhoneTermExtractor());
// NOTE(review): two consecutive return statements follow. This looks like the
// before/after lines of a diff rendered without +/- markers; only one can be live.
// The first wraps the UniqueTokenFilter in a LowerCaseFilter, the second returns the
// UniqueTokenFilter alone (presumably the post-change version -- confirm against the repo).
return new TokenStreamComponents(tokenizer, new LowerCaseFilter(new UniqueTokenFilter(tokenizer)));
return new TokenStreamComponents(tokenizer, new UniqueTokenFilter(tokenizer));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.miscellaneous.UniqueTokenFilter;

/**
Expand All @@ -14,6 +13,6 @@ public class PhoneEmailAnalyzer extends Analyzer {
/**
 * Builds the tokenizer and filter chain: a {@link TermExtractorTokenizer} fed by both a
 * {@link PhoneTermExtractor} and an {@link EmailTermExtractor}.
 *
 * @param fieldName field name; not referenced by this method
 * @param reader source of the text handed to the tokenizer
 * @return components pairing the tokenizer with the filter chain built on top of it
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
TermExtractorTokenizer tokenizer = new TermExtractorTokenizer(reader, new PhoneTermExtractor(), new EmailTermExtractor());
// NOTE(review): two consecutive return statements follow. This looks like the
// before/after lines of a diff rendered without +/- markers; only one can be live.
// The first adds a LowerCaseFilter around the UniqueTokenFilter, the second omits it
// (presumably the post-change version -- confirm against the repo).
return new TokenStreamComponents(tokenizer, new LowerCaseFilter(new UniqueTokenFilter(tokenizer)));
return new TokenStreamComponents(tokenizer, new UniqueTokenFilter(tokenizer));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,12 @@

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;

/**
 * Analyzer whose token stream is produced by a {@link TermExtractorTokenizer}
 * configured with a single {@link PhoneSearchTermExtractor}.
 */
public class PhoneSearchAnalyzer extends Analyzer {

/**
 * Builds the tokenizer and filter chain for this analyzer.
 *
 * @param field field name; not referenced by this method
 * @param reader source of the text handed to the tokenizer
 * @return components pairing the tokenizer with the stream that consumers read from
 */
@Override
protected TokenStreamComponents createComponents(String field, Reader reader) {
Tokenizer tokenizer = new TermExtractorTokenizer(reader, new PhoneSearchTermExtractor());
// NOTE(review): two consecutive return statements follow. This looks like the
// before/after lines of a diff rendered without +/- markers; only one can be live.
// The first wraps the tokenizer in a LowerCaseFilter, the second passes the bare
// tokenizer as the result stream (presumably the post-change version -- confirm).
return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
return new TokenStreamComponents(tokenizer, tokenizer);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ public class PhoneTermExtractor implements TermExtractor {
@Override
public List<String> extractTerms(String input) {
List<String> tokens = new ArrayList<String>();
tokens.add(input);
// Rip off the "tel:" or "sip:" prefix
if (input.indexOf("tel:") == 0 || input.indexOf("sip:") == 0) {
tokens.add(input.substring(0, 4));
Expand Down
2 changes: 1 addition & 1 deletion src/test/java/tests/PhoneTokenizerIntegrationTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ public void testSipWithTelephoneExtension() throws ExecutionException, Interrupt

// Exercises a SIP URI whose user part is a mixed-case name rather than a number.
// NOTE(review): assertIncludes is defined outside this view; presumably it analyzes the
// first argument and checks that the listed tokens are among the results -- confirm.
@Test
public void testSipWithUsername() throws ExecutionException, InterruptedException, IOException {
// NOTE(review): the two assertions below differ only in the case of the expected token
// ("jeffsip" vs "JeffSIP"). They look like the before/after lines of a diff rendered
// without +/- markers, so only one is expected to be present in the real file.
assertIncludes("sip:JeffSIP@178.12.220.18", Arrays.asList("jeffsip"));
assertIncludes("sip:JeffSIP@178.12.220.18", Arrays.asList("JeffSIP"));
}

@Test
Expand Down