diff --git a/es/index_settings.json b/es/index_settings.json index 85d2bcae1..4cd856c56 100644 --- a/es/index_settings.json +++ b/es/index_settings.json @@ -2,13 +2,22 @@ "analysis": { "analyzer": { "index_ngram": { + "char_filter": [ + "punctuationgreedy", + "remove_ws_hnr_suffix" + ], "tokenizer": "edge_ngram", "filter": [ + "preserving_word_delimiter", "lowercase", - "german_normalization" + "german_normalization", + "unique" ] }, "search_ngram": { + "char_filter": [ + "punctuationgreedy" + ], "tokenizer": "standard", "filter": [ "lowercase", @@ -40,6 +49,16 @@ "unique" ], "tokenizer": "standard" + }, + "index_housenumber": { + "char_filter": [ + "punctuationgreedy", + "remove_ws_hnr_suffix" + ], + "filter": [ + "lowercase", + "preserving_word_delimiter"], + "tokenizer": "standard" } }, "tokenizer": { @@ -56,13 +75,23 @@ "char_filter": { "punctuationgreedy": { "type": "pattern_replace", - "pattern": "[\\.,]" + "pattern": "[\\.,]", + "replacement": " " + }, + "remove_ws_hnr_suffix": { + "type": "pattern_replace", + "pattern": "(\\d+)\\s(?=\\p{L}\\b)", + "replacement": "$1" } }, "filter": { "photonlength": { "min": "2", "type": "length" + }, + "preserving_word_delimiter": { + "type": "word_delimiter", + "preserve_original": "true" } } } diff --git a/es/mappings.json b/es/mappings.json index 7bf3117b7..fe3fc2693 100644 --- a/es/mappings.json +++ b/es/mappings.json @@ -217,8 +217,10 @@ } }, "housenumber": { - "type": "keyword", + "type": "text", "index": true, + "analyzer": "index_housenumber", + "search_analyzer": "standard", "copy_to": [ "collector.default" ]