In [1]:
import requests
from pprint import pprint

In [2]:
ES_URI = "http://localhost:9200/my_library"

standard_analyzer_settings = {
    "settings": {
        "analysis": {
            "analyzer": {
                "standard_clone": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "stop"
                    ]
                }
            }
        }
    }
}

In [3]:
STRANGELOVE = "Dr. Strangelove: Or How I Learned To Stop Worrying and Love the Bomb"
FLOWER = "flower flowers flowering flowered"

def create_index(settings):
    result = requests.put(ES_URI, json=settings)
    print("Index created", result.json())

def remove_index():
    result = requests.delete(ES_URI)
    print("Index removed", result.json())
    
def analyze_query(analyzer, text):
    query = {
        "text": text,
        "analyzer": analyzer
    }
    result = requests.get(f"{ES_URI}/_analyze", json=query)
    pprint(result.json())
    

In [4]:
create_index(standard_analyzer_settings)

Index created {'error': {'root_cause': [{'type': 'resource_already_exists_exception', 'reason': 'index [my_library/1I0DqhhvTCeSBKwi7UhMKg] already exists', 'index_uuid': '1I0DqhhvTCeSBKwi7UhMKg', 'index': 'my_library'}], 'type': 'resource_already_exists_exception', 'reason': 'index [my_library/1I0DqhhvTCeSBKwi7UhMKg] already exists', 'index_uuid': '1I0DqhhvTCeSBKwi7UhMKg', 'index': 'my_library'}, 'status': 400}


In [5]:
analyze_query("standard_clone", STRANGELOVE)

{'error': {'reason': 'failed to find analyzer [standard_clone]',
           'root_cause': [{'reason': 'failed to find analyzer [standard_clone]',
                           'type': 'illegal_argument_exception'}],
           'type': 'illegal_argument_exception'},
 'status': 400}


In [6]:
remove_index()

Index removed {'acknowledged': True}


In [7]:
english_analyzer_settings = {
    "settings": {
        "analysis": {
            "filter": {
                "english_stop": {
                    "type": "stop",
                    "stopwords": "_english_"
                },
                "english_keywords": {
                    "type": "keyword_marker",
                    "keywords": ["example"]
                },
                "english_stemmer": {
                    "type": "stemmer",
                    "language": "english"
                },
                "english_possesive_stemmer": {
                    "type": "stemmer",
                    "language": "possessive_english"
                }
            },
            "analyzer": {
                "english_clone": {
                    "tokenizer": "standard",
                    "filter": [
                        "english_possesive_stemmer",
                        "lowercase",
                        "english_stop",
                        "english_keywords",
                        "english_stemmer"
                    ]
                }
            }
        }
    }
}

In [8]:
create_index(english_analyzer_settings)

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [9]:
analyze_query("english_clone", STRANGELOVE)

{'tokens': [{'end_offset': 2,
             'position': 0,
             'start_offset': 0,
             'token': 'dr',
             'type': '<ALPHANUM>'},
            {'end_offset': 15,
             'position': 1,
             'start_offset': 4,
             'token': 'strangelov',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 20,
             'token': 'how',
             'type': '<ALPHANUM>'},
            {'end_offset': 25,
             'position': 4,
             'start_offset': 24,
             'token': 'i',
             'type': '<ALPHANUM>'},
            {'end_offset': 33,
             'position': 5,
             'start_offset': 26,
             'token': 'learn',
             'type': '<ALPHANUM>'},
            {'end_offset': 41,
             'position': 7,
             'start_offset': 37,
             'token': 'stop',
             'type': '<ALPHANUM>'},
            {'end_offset': 50,
             'position'

In [10]:
analyze_query("english_clone", FLOWER)

{'tokens': [{'end_offset': 6,
             'position': 0,
             'start_offset': 0,
             'token': 'flower',
             'type': '<ALPHANUM>'},
            {'end_offset': 14,
             'position': 1,
             'start_offset': 7,
             'token': 'flower',
             'type': '<ALPHANUM>'},
            {'end_offset': 24,
             'position': 2,
             'start_offset': 15,
             'token': 'flower',
             'type': '<ALPHANUM>'},
            {'end_offset': 33,
             'position': 3,
             'start_offset': 25,
             'token': 'flower',
             'type': '<ALPHANUM>'}]}


In [11]:
ALMOST_STRANGELOVE = "mr. weirdlove:  don't worry I'm learning to star loving bombs"
analyze_query("english_clone", ALMOST_STRANGELOVE)

{'tokens': [{'end_offset': 2,
             'position': 0,
             'start_offset': 0,
             'token': 'mr',
             'type': '<ALPHANUM>'},
            {'end_offset': 13,
             'position': 1,
             'start_offset': 4,
             'token': 'weirdlov',
             'type': '<ALPHANUM>'},
            {'end_offset': 21,
             'position': 2,
             'start_offset': 16,
             'token': "don't",
             'type': '<ALPHANUM>'},
            {'end_offset': 27,
             'position': 3,
             'start_offset': 22,
             'token': 'worri',
             'type': '<ALPHANUM>'},
            {'end_offset': 31,
             'position': 4,
             'start_offset': 28,
             'token': "i'm",
             'type': '<ALPHANUM>'},
            {'end_offset': 40,
             'position': 5,
             'start_offset': 32,
             'token': 'learn',
             'type': '<ALPHANUM>'},
            {'end_offset': 48,
             'positi

In [12]:
phonetic_analyzer_settings = {
    "settings": {
        "analysis": {
            "analyzer": {
                "phonetic": {
                    "tokenizer": "standard",
                    "filter": [
                        "lowercase",
                        "my_doublemetaphone"
                    ]
                }
            },
            "filter": {
                "my_doublemetaphone": {
                    "type": "phonetic",
                    "encoder": "doublemetaphone",
                    "replace": True
                }
            }
        }       
    }
}

In [13]:
remove_index()
create_index(phonetic_analyzer_settings)

Index removed {'acknowledged': True}
Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [14]:
analyze_query("phonetic", "Message from Dalai Lama")

{'tokens': [{'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSJ',
             'type': '<ALPHANUM>'},
            {'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSK',
             'type': '<ALPHANUM>'},
            {'end_offset': 12,
             'position': 1,
             'start_offset': 8,
             'token': 'FRM',
             'type': '<ALPHANUM>'},
            {'end_offset': 18,
             'position': 2,
             'start_offset': 13,
             'token': 'TL',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 19,
             'token': 'LM',
             'type': '<ALPHANUM>'}]}


In [15]:
analyze_query("phonetic", "Message from tall llama")

{'tokens': [{'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSJ',
             'type': '<ALPHANUM>'},
            {'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSK',
             'type': '<ALPHANUM>'},
            {'end_offset': 12,
             'position': 1,
             'start_offset': 8,
             'token': 'FRM',
             'type': '<ALPHANUM>'},
            {'end_offset': 17,
             'position': 2,
             'start_offset': 13,
             'token': 'TL',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 18,
             'token': 'LM',
             'type': '<ALPHANUM>'}]}


### 4.3

In [16]:
remove_index()

Index removed {'acknowledged': True}


In [17]:
create_index({"settings": {"number_of_shards": 1}})

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [18]:
def index_document(id, doc):
    result = requests.put(f"{ES_URI}/_doc/{id}", json=doc)
    print("Indexed", result.json())

In [19]:
index_document(1, {"title": "apple apple apple apple apple"})

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}


In [20]:
index_document(2, {"title": "apple apple apple banana banana"})

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}


In [21]:
index_document(3, {"title": "apple banana blueberry coconut"})

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}


In [22]:
def search(title, explain=False):
    return requests.get(
        f"{ES_URI}/_doc/_search",
        json={"explain": explain, "query": {"match": {"title": title}}}
    )

In [23]:
search("apple", True).json()

{'took': 100,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [24]:
index_document(4, {"title": "apples apple"})

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '4', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}


In [25]:
search("apple apple", False).json()

{'took': 6,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [26]:
remove_index()

Index removed {'acknowledged': True}


In [27]:
create_index(english_analyzer_settings)

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [28]:
remove_index()
# appraently both the book example and elastic official docs example: 
# https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-lang-analyzer.html#english-analyzer
# do not solve the problem and regular english analyzer needs to be used
create_index(
    {"settings": {
        "analysis": {
            "analyzer": {
                "default": {
                    "type": "english"
                }
            }
        }
      }
    }
)

Index removed {'acknowledged': True}
Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [29]:
def index_documents(documents):
    for id, doc in enumerate(documents):
        index_document(id, doc)

In [30]:
docs = [
    {"title": "apple apple apple apple apple"},
    {"title": "apple apple apple banana banana"},
    {"title": "apple banana blueberry coconut"},
    {"title": "apples apple"}
]

In [31]:
index_documents(docs)

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '0', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}


In [32]:
search("apple", False).json()

{'took': 4,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

### 4.4

In [33]:
standard_with_acronyms = {
    "settings": {
        "analysis": {
            "filter": {
                "acronyms": {
                    "type": "word_delimiter",
                    "catenate_all": True,
                    "generate_word_parts": False,
                    "generate_number_parts": False,
                    "perserve_original": True
                }
            },
            "analyzer": {
                "standard_with_acronyms": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "acronyms"]
                }
            }
        }
    }
}

In [34]:
remove_index()
create_index(standard_with_acronyms)

Index removed {'acknowledged': True}
Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [35]:
analyze_query("standard_with_acronyms", "I.B.M versus IBM versus ibm")

{'tokens': [{'end_offset': 5,
             'position': 0,
             'start_offset': 0,
             'token': 'ibm',
             'type': '<ALPHANUM>'},
            {'end_offset': 12,
             'position': 1,
             'start_offset': 6,
             'token': 'versus',
             'type': '<ALPHANUM>'},
            {'end_offset': 16,
             'position': 2,
             'start_offset': 13,
             'token': 'ibm',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 17,
             'token': 'versus',
             'type': '<ALPHANUM>'},
            {'end_offset': 27,
             'position': 4,
             'start_offset': 24,
             'token': 'ibm',
             'type': '<ALPHANUM>'}]}


In [36]:
docs.append({"title": "N.E.W techno"})

In [37]:
docs.append({"title": "new apple"})

In [38]:
index_documents(docs)

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '0', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '3', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 3, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '4', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 4, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '5', '_v

In [39]:
search("n.e.w", False).json()

{'took': 2,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [40]:
phone_num_analyzer = {
    "settings": {
        "analysis": {
            "filter": {
                "phone_num_filter": {
                    "type": "word_delimiter",
                    "catenate_all": True,
                    "generate_number_parts": False
                },
                "phone_num_parts": {
                    "type": "pattern_capture",
                    "patterns": ["(\\d{7}$)","(\\d{10}$)"],
                    "perserve_original": True
                }
            },
            "analyzer": {
                "phone_num": {
                    "tokenizer": "keyword",
                    "filter": ["phone_num_filter", "phone_num_parts"]
                }
            }
        }
    }
}

In [41]:
remove_index()
create_index(phone_num_analyzer)

Index removed {'acknowledged': True}
Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [42]:
analyze_query("phone_num", "1(800)867-5309")

{'tokens': [{'end_offset': 14,
             'position': 0,
             'start_offset': 0,
             'token': '18008675309',
             'type': 'word'},
            {'end_offset': 14,
             'position': 0,
             'start_offset': 0,
             'token': '8008675309',
             'type': 'word'},
            {'end_offset': 14,
             'position': 0,
             'start_offset': 0,
             'token': '8675309',
             'type': 'word'}]}


### 4.4.2

In [43]:
from copy import deepcopy
retail_analyzer_settings = deepcopy(english_analyzer_settings)
retail_analyzer_settings["settings"]["analysis"]["filter"]["retail_syn_filter"] = {
    "type": "synonym",
    "synonyms": [
        "dress shoe,dress shoes => dress_shoe, shoe"
    ]
}
retail_analyzer_settings["settings"]["analysis"]["analyzer"] = {
    "retail_analyzer": {
        "tokenizer": "standard",
        "filter": [
            "english_possesive_stemmer",
            "lowercase",
            "retail_syn_filter",
            "english_keywords",
            "english_stemmer"
        ]
    }
}
retail_analyzer_settings["mappings"] = {
    "properties": {
        "desc": {
            "type": "text",
            "analyzer": "retail_analyzer"
        }
    }
}

In [44]:
remove_index()

Index removed {'acknowledged': True}


In [45]:
create_index(retail_analyzer_settings)

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [46]:
retail = [
    {"desc": "bob's brand dress shoes are the bom diggity"},
    {"desc": "this little black dress is sure to impress"},
    {"desc": "tennis shoes... you know, for tennis"}
]

In [47]:
index_documents(retail)

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '0', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}


In [48]:
def search_using_query(query):
    return requests.get(
        f"{ES_URI}/_doc/_search",
        json=query
    )


In [49]:
search_using_query({
    "query": {
        "match": {"desc": "dress"}
    }
}).json()

{'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [50]:
search_using_query({"query": {"match": {"desc": "shoe"}}}).json()

{'took': 1,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [51]:
search_using_query({"query": {"match": {"desc": "dress shoe"}}}).json()

{'took': 11,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 0, 'relation': 'eq'},
  'max_score': None,
  'hits': []}}

In [52]:
analyze_query("retail_analyzer", retail[2]["desc"])

{'tokens': [{'end_offset': 6,
             'position': 0,
             'start_offset': 0,
             'token': 'tenni',
             'type': '<ALPHANUM>'},
            {'end_offset': 12,
             'position': 1,
             'start_offset': 7,
             'token': 'shoe',
             'type': '<ALPHANUM>'},
            {'end_offset': 19,
             'position': 2,
             'start_offset': 16,
             'token': 'you',
             'type': '<ALPHANUM>'},
            {'end_offset': 24,
             'position': 3,
             'start_offset': 20,
             'token': 'know',
             'type': '<ALPHANUM>'},
            {'end_offset': 29,
             'position': 4,
             'start_offset': 26,
             'token': 'for',
             'type': '<ALPHANUM>'},
            {'end_offset': 36,
             'position': 5,
             'start_offset': 30,
             'token': 'tenni',
             'type': '<ALPHANUM>'}]}


In [53]:
separate_analyzer_index_search = deepcopy(retail_analyzer_settings)

In [54]:
del separate_analyzer_index_search["settings"]["analysis"]["filter"]["retail_syn_filter"]
separate_analyzer_index_search["settings"]["analysis"]["filter"]["retail_syn_filter_index"] = {
    "type":"synonym",
    "synonyms": ["dress shoe,dress shoes => dress_shoe, shoe"]
}
separate_analyzer_index_search["settings"]["analysis"]["filter"]["retail_syn_filter_search"] = {
    "type": "synonym",
    "synonyms": ["dress shoe,dress shoes => dress_shoe"]
}

separate_analyzer_index_search["settings"]["analysis"]["analyzer"] = {
    "retail_analyzer_index": {
        "tokenizer": "standard",
        "filter": [
            "english_possesive_stemmer",
            "lowercase",
            "retail_syn_filter_index",
            "english_stop",
            "english_keywords",
            "english_stemmer"
        ]
    },
    "retail_analyzer_search": {
        "tokenizer": "standard",
        "filter": [
            "english_possesive_stemmer",
            "lowercase",
            "retail_syn_filter_search",
            "english_stop",
            "english_keywords",
            "english_stemmer"
        ]
    }
}

separate_analyzer_index_search["mappings"]["properties"]["desc"] = {
    "type": "text",
    "analyzer": "retail_analyzer_index",
    "search_analyzer": "retail_analyzer_search"
}

In [55]:
remove_index()
create_index(separate_analyzer_index_search)

Index removed {'acknowledged': True}
Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [56]:
index_documents(retail)

Indexed {'_index': 'my_library', '_type': '_doc', '_id': '0', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexed {'_index': 'my_library', '_type': '_doc', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}


In [63]:
search_using_query({"query": {"match": {"desc": "dress shoe"}}}).json()

{'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.0065652,
  'hits': [{'_index': 'my_library',
    '_type': '_doc',
    '_id': '0',
    '_score': 1.0065652,
    '_source': {'desc': "bob's brand dress shoes are the bom diggity"}}]}}

In [61]:
search_using_query({"query": {"match": {"desc": "shoe"}}}).json()

{'took': 2,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.4823361,
  'hits': [{'_index': 'my_library',
    '_type': '_doc',
    '_id': '0',
    '_score': 0.4823361,
    '_source': {'desc': "bob's brand dress shoes are the bom diggity"}},
   {'_index': 'my_library',
    '_type': '_doc',
    '_id': '2',
    '_score': 0.4823361,
    '_source': {'desc': 'tennis shoes... you know, for tennis'}}]}}

In [62]:
search_using_query({"query": {"match": {"desc": "dress"}}}).json()

{'took': 3,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 1.0065652,
  'hits': [{'_index': 'my_library',
    '_type': '_doc',
    '_id': '1',
    '_score': 1.0065652,
    '_source': {'desc': 'this little black dress is sure to impress'}}]}}

### 4.4.5

In [64]:
catalog_index = {
    "settings": {
        "analysis": {
            "analyzer": {
                "path_hierarchy": {
                    "tokenizer": "path_hierarchy"
                }
            }
        }
    },
    "mappings": {
        "properties": {
            "inventory_dir": {
                "type": "text",
                "analyzer": "path_hierarchy"
            }
        }
    }
}

In [65]:
ES_URI = "http://localhost:9200/catalog"

In [66]:
create_index(catalog_index)

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'catalog'}


In [67]:
docs = [
    {"inventory_dir": "/fruit/apples/fuji", "description": "crisp, sweet-flavored, long shelf-life"},
    {"inventory_dir": "/fruit/apples/gala", "description": "sweet, pleasant apple"},
    {"inventory_dir": "/fruit", "description": "ediblem, seed-bearing portion of plants"}
]

In [68]:
index_documents(docs)

Indexed {'_index': 'catalog', '_type': '_doc', '_id': '0', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 0, '_primary_term': 1}
Indexed {'_index': 'catalog', '_type': '_doc', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 1, '_primary_term': 1}
Indexed {'_index': 'catalog', '_type': '_doc', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 2, '_primary_term': 1}


In [69]:
search_using_query({
    "query": {
        "bool": {
            "should": [{"match": {"description": "<whatever>"}}],
            "filter": [{"term": {"inventory_dir": "/fruit/apples/fuji"}}]
        }
    }
}).json()

{'took': 1034,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 1, 'relation': 'eq'},
  'max_score': 0.0,
  'hits': [{'_index': 'catalog',
    '_type': '_doc',
    '_id': '0',
    '_score': 0.0,
    '_source': {'inventory_dir': '/fruit/apples/fuji',
     'description': 'crisp, sweet-flavored, long shelf-life'}}]}}

In [70]:
search_using_query({
    "query": {
        "bool": {
            "should": [{"match": {"description": "<whatever>"}}],
            "filter": [{"term": {"inventory_dir": "/fruit/apples"}}]
        }
    }
}).json()

{'took': 2,
 'timed_out': False,
 '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0},
 'hits': {'total': {'value': 2, 'relation': 'eq'},
  'max_score': 0.0,
  'hits': [{'_index': 'catalog',
    '_type': '_doc',
    '_id': '0',
    '_score': 0.0,
    '_source': {'inventory_dir': '/fruit/apples/fuji',
     'description': 'crisp, sweet-flavored, long shelf-life'}},
   {'_index': 'catalog',
    '_type': '_doc',
    '_id': '1',
    '_score': 0.0,
    '_source': {'inventory_dir': '/fruit/apples/gala',
     'description': 'sweet, pleasant apple'}}]}}