In [21]:
import requests
from pprint import pprint

In [2]:
# Base URL of the Elasticsearch index that every request in this notebook targets.
ES_URI = "http://localhost:9200/my_library"

# Index settings that register one custom analyzer, "standard_clone":
# the "standard" tokenizer followed by the "lowercase" and "stop" token
# filters (both referenced by name only; neither is defined in these settings).
standard_analyzer_settings = {
    "settings": {
        "analysis": {
            "analyzer": {
                "standard_clone": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "stop"],
                },
            },
        },
    },
}

In [29]:
# Sample movie title; passed to more than one analyzer below so their token output can be compared.
STRANGELOVE = "Dr. Strangelove: Or How I Learned To Stop Worrying and Love the Bomb"
# Four inflected forms of the word "flower", used as analyzer input further down.
FLOWER = "flower flowers flowering flowered"

def create_index(settings):
    """Create the index at ``ES_URI`` with ``settings`` and report the outcome.

    Args:
        settings: JSON-serialisable request body for the create-index call,
            e.g. one of the ``*_analyzer_settings`` dicts in this notebook.

    Returns:
        None. On a success status, prints "Index created" followed by the
        decoded JSON response. On an error status, prints a failure message
        with the HTTP status code and the raw response body, so that an
        error reply is never labelled as a successful creation.
    """
    result = requests.put(ES_URI, json=settings)
    if result.ok:
        print("Index created", result.json())
    else:
        # Use the raw text here: an error reply is not guaranteed to be JSON.
        print(f"Index creation failed (HTTP {result.status_code})", result.text)

def remove_index():
    """Delete the index at ``ES_URI`` and report the outcome.

    Returns:
        None. On a success status, prints "Index removed" followed by the
        decoded JSON response. On an error status (for example when there is
        no such index to delete), prints a failure message with the HTTP
        status code and the raw response body instead of claiming the index
        was removed. No exception is raised for an error status, so the
        notebook can continue with the next cell.
    """
    result = requests.delete(ES_URI)
    if result.ok:
        print("Index removed", result.json())
    else:
        # Use the raw text here: an error reply is not guaranteed to be JSON.
        print(f"Index not removed (HTTP {result.status_code})", result.text)
    
def analyze_query(analyzer, text):
    """Send ``text`` to the index's ``_analyze`` endpoint using ``analyzer``.

    Args:
        analyzer: Name of the analyzer to apply.
        text: The string to analyze.

    Returns:
        None. The decoded JSON response is pretty-printed.
    """
    response = requests.get(
        f"{ES_URI}/_analyze",
        json={"text": text, "analyzer": analyzer},
    )
    pprint(response.json())
    

In [15]:
# Delete the index so that the following cell can create it from scratch.
remove_index()

Index removed {'acknowledged': True}


In [16]:
# Create the index with the settings that define the "standard_clone" analyzer.
create_index(standard_analyzer_settings)

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [23]:
# Tokenize the Strangelove title with the custom "standard_clone" analyzer.
analyze_query("standard_clone", STRANGELOVE)

{'tokens': [{'end_offset': 2,
             'position': 0,
             'start_offset': 0,
             'token': 'dr',
             'type': '<ALPHANUM>'},
            {'end_offset': 15,
             'position': 1,
             'start_offset': 4,
             'token': 'strangelove',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 20,
             'token': 'how',
             'type': '<ALPHANUM>'},
            {'end_offset': 25,
             'position': 4,
             'start_offset': 24,
             'token': 'i',
             'type': '<ALPHANUM>'},
            {'end_offset': 33,
             'position': 5,
             'start_offset': 26,
             'token': 'learned',
             'type': '<ALPHANUM>'},
            {'end_offset': 41,
             'position': 7,
             'start_offset': 37,
             'token': 'stop',
             'type': '<ALPHANUM>'},
            {'end_offset': 50,
             'positi

In [24]:
# Delete the index again; it is re-created further down with different analyzer settings.
remove_index()

Index removed {'acknowledged': True}


In [25]:
# Index settings that register a custom analyzer, "english_clone", built from
# the "standard" tokenizer and a chain of token filters. Four of the filters
# are defined here under "filter"; "lowercase" is referenced by name only.
english_analyzer_settings = {
    "settings": {
        "analysis": {
            "filter": {
                # "stop" filter configured with the "_english_" stopword set.
                "english_stop": {
                    "type": "stop",
                    "stopwords": "_english_"
                },
                # "keyword_marker" filter listing the single keyword "example".
                "english_keywords": {
                    "type": "keyword_marker",
                    "keywords": ["example"]
                },
                # "stemmer" filter for the "english" language.
                "english_stemmer": {
                    "type": "stemmer",
                    "language": "english"
                },
                # "stemmer" filter for "possessive_english".
                # The filter name was previously misspelled "possesive".
                "english_possessive_stemmer": {
                    "type": "stemmer",
                    "language": "possessive_english"
                }
            },
            "analyzer": {
                "english_clone": {
                    "tokenizer": "standard",
                    # Filters are applied in the order listed.
                    "filter": [
                        "english_possessive_stemmer",
                        "lowercase",
                        "english_stop",
                        "english_keywords",
                        "english_stemmer"
                    ]
                }
            }
        }
    }
}

In [26]:
# Re-create the index, this time with the settings that define the "english_clone" analyzer.
create_index(english_analyzer_settings)

Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [28]:
# Same title as before, now through "english_clone"; the output shows stemmed
# tokens such as "strangelov" and "learn".
analyze_query("english_clone", STRANGELOVE)

{'tokens': [{'end_offset': 2,
             'position': 0,
             'start_offset': 0,
             'token': 'dr',
             'type': '<ALPHANUM>'},
            {'end_offset': 15,
             'position': 1,
             'start_offset': 4,
             'token': 'strangelov',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 20,
             'token': 'how',
             'type': '<ALPHANUM>'},
            {'end_offset': 25,
             'position': 4,
             'start_offset': 24,
             'token': 'i',
             'type': '<ALPHANUM>'},
            {'end_offset': 33,
             'position': 5,
             'start_offset': 26,
             'token': 'learn',
             'type': '<ALPHANUM>'},
            {'end_offset': 41,
             'position': 7,
             'start_offset': 37,
             'token': 'stop',
             'type': '<ALPHANUM>'},
            {'end_offset': 50,
             'position'

In [30]:
# All four inflections in FLOWER come back as the same token, "flower".
analyze_query("english_clone", FLOWER)

{'tokens': [{'end_offset': 6,
             'position': 0,
             'start_offset': 0,
             'token': 'flower',
             'type': '<ALPHANUM>'},
            {'end_offset': 14,
             'position': 1,
             'start_offset': 7,
             'token': 'flower',
             'type': '<ALPHANUM>'},
            {'end_offset': 24,
             'position': 2,
             'start_offset': 15,
             'token': 'flower',
             'type': '<ALPHANUM>'},
            {'end_offset': 33,
             'position': 3,
             'start_offset': 25,
             'token': 'flower',
             'type': '<ALPHANUM>'}]}


In [31]:
# A loosely reworded variant of the Strangelove title, run through the same
# "english_clone" analyzer (presumably to compare the resulting tokens with
# those of the original title).
ALMOST_STRANGELOVE = "mr. weirdlove:  don't worry I'm learning to star loving bombs"
analyze_query("english_clone", ALMOST_STRANGELOVE)

{'tokens': [{'end_offset': 2,
             'position': 0,
             'start_offset': 0,
             'token': 'mr',
             'type': '<ALPHANUM>'},
            {'end_offset': 13,
             'position': 1,
             'start_offset': 4,
             'token': 'weirdlov',
             'type': '<ALPHANUM>'},
            {'end_offset': 21,
             'position': 2,
             'start_offset': 16,
             'token': "don't",
             'type': '<ALPHANUM>'},
            {'end_offset': 27,
             'position': 3,
             'start_offset': 22,
             'token': 'worri',
             'type': '<ALPHANUM>'},
            {'end_offset': 31,
             'position': 4,
             'start_offset': 28,
             'token': "i'm",
             'type': '<ALPHANUM>'},
            {'end_offset': 40,
             'position': 5,
             'start_offset': 32,
             'token': 'learn',
             'type': '<ALPHANUM>'},
            {'end_offset': 48,
             'positi

In [32]:
# Index settings that register a custom analyzer named "phonetic": the
# "standard" tokenizer, then "lowercase", then the custom
# "my_doublemetaphone" filter defined under "filter" below.
# NOTE(review): the "phonetic" filter type is documented as part of
# Elasticsearch's analysis-phonetic plugin; confirm the plugin is installed.
phonetic_analyzer_settings = {
    "settings": {
        "analysis": {
            "analyzer": {
                "phonetic": {
                    "tokenizer": "standard",
                    "filter": ["lowercase", "my_doublemetaphone"],
                },
            },
            "filter": {
                "my_doublemetaphone": {
                    "type": "phonetic",
                    "encoder": "doublemetaphone",
                    "replace": True,
                },
            },
        },
    },
}

In [33]:
# Switch the index over to the phonetic analyzer settings: delete it, then re-create it.
remove_index()
create_index(phonetic_analyzer_settings)

Index removed {'acknowledged': True}
Index created {'acknowledged': True, 'shards_acknowledged': True, 'index': 'my_library'}


In [34]:
# Phonetic tokens for this phrase; compare them with the output of the next cell.
analyze_query("phonetic", "Message from Dalai Lama")

{'tokens': [{'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSJ',
             'type': '<ALPHANUM>'},
            {'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSK',
             'type': '<ALPHANUM>'},
            {'end_offset': 12,
             'position': 1,
             'start_offset': 8,
             'token': 'FRM',
             'type': '<ALPHANUM>'},
            {'end_offset': 18,
             'position': 2,
             'start_offset': 13,
             'token': 'TL',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 19,
             'token': 'LM',
             'type': '<ALPHANUM>'}]}


In [35]:
# A differently worded phrase that yields the same phonetic tokens
# (MSJ/MSK, FRM, TL, LM) as "Message from Dalai Lama" in the previous cell.
analyze_query("phonetic", "Message from tall llama")

{'tokens': [{'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSJ',
             'type': '<ALPHANUM>'},
            {'end_offset': 7,
             'position': 0,
             'start_offset': 0,
             'token': 'MSK',
             'type': '<ALPHANUM>'},
            {'end_offset': 12,
             'position': 1,
             'start_offset': 8,
             'token': 'FRM',
             'type': '<ALPHANUM>'},
            {'end_offset': 17,
             'position': 2,
             'start_offset': 13,
             'token': 'TL',
             'type': '<ALPHANUM>'},
            {'end_offset': 23,
             'position': 3,
             'start_offset': 18,
             'token': 'LM',
             'type': '<ALPHANUM>'}]}
