In [1196]:
from pprint import pprint 

from elasticsearch import Elasticsearch

# Shared client used by every helper in this notebook; constructed with no
# arguments, so it relies entirely on the library's default connection settings.
es = Elasticsearch()

# names
# Index name and document type passed to the index/search calls.
yowl = 'yowl'
restaurant = 'restaurant'

# Field names, used as dictionary keys in the documents, mapping and queries.
location = 'location'
name = 'name'
cuisine_high_q = 'cuisine_high_q'
cuisine_low_q = 'cuisine_low_q'
menu = 'menu'
description = 'description'
price = 'price'
rating = 'rating'
has_discount = 'has_discount'
promoted = 'promoted'
engaged = 'engaged'

# Restaurant names used as the `name` value of the sample documents.
apples_bees = 'apples bees'
burger_kink = 'burger kink'
pizza_hit = 'pizza hit'
taco_belly = 'taco belly'
wednesdays = 'wednesdays'

def recreate_index():
    """
    Drop the ``yowl`` index if it exists, then create it again from scratch.

    The new index uses one shard and no replicas, and maps the ``location``
    field of ``restaurant`` documents as a ``geo_point``.

    Raises:
        Whatever the client raises for a failed delete other than
        "index not found" (HTTP 404), or for a failed create.
    """
    # ignore=404 tolerates only the expected "index does not exist yet" case.
    # Any other failure (e.g. the cluster being unreachable) now surfaces here
    # instead of being silently swallowed.
    es.indices.delete(yowl, ignore=404)

    body = {
        'settings': {
            # just one shard, no replicas for testing
            'number_of_shards': 1,
            'number_of_replicas': 0,
        },
        "mappings": {
            restaurant: {
                "properties": {
                    location: {
                        "type": "geo_point"
                    }
                }
            }
        }
    }
    es.indices.create(yowl, body)

def index_docs(docs):
    """
    Rebuild the ``yowl`` index and load ``docs`` into it.

    Args:
        docs: iterable of dicts; each one is indexed as a ``restaurant``
            document with an auto-generated id.
    """
    recreate_index()
    for doc in docs:
        es.index(yowl, restaurant, doc)
    # Refresh is the operation documented to make newly indexed documents
    # visible to search; flush is about durability, not visibility.
    es.indices.refresh(yowl)
    
def search(query):
    """
    Run ``query`` against the restaurant documents and summarise the hits.

    Each hit is reduced to a dict with its ``name`` and ``score``. The list
    is pretty-printed and also returned, in the order the hits came back.
    """
    response = es.search(yowl, restaurant, query)
    summaries = []
    for hit in response['hits']['hits']:
        summaries.append({
            'name': hit['_source']['name'],
            'score': hit['_score'],
        })
    pprint(summaries)
    return summaries
    
def assert_in(query, names):
    """
    Assert that every named restaurant appears in the results of ``query``.

    Args:
        query: search request body handed to ``es.search``.
        names: a single restaurant name, or an iterable of names.

    Raises:
        AssertionError: if any name is missing; the message reports the
            missing name and the names that were returned.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3 has no basestring
    # A bare string is one name, not a sequence of characters.
    if isinstance(names, string_types):
        names = [names]
    result = es.search(yowl, restaurant, body=query)
    names_in_result = [hit['_source']['name'] for hit in result['hits']['hits']]
    for expected in names:
        assert expected in names_in_result, (
            '%r not found in results %r' % (expected, names_in_result))
        
def assert_not_in(query, names):
    """
    Assert that none of the named restaurants appear in the results of ``query``.

    Args:
        query: search request body handed to ``es.search``.
        names: a single restaurant name, or an iterable of names.

    Raises:
        AssertionError: if any name is present; the message reports the
            offending name and the names that were returned.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3 has no basestring
    # A bare string is one name, not a sequence of characters.
    if isinstance(names, string_types):
        names = [names]
    result = es.search(yowl, restaurant, body=query)
    names_in_result = [hit['_source']['name'] for hit in result['hits']['hits']]
    for unexpected in names:
        assert unexpected not in names_in_result, (
            '%r unexpectedly found in results %r' % (unexpected, names_in_result))
        
def assert_first(query, name):
    """
    Assert that the first result of ``query`` is the named restaurant.

    Args:
        query: search request body handed to ``es.search``.
        name: the restaurant name expected in the first hit.

    Raises:
        AssertionError: if the query returns no hits, or if the first hit
            has a different name.
    """
    result = es.search(yowl, restaurant, body=query)
    docs = result['hits']['hits']
    # Fail with a readable assertion instead of an IndexError on empty results.
    assert docs, 'expected %r first, but the query returned no results' % (name,)
    first_name = docs[0]['_source']['name']
    assert name == first_name, (
        'expected %r first, but got %r' % (name, first_name))


In [1204]:
# Sample restaurants: rebuilds the index and loads these four documents.
index_docs([
    # pizza hit: strong in content, weak in location and business
    {
        name: pizza_hit,
        location: {'lat': 36.06, 'lon': -86.84},
        price: 'D',
        rating: 'S',
        description: 'pizza! pizza! pizza!',
        cuisine_high_q: 'italian, pizza',
        cuisine_low_q: 'pizza',
        engaged: True,
    },
    # taco belly: strong in location, weak in content and business
    {
        name: taco_belly,
        location: {'lat': 36.154547, 'lon': -86.782277},
        price: 'D',
        rating: 'S',
        description: 'we sell tacos and carbonated beverages - sometimes we sell pizza',
        cuisine_low_q: 'mexican, fastfood',
        engaged: True,
    },
    # burger kink: strong in business, weak in location and content
    {
        name: burger_kink,
        location: {'lat': 36.06, 'lon': -86.84},
        price: 'D',
        rating: 'S',
        description: "burgers and rarely we also have pizza - hey, but it's good!",
        cuisine_low_q: 'fastfood, american',
        has_discount: True,
        promoted: True,
        engaged: True,
    },
    # apples bees: doesn't match
    {
        name: apples_bees,
        location: {'lat': 30.154547, 'lon': -80.782277},
        price: 'DD',
        rating: 'SSS',
        description: 'no wedge shaped food here',
        cuisine_low_q: 'american',
        has_discount: True,
        promoted: True,
        engaged: True,
    },
])

### Content

In [1205]:
# Content: match 'pizza' across the text fields, each with its own boost suffix.
query = dict(
    query=dict(
        multi_match=dict(
            query='pizza',
            fields=[
                'name^10',
                'cuisine_high_q^10',
                'cuisine_low_q^4',
                'menu^2',
                'description^1',
            ],
            tie_breaker=0.3,
        ),
    ),
)
search(query);

[{'name': u'pizza hit', 'score': 1.5118824},
 {'name': u'taco belly', 'score': 0.017503675},
 {'name': u'burger kink', 'score': 0.014002941}]


### Customer Preferences

In [1211]:
# Customer preferences: keep only restaurants matching the requested price and
# rating. Both clauses sit in a bool `filter`, so they restrict the hits
# without contributing to the score.
query = dict(query={
    'bool': dict(filter=[
        dict(match={price: 'D'}),
        dict(match={rating: 'S'}),
    ]),
})
search(query);

[{'name': u'pizza hit', 'score': 0.0},
 {'name': u'taco belly', 'score': 0.0},
 {'name': u'burger kink', 'score': 0.0}]


### Location

In [1200]:
# Location filter: keep only restaurants whose `location` falls inside the
# bounding box. `post_filter` is the current name of the top-level search
# filter; the bare top-level `filter` key is the deprecated spelling of the
# same thing. With no `query` given, the surviving hits keep a constant score.
query = {
    'post_filter': {
        'geo_bounding_box': {
            location: {
                'top_left': {
                    'lat': 36.35,
                    'lon': -86.88,
                },
                'bottom_right': {
                    'lat': 36.05,
                    'lon': -86.68,
                }
            }
        }
    }
}
search(query);

[{'name': u'pizza hit', 'score': 1.0},
 {'name': u'taco belly', 'score': 1.0},
 {'name': u'burger kink', 'score': 1.0}]


In [1201]:
# boost: score each restaurant by a gaussian decay on its distance from the
# origin point, so nearer restaurants score higher.
query = dict(query=dict(function_score=dict(functions=[
    dict(gauss={
        location: dict(
            origin=dict(lat=36.154547, lon=-86.782277),
            offset='0km',
            scale='10km',
        ),
    }),
])))
search(query);

[{'name': u'taco belly', 'score': 1.0},
 {'name': u'pizza hit', 'score': 0.38580385},
 {'name': u'burger kink', 'score': 0.38580385},
 {'name': u'apples bees', 'score': 0.0}]


### Business

In [1208]:
# Business: for restaurants with at least one business flag set, score them by
# a weighted sum of the flags (promoted 0.5, has_discount 0.3, engaged 0.2).
# The whitespace inside the script literal is kept exactly as it was.
query = dict(query=dict(function_score=dict(functions=[{
    'filter': dict(bool=dict(should=[
        dict(term={has_discount: True}),
        dict(term={promoted: True}),
        dict(term={engaged: True}),
    ])),
    'script_score': dict(script="""
                        0.3*doc['has_discount'].value +
                        0.5*doc['promoted'].value +
                        0.2*doc['engaged'].value
                    """),
}])))
search(query);

[{'name': u'burger kink', 'score': 1.0},
 {'name': u'apples bees', 'score': 1.0},
 {'name': u'pizza hit', 'score': 0.2},
 {'name': u'taco belly', 'score': 0.2}]


### Combined

In [1216]:
# Combined: content relevance scored through function_score, with business and
# location functions summed together, then hits restricted by location and
# customer preferences.
# `post_filter` is the current name of the top-level search filter; the bare
# top-level `filter` key is the deprecated spelling of the same thing.
query = {
    'post_filter': {
        'bool': {
            'filter': [
                { # location filter
                    'geo_bounding_box': {
                        location: {
                            'top_left': {
                                'lat': 36.35,
                                'lon': -86.88,
                            },
                            'bottom_right': {
                                'lat': 36.05,
                                'lon': -86.68,
                            }
                        }
                    }
                },
                { # customer preference
                    'match':{
                        price: 'D',
                }},
                { # customer preference
                    'match':{
                        rating: 'S',
                }}
            ]}
    },
    'query': {
        'function_score': {
            # function scores are added together before being combined
            # with the content query score
            'score_mode': 'sum',
            'query': { # content
                'multi_match': {
                    'query': 'pizza',
                    'fields': ['name^10', 'cuisine_high_q^10', 'cuisine_low_q^4', 'menu^2', 'description^1'],
                },
            },
            'functions': [
                { # business concerns
                    'weight': 1,
                    'filter': {
                        'bool': {
                            'should': [
                                { 'term': { has_discount: True }},
                                { 'term': { promoted: True }},
                                { 'term': { engaged: True }},
                            ]
                        }
                    },
                    'script_score' : {
                        'script': """
                            0.3*doc['has_discount'].value +
                            0.5*doc['promoted'].value +
                            0.2*doc['engaged'].value
                        """
                    }
                },
                { # location
                    'weight': 1,
                    'gauss': {
                        location: {
                            'origin': {
                                    'lat': 36.154547,
                                    'lon': -86.782277
                            },
                            'offset': '0km',
                            'scale':  '4km',
                        }
                    },
                },
                { # content weight
                    # NOTE(review): with score_mode 'sum' a weight of 0 looks
                    # like a no-op placeholder - confirm the intent.
                    'weight': 0
                }
            ]
        }
    }
}
search(query);

[{'name': u'pizza hit', 'score': 0.2143936},
 {'name': u'taco belly', 'score': 0.022148103},
 {'name': u'burger kink', 'score': 0.014803776}]
