In [1]:
'''
A notebook to run and show demo of various queries
'''

'\nA notebook to run and show demo of various queries\n'

In [2]:
import json

from os.path import join

from elasticsearch import Elasticsearch
from elasticsearch.helpers import bulk

In [3]:
# Index and mapping type that the cells below read from and write to.
INDEX_NAME = 'movies'
TYPE_NAME = 'marvel'

# Address of the Elasticsearch node. ES_HOST carries the scheme as well as the
# host name; it is joined with the port into the endpoint given to the client.
ES_HOST = 'http://localhost'
ES_PORT = 9202
ES_HOST_STRING = '{}:{}'.format(ES_HOST, ES_PORT)

In [4]:
# Index creation body: explicit field types for documents of type "marvel"
# (the same name as TYPE_NAME). Kept as a raw JSON string and passed as the
# request body when the index is created.
mapping = '''{
  "mappings": {
    "marvel": {
      "properties": {
        "title": {
          "type": "text"
        },
        "year": {
          "type": "long"
        },
        "directors": {
          "type": "text"
        },
        "imdb_rating": {
          "type": "float"
        },
        "summary": {
          "type": "text"
        },
        "duration": {
          "type": "integer"
        }
      }
    }
  }
}'''

In [5]:
# Client used by every request in this notebook; it talks to the single node
# configured in ES_HOST_STRING.
es = Elasticsearch(hosts=[ES_HOST_STRING])

In [6]:
# Create the index using `mapping` as the request body. `ignore=400` asks the
# client not to raise on an HTTP 400 response, which keeps this cell re-runnable
# once the index exists.
# NOTE(review): any other 400 (e.g. a rejected mapping) is suppressed too - confirm this is acceptable.
es.indices.create(index=INDEX_NAME, ignore=400, body=mapping)

{u'acknowledged': True, u'index': u'movies', u'shards_acknowledged': True}

In [7]:
# Location of the JSON file with the released Marvel movies, relative to the
# directory the notebook is run from.
data_file = join(
    'data',
    'marvel_movies_released.json'
)

In [8]:
# Parse the whole JSON file into memory; the handle is closed when the
# `with` block exits.
with open(data_file) as movies_fh:
    data = json.load(movies_fh)

In [9]:
def make_documents(doc, doc_id):
    '''
    Wrap a raw document in an action dict for the bulk helper.

    :param doc: the document body, stored under the action's `_source`.
    :param doc_id: value used as the document's `_id`.
    :return: an 'index' action dict targeting INDEX_NAME / TYPE_NAME.
    '''
    return dict(
        _op_type='index',
        _index=INDEX_NAME,
        _type=TYPE_NAME,
        _id=doc_id,
        _source=doc
    )

In [10]:
# Build one 'index' action per movie, using its position in `data` as the
# document id, and hand them to the bulk helper.
actions = (make_documents(movie, position) for position, movie in enumerate(data))
bulk(es, actions, index=INDEX_NAME, doc_type=TYPE_NAME)

(19, [])

In [11]:
# search the `title` field for "iron man" with a match query
# (titles containing only one of the two terms can match as well)
search_query = '''{
  "query": {
    "match": {
      "title": "iron man"
    }
  }
}'''

In [12]:
# Run the query; only the `title` field of each hit's source is requested.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title']
)

In [13]:
def pretty_print_es_response(es_response, return_score=False):
    '''
    Flatten an Elasticsearch search response into a list of plain dicts.

    Each returned dict is a copy of a hit's `_source` with the hit's `_id`
    added under the key 'id' and, when requested, its `_score` under 'score'.
    The response passed in is not modified.

    :param es_response: dict returned by `es.search`; hits are read from
        es_response['hits']['hits']. A missing 'hits' key yields an empty list.
    :param return_score: when True, also include each hit's `_score`.
    :return: list of dicts, one per hit, in the order of the response.
    '''
    pretty_response = []
    # Fall back to an empty dict (not a list) so the chained .get() cannot
    # fail when the outer 'hits' key is absent.
    hits = (es_response.get('hits') or {}).get('hits') or []
    for hit in hits:
        # Copy the source so the caller's response is left untouched (otherwise
        # a 'score' added by one call would leak into later calls); use {} for
        # hits that carry no `_source`.
        source = dict(hit.get('_source') or {})
        source['id'] = hit.get('_id')
        if return_score:
            source['score'] = hit.get('_score')
        pretty_response.append(source)
    return pretty_response

In [14]:
# Display the matching titles together with their document ids.
pretty_print_es_response(response)

[{'id': u'12', u'title': u'Iron Man 3'},
 {'id': u'18', u'title': u'Iron Man'},
 {'id': u'16', u'title': u'Iron Man 2'},
 {'id': u'2', u'title': u'Spider-Man: Homecoming'},
 {'id': u'7', u'title': u'Ant-Man'}]

In [15]:
# search for "iron man" across both the `title` and `summary` fields (multi_match)
search_query = '''{
  "query": {
    "multi_match": {
      "query": "iron man",
      "fields": ["title", "summary"]
    }
  }
}'''

In [16]:
# Run the multi-field query; request the `title` and `summary` source fields.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary']
)

In [17]:
# Display the requested source fields and the document id for each hit.
pretty_print_es_response(response)

[{'id': u'16',
  u'summary': u"With the world now aware of his identity as Iron Man, Tony Stark must contend with both his declining health and a vengeful mad man with ties to his father's legacy.",
  u'title': u'Iron Man 2'},
 {'id': u'12',
  u'summary': u"When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.",
  u'title': u'Iron Man 3'},
 {'id': u'6',
  u'summary': u"Political involvement in the Avengers' activities causes a rift between Captain America and Iron Man.",
  u'title': u'Captain America: Civil War'},
 {'id': u'18',
  u'summary': u'After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.',
  u'title': u'Iron Man'},
 {'id': u'2',
  u'summary': u'Peter Parker balances his life as an ordinary high school student in Queens with his superhero alter-ego Spider-Man, and finds himself on the trail of a new menace prowling th

In [18]:
# Notice how the movie with id `6` appears even though its title does not contain "Iron Man"; it matches on the `summary` field.

In [19]:
# let's search for iron man again, but this time with a boost of 2 on the `summary` field
# NOTE(review): the `_source` list names `id`, which is not among the explicitly
# mapped fields; the id shown in the results is taken from each hit's `_id`.
search_query = '''{
  "query": {
    "multi_match": {
      "query": "iron man",
      "fields": ["title", "summary^2"]
    }
  },
  "_source": ["id", "title", "summary"]
}'''

In [20]:
# Run the boosted query and display title, summary and id of each hit.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary']
)
pretty_print_es_response(response)

[{'id': u'16',
  u'summary': u"With the world now aware of his identity as Iron Man, Tony Stark must contend with both his declining health and a vengeful mad man with ties to his father's legacy.",
  u'title': u'Iron Man 2'},
 {'id': u'6',
  u'summary': u"Political involvement in the Avengers' activities causes a rift between Captain America and Iron Man.",
  u'title': u'Captain America: Civil War'},
 {'id': u'12',
  u'summary': u"When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.",
  u'title': u'Iron Man 3'},
 {'id': u'18',
  u'summary': u'After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.',
  u'title': u'Iron Man'},
 {'id': u'2',
  u'summary': u'Peter Parker balances his life as an ordinary high school student in Queens with his superhero alter-ego Spider-Man, and finds himself on the trail of a new menace prowling th

In [21]:
# Notice how the ranking of the docs with id `6` and id `12` changes when a boost factor is given to the `summary` field.

In [22]:
# search for the deliberately misspelled "captan amrica"; a fuzziness of 2
# lets each term match words within an edit distance of 2
search_query = '''{
  "query": {
    "match": {
      "title": {
        "query": "captan amrica",
        "fuzziness": 2
      }
    }
  }
}'''

In [23]:
# Run the fuzzy query and display title, summary and id of each hit.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary']
)
pretty_print_es_response(response)

[{'id': u'10',
  u'summary': u'As Steve Rogers struggles to embrace his role in the modern world, he teams up with a fellow Avenger and S.H.I.E.L.D agent, Black Widow, to battle a new threat from history: an assassin known as the Winter Soldier.',
  u'title': u'Captain America: The Winter Soldier'},
 {'id': u'6',
  u'summary': u"Political involvement in the Avengers' activities causes a rift between Captain America and Iron Man.",
  u'title': u'Captain America: Civil War'},
 {'id': u'14',
  u'summary': u"Steve Rogers, a rejected military soldier transforms into Captain America after taking a dose of a 'Super-Soldier serum'. But being Captain America comes at a price as he attempts to take down a war monger and a terrorist organization.",
  u'title': u'Captain America: The First Avenger'}]

In [24]:
# Notice how a fuzziness value of 2 lets the misspelled terms captan (captain) and amrica (america) still match.

In [25]:
# search the `title` field for "iron man" as a phrase (match_phrase)
search_query = '''{
  "query": {
    "match_phrase": {
      "title": "iron man"
    }
  }
}'''

In [26]:
# Run the phrase query and display title, summary and id of each hit.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary']
)
pretty_print_es_response(response)

[{'id': u'12',
  u'summary': u"When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.",
  u'title': u'Iron Man 3'},
 {'id': u'18',
  u'summary': u'After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.',
  u'title': u'Iron Man'},
 {'id': u'16',
  u'summary': u"With the world now aware of his identity as Iron Man, Tony Stark must contend with both his declining health and a vengeful mad man with ties to his father's legacy.",
  u'title': u'Iron Man 2'}]

In [27]:
# Notice how only three movies appear, compared to five when `iron man` was searched with a match query: match_phrase requires both terms to appear together, in order.

In [28]:
# search for movies released in a particular year, using a `term` filter on `year`
search_query = '''{
  "query": {
    "bool": {
      "filter": {
        "term": {
          "year": 2018
        }
      }
    }
  }
}'''

In [29]:
# Run the year filter and display title, summary, year and id of each hit.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary', 'year']
)
pretty_print_es_response(response)

[{'id': u'0',
  u'summary': u"T'Challa, the King of Wakanda, rises to the throne in the isolated, technologically advanced African nation, but his claim is challenged by a vengeful outsider who was a childhood victim of T'Challa's father's mistake.",
  u'title': u'Black Panther',
  u'year': u'2018'},
 {'id': u'1',
  u'summary': u'The Avengers and their allies must be willing to sacrifice all in an attempt to defeat the powerful Thanos before his blitz of devastation and ruin puts an end to the universe.',
  u'title': u'Avengers: Infinity War',
  u'year': u'2018'}]

In [30]:
# let's search for movies within a particular time period
# (`year` from 2015 to 2016, both bounds inclusive)
search_query = '''{
  "query": {
    "bool": {
      "filter": {
        "range": {
          "year": {
            "gte": 2015,
            "lte": 2016
          }
        }
      }
    }
  }
}'''

In [33]:
# Run the range filter and display title, summary, year and id of each hit.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary', 'year']
)
pretty_print_es_response(response)

[{'id': u'5',
  u'summary': u'While on a journey of physical and spiritual healing, a brilliant neurosurgeon is drawn into the world of the mystic arts.',
  u'title': u'Doctor Strange',
  u'year': u'2016'},
 {'id': u'8',
  u'summary': u"When Tony Stark and Bruce Banner try to jump-start a dormant peacekeeping program called Ultron, things go horribly wrong and it's up to Earth's mightiest heroes to stop the villainous Ultron from enacting his terrible plan.",
  u'title': u'Avengers: Age of Ultron',
  u'year': u'2015'},
 {'id': u'6',
  u'summary': u"Political involvement in the Avengers' activities causes a rift between Captain America and Iron Man.",
  u'title': u'Captain America: Civil War',
  u'year': u'2016'},
 {'id': u'7',
  u'summary': u'Armed with a super-suit with the astonishing ability to shrink in scale but increase in strength, cat burglar Scott Lang must embrace his inner hero and help his mentor, Dr. Hank Pym, plan and pull off a heist that will save the world.',
  u'title

In [34]:
# let's try a function score query
# this query includes the duration of the movie as a factor when computing the score:
# the script multiplies the match score by log(1 + duration)
search_query = '''{
  "query": {
    "function_score": {
      "query": {
        "match": {
          "title": "iron man"
        }
      },
      "script_score": {
        "script": {
          "source": "_score * Math.log(1 + doc['duration'].value) "
        }
      }
    }
  }
}'''

In [35]:
# Run the function score query and display each hit together with its score.
response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary', 'year']
)
pretty_print_es_response(response, return_score=True)

[{'id': u'12',
  'score': 14.505959,
  u'summary': u"When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.",
  u'title': u'Iron Man 3',
  u'year': u'2013'},
 {'id': u'18',
  'score': 7.566229,
  u'summary': u'After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.',
  u'title': u'Iron Man',
  u'year': u'2008'},
 {'id': u'16',
  'score': 6.425105,
  u'summary': u"With the world now aware of his identity as Iron Man, Tony Stark must contend with both his declining health and a vengeful mad man with ties to his father's legacy.",
  u'title': u'Iron Man 2',
  u'year': u'2010'},
 {'id': u'2',
  'score': 6.2628784,
  u'summary': u'Peter Parker balances his life as an ordinary high school student in Queens with his superhero alter-ego Spider-Man, and finds himself on the trail of a new menace prowling the skies of New York City.',
  u'

In [36]:
# let's run a plain match query for `iron man` to see how the function score query changed the scores
search_query = '''{
  "query": {
    "match": {
      "title": "iron man"
    }
  }
}'''

In [37]:
# Run the plain match query and display each hit together with its score.
match_query_response = es.search(
    index=INDEX_NAME,
    doc_type=TYPE_NAME,
    body=search_query,
    _source_include=['title', 'summary', 'year']
)
pretty_print_es_response(match_query_response, return_score=True)

[{'id': u'12',
  'score': 2.9754608,
  u'summary': u"When Tony Stark's world is torn apart by a formidable terrorist called the Mandarin, he starts an odyssey of rebuilding and retribution.",
  u'title': u'Iron Man 3',
  u'year': u'2013'},
 {'id': u'18',
  'score': 1.5619192,
  u'summary': u'After being held captive in an Afghan cave, billionaire engineer Tony Stark creates a unique weaponized suit of armor to fight evil.',
  u'title': u'Iron Man',
  u'year': u'2008'},
 {'id': u'16',
  'score': 1.3307141,
  u'summary': u"With the world now aware of his identity as Iron Man, Tony Stark must contend with both his declining health and a vengeful mad man with ties to his father's legacy.",
  u'title': u'Iron Man 2',
  u'year': u'2010'},
 {'id': u'2',
  'score': 1.2787021,
  u'summary': u'Peter Parker balances his life as an ordinary high school student in Queens with his superhero alter-ego Spider-Man, and finds himself on the trail of a new menace prowling the skies of New York City.',
  

In [38]:
# Iron Man 3 (doc id 12) gets a score of about 14.5 in the function score query versus about 2.98 in the plain match query.