# Setup

In [1]:
#import
from elasticsearch import Elasticsearch
from elasticsearch import helpers
import pickle

  


In [2]:
#initialize
# Connect to the local development node. The URL is written out explicitly:
# elasticsearch-py 7.x defaults to this address when no host is given, but
# 8.x refuses to construct a client without an explicit host.
es = Elasticsearch("http://localhost:9200")

In [16]:
# Load the movie records that the later cells index into Elasticsearch.
# NOTE: pickle.load can execute arbitrary code while deserializing; only load
# a movies.p that you created yourself or otherwise trust.
with open("../movies.p", "rb") as movies_file:  # `with` closes the handle
    movies = pickle.load(movies_file)

In [26]:
# Start from a clean slate so this cell can be re-run at any time.
# ignore_unavailable=True turns the delete into a no-op when "tmdb" does not
# exist yet, while genuine failures (e.g. the cluster being unreachable) still
# raise instead of being silently swallowed by a bare `except`.
es.indices.delete(index="tmdb", ignore_unavailable=True)

# Explicit mappings for the fields the examples rely on; every other field of
# a movie record is left to Elasticsearch's dynamic mapping.
# TODO: genres.name needs to be keyword tokenized so that 'science fiction' doesn't get split on white space
# TODO: maybe create a text field with title and overview to search against
body = {
    "mappings": {
        "properties": {
            # keyword => not analyzed, so it can be used in terms aggregations
            "status": {
                "type": "keyword",
            },
            # term vectors with positions and offsets are stored for the
            # "fvh" highlighter used in the highlighting example
            "title": {
                "type": "text",
                "analyzer": "english",
                "term_vector": "with_positions_offsets"},
            "overview": {
                "type": "text",
                "analyzer": "english",
                "term_vector": "with_positions_offsets"}}}}
es.indices.create(index="tmdb", mappings=body["mappings"])

  


{u'acknowledged': True, u'index': u'tmdb', u'shards_acknowledged': True}

In [27]:
#doc indexer
def format_doc(doc):
    """Wrap one movie record in an action dict targeting the ``tmdb`` index.

    Args:
        doc: mapping for a single movie; must contain an ``'id'`` key.

    Returns:
        dict: ``_index`` is ``"tmdb"``, ``_id`` is ``doc['id']`` and
        ``_source`` is the record itself (the same object, not a copy).

    Raises:
        KeyError: if ``doc`` has no ``'id'`` key.
    """
    return {"_index": "tmdb", "_id": doc['id'], "_source": doc}

def index_movies():
    """Bulk-index every record in the global ``movies`` into Elasticsearch.

    Each record is converted with ``format_doc`` and streamed through
    ``helpers.streaming_bulk`` using the global ``es`` client.

    Returns:
        list: the details of every bulk item that failed to index; an empty
        list means all documents were indexed.
    """
    actions = (format_doc(doc) for doc in movies)
    # raise_on_error=False is required for the filter below to do anything:
    # by default streaming_bulk raises BulkIndexError when an item fails, so
    # failures would never be yielded as (False, details) tuples.
    return [
        details
        for success, details in helpers.streaming_bulk(es, actions, raise_on_error=False)
        if not success
    ]

In [28]:
results = index_movies()

# Make the newly indexed documents searchable right away. Without an explicit
# refresh, a "Run All" can reach the search cells below before Elasticsearch's
# periodic refresh has happened and get incomplete results.
es.indices.refresh(index="tmdb")

# Details of any documents that failed to index (empty list => none failed).
results

# Examples

## Highlighting

In [29]:
# highlighting
# Match on the title and ask for highlighted fragments of both title and overview.
# NOTE(review): "stasr" looks like a typo for "stars" (the later cells query
# "stars trek"); it is left unchanged here - confirm whether it is intentional.
body = {
    "fields": ["title","overview"],
    "query":{
        "match":{
            "title": "stasr trek"}},
    "highlight": {
        "order" : "score",  # probably doesn't matter much for such a short field
        "type" : "fvh",
        "fields": {
          "title": {
            "number_of_fragments": 0},  # to ensure that the entire title is returned 
          "overview": {
            "fragment_size": 100,
            "number_of_fragments": 3,
            "no_match_size": 100 }
        },
        "pre_tags" : ["<em class=\"hlt1\">","<em class=\"hlt2\">"],
        "post_tags" : ["</em>"]}}
# Pass the request sections as keyword arguments (the same style the index
# creation cell uses with `mappings=`) instead of the deprecated `body=`.
es.search(index="tmdb", size=100, **body)



{u'_shards': {u'failed': 0, u'skipped': 0, u'successful': 1, u'total': 1},
 u'hits': {u'hits': [{u'_id': u'13475',
    u'_index': u'tmdb',
    u'_score': 5.4387894,
    u'_source': {u'adult': False,
     u'backdrop_path': u'/1XOSh6BFZbQ0xN75m4avqgzClyG.jpg',
     u'belongs_to_collection': {u'backdrop_path': u'/suPwXZLPMXQz7Cl6zjBQ9SJoRJ6.jpg',
      u'id': 115575,
      u'name': u'Star Trek: Alternate Reality Collection',
      u'poster_path': u'/w2IVqa43Ge49mrY3qSaJWpyDtz6.jpg'},
     u'budget': 150000000,
     u'genres': [{u'id': 28, u'name': u'Action'},
      {u'id': 12, u'name': u'Adventure'},
      {u'id': 878, u'name': u'Science Fiction'}],
     u'homepage': u'http://www.startrekmovie.com/',
     u'id': 13475,
     u'imdb_id': u'tt0796366',
     u'original_language': u'en',
     u'original_title': u'Star Trek',
     u'overview': u'The fate of the galaxy rests in the hands of bitter rivals. One, James Kirk, is a delinquent, thrill-seeking Iowa farm boy. The other, Spock, a Vulcan,

## Grouping

In [30]:
# grouping according to movie status
# A terms aggregation buckets the matching movies by their "status" keyword
# field; the nested top_hits aggregation returns the hits inside each bucket.
body = {
    "query":{
        "match":{
            "title": "stars trek"}},
    "aggs": {
        "statuses": {
            "terms": {"field":"status"},
            "aggs": {
                "hits": {
                    "top_hits": {}}}}}}
# Pass the request sections as keyword arguments (the same style the index
# creation cell uses with `mappings=`) instead of the deprecated `body=`.
es.search(index="tmdb", **body)

  if sys.path[0] == '':


{u'_shards': {u'failed': 0, u'skipped': 0, u'successful': 1, u'total': 1},
 u'aggregations': {u'statuses': {u'buckets': [{u'doc_count': 9,
     u'hits': {u'hits': {u'hits': [{u'_id': u'13475',
         u'_index': u'tmdb',
         u'_score': 9.447634,
         u'_source': {u'adult': False,
          u'backdrop_path': u'/1XOSh6BFZbQ0xN75m4avqgzClyG.jpg',
          u'belongs_to_collection': {u'backdrop_path': u'/suPwXZLPMXQz7Cl6zjBQ9SJoRJ6.jpg',
           u'id': 115575,
           u'name': u'Star Trek: Alternate Reality Collection',
           u'poster_path': u'/w2IVqa43Ge49mrY3qSaJWpyDtz6.jpg'},
          u'budget': 150000000,
          u'genres': [{u'id': 28, u'name': u'Action'},
           {u'id': 12, u'name': u'Adventure'},
           {u'id': 878, u'name': u'Science Fiction'}],
          u'homepage': u'http://www.startrekmovie.com/',
          u'id': 13475,
          u'imdb_id': u'tt0796366',
          u'original_language': u'en',
          u'original_title': u'Star Trek',
         

In [32]:
# field collapsing according to status
# NOTE(review): this comment originally said "language", but the aggregation
# below buckets on the "status" field - confirm which one was intended.
# Each bucket keeps only its single best hit (top_hits size 1), and buckets are
# ordered by the best _score they contain via the "top_score" max aggregation.
body = {
    "query":{
        "match":{
            "title": "stars trek"}},
    "aggs": {
        "statuses": {
            "terms": {
                "field":"status",
                "order": {"top_score": "desc"}}, 
            "aggs": {
                "hits": {
                    "top_hits": {"size":1}},
                "top_score": {
                    "max": { "script": "_score"}}
            }}}}
# Pass the request sections as keyword arguments (the same style the index
# creation cell uses with `mappings=`) instead of the deprecated `body=`.
es.search(index="tmdb", **body)



{u'_shards': {u'failed': 0, u'skipped': 0, u'successful': 1, u'total': 1},
 u'aggregations': {u'statuses': {u'buckets': [{u'doc_count': 9,
     u'hits': {u'hits': {u'hits': [{u'_id': u'13475',
         u'_index': u'tmdb',
         u'_score': 9.447634,
         u'_source': {u'adult': False,
          u'backdrop_path': u'/1XOSh6BFZbQ0xN75m4avqgzClyG.jpg',
          u'belongs_to_collection': {u'backdrop_path': u'/suPwXZLPMXQz7Cl6zjBQ9SJoRJ6.jpg',
           u'id': 115575,
           u'name': u'Star Trek: Alternate Reality Collection',
           u'poster_path': u'/w2IVqa43Ge49mrY3qSaJWpyDtz6.jpg'},
          u'budget': 150000000,
          u'genres': [{u'id': 28, u'name': u'Action'},
           {u'id': 12, u'name': u'Adventure'},
           {u'id': 878, u'name': u'Science Fiction'}],
          u'homepage': u'http://www.startrekmovie.com/',
          u'id': 13475,
          u'imdb_id': u'tt0796366',
          u'original_language': u'en',
          u'original_title': u'Star Trek',
         