In [1]:
import requests
import json
import os

def transform(esDoc):
    """Hook applied to each document before it is indexed.

    This default implementation leaves ``esDoc`` untouched and returns
    ``None``.
    """
    return None


# Some utilities for flattening the explain into something a bit more
# readable. Pass Explain JSON, get something readable (ironically this is what Solr's default output is :-p)
def flatten(l):
    """Flatten one level of nesting.

    Takes an iterable of iterables and returns a single list holding the
    items of each inner iterable, in order.
    """
    # The result must be returned; a bare comprehension is simply discarded.
    return [item for sublist in l for item in sublist]

def simplerExplain(explainJson, depth=0):
    """Render an explain tree as indented ``value, description`` lines.

    Each node contributes one line, indented two spaces per nesting level;
    entries under the node's ``details`` key (if present) are rendered
    recursively one level deeper. Returns the text as a single string.
    """
    indent = " " * (depth * 2)
    pieces = [indent + "%s, %s\n" % (explainJson['value'], explainJson['description'])]
    if 'details' in explainJson:
        pieces.extend(simplerExplain(child, depth=depth + 1)
                      for child in explainJson['details'])
    return "".join(pieces)


# To speed up the pace of development, we really need to focus more heavily on the analysis and query
# settings of the search engine, rather than fiddly bits of the HTTP interface.
#
# To that end, we're going to collapse some of the code you were introduced to in chapter 3 into more general functions,
# so we can reuse them. Largely, this is the exact same code you saw in chapter 3, with some more generality.

## Analyze
## The analyze function is a helper for accessing the _analyze endpoint like we did in chapter 3. Recall,
## given a field or analyzer, passing some text to _analyze will return the token stream that results from
## that analyzer. This token stream, if you recall, shows us exactly how the search engine translates text
## into individual tokens to be consumed by the underlying data structures. When we debug analysis, we can see
## which matches we should expect.
def analyze(text, field=None, analyzer=None):
    """Send ``text`` to the tmdb index's ``_analyze`` endpoint and print the reply.

    ``field`` takes precedence over ``analyzer`` when both are given; when
    neither is given no selector is added to the query string. The response
    is requested in YAML format and printed as-is.
    """
    if field is not None:
        selector = "field=%s" % field
    elif analyzer is not None:
        selector = "analyzer=%s" % analyzer
    else:
        selector = ''
    endpoint = "http://localhost:9200/tmdb/_analyze?%s&format=yaml" % selector
    reply = requests.get(endpoint, data=text)
    print(reply.text)
    
## Search
## Next we need to wrap up our execution of query DSL queries. The function 'search' will execute the passed query DSL
## query and display the results. 
## If a scoring explain is associated with the results, then it also gets displayed.
## We'll also be sure to dump the query DSL.
def search(query, verbose=False):
    """Run a query DSL ``query`` against the tmdb movie index and print the hits.

    One summary row is printed per hit (rank, score, title, vote average,
    release date). With ``verbose`` set, each hit is followed by its title,
    tagline, overview, id, director/cast/character names and, when the hit
    carries an ``_explanation``, a flattened rendering of it; finally the
    ``query['query']`` part is sent to the ``_validate/query?explain``
    endpoint and the parsed response is printed.

    If the search request does not return HTTP 200, the status and body are
    printed and the function returns without attempting to read any hits.
    """
    url = 'http://localhost:9200/tmdb/movie/_search'
    httpResp = requests.get(url, data=json.dumps(query))
    if httpResp.status_code != 200:
        print("Search Failed <%s>" % httpResp.status_code)
        print("%s" % httpResp.text)
        # An error body has no 'hits' section; stop instead of crashing below.
        return
    searchHits = json.loads(httpResp.text)['hits']
    print("Num\tRelevance Score\t\tMovie Title")
    for idx, hit in enumerate(searchHits['hits']):
        source = hit['_source']
        print("%s\t%s\t\t%s\t%s\t%s" % (idx + 1, hit['_score'],
                                         source['title'],
                                         source['vote_average'],
                                         source['release_date']))
        if verbose:
            # Name lists are only needed (and only required to exist) for
            # the detailed output.
            directorNames = [director['name'] for director in source['directors']]
            castNames = [member['name'] for member in source['cast']]
            castCharacters = [member['character'] for member in source['cast']]
            print("%s" % source['title'])
            print("%s" % source['tagline'])
            print("%s" % source['overview'])
            print("%s" % hit['_id'])
            print("DIRS %s" % directorNames)
            print("CAST %s" % castNames)
            print("CHAR %s" % castCharacters)
            if '_explanation' in hit:
                print("%s" % simplerExplain(hit['_explanation']))
                print("*************************************")

    if verbose:
        httpResp = requests.get('http://localhost:9200' +
                                '/tmdb/movie/_validate/query?explain',
                                data=json.dumps({'query': query['query']}))
        print(json.loads(httpResp.text))

## Reindex
## Reindex takes analyzer and field mappings, recreates the index, and then reindexes
## TMDB movies using the _bulk index API. There are other ways for modifying the configuration
## of the index besides dropping and restarting, however for convenience and because our data
## isn't truly that large, we'll just delete and start from scratch when we need to.
def reindex(analysisSettings, mappingSettings=None, movieDict=None):
    """Drop, recreate and repopulate the local ``tmdb`` index.

    Parameters:
        analysisSettings: dict placed under ``settings.index.analysis`` when
            the index is created.
        mappingSettings: optional dict used as the index ``mappings``.
        movieDict: mapping whose values are movie dicts (each with an ``id``
            key). Every movie is passed through the module-level
            ``transform`` (which may modify it in place) and then sent to the
            ``_bulk`` endpoint. Defaults to an empty corpus.

    Progress and HTTP status codes are printed; nothing is returned.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if movieDict is None:
        movieDict = {}

    # Destroy any existing index (equiv to SQL "drop table")
    resp = requests.delete("http://localhost:9200/tmdb")
    print("Delete TMDB Index <%s>" % resp.status_code)

    # Create the index with explicit settings
    # We need to explicitly set number of shards to 1 to eliminate the impact of
    # distributed IDF on our small collection
    # See also "Relevance is Broken!"
    # http://www.elastic.co/guide/en/elasticsearch/guide/current/relevance-is-broken.html
    settings = {
        "settings": {
            "number_of_shards": 1,
            "index": {
                "analysis": analysisSettings,
            }
        }
    }
    if mappingSettings:
        settings['mappings'] = mappingSettings
    resp = requests.put("http://localhost:9200/tmdb", data=json.dumps(settings))
    print("Create TMDB Index <%s>" % resp.status_code)
    if resp.status_code != 200:
        print(resp.text)

    # Bulk index the (transformed) movies to the movie endpoint
    print("Indexing %i movies" % len(movieDict))
    if not movieDict:
        # Nothing to send; an empty _bulk body would only be rejected.
        return

    bulkLines = []
    lastMovie = None
    for movie in movieDict.values():
        addCmd = {"index": {"_index": "tmdb", "_type": "movie", "_id": movie["id"]}}
        transform(movie)
        # The bulk format is newline-delimited: action line, then document line.
        bulkLines.append(json.dumps(addCmd))
        bulkLines.append(json.dumps(movie))
        lastMovie = movie
    # Show one indexed document as a sanity check of what was sent.
    print(json.dumps(lastMovie, indent=True))
    bulkMovies = "".join(line + "\n" for line in bulkLines)
    resp = requests.post("http://localhost:9200/_bulk", data=bulkMovies)
    print("Bulk Index into TMDB Index <%s>" % resp.status_code)
    if resp.status_code != 200:
        print(resp.text)


def extract(movieIds=None, numMovies=10000):
    """Load the movie corpus from ``tmdb.json`` in the current directory.

    Parameters:
        movieIds: when empty or ``None``, the corpus is read from disk. A
            non-empty value is accepted for compatibility, but this function
            has no other source to load from and returns an empty dict.
        numMovies: accepted for compatibility; not used here.

    Returns the parsed JSON content of ``tmdb.json``, or an empty dict when
    the file cannot be opened (or ``movieIds`` is non-empty).
    """
    if not movieIds:
        try:
            # The context manager guarantees the file handle is closed.
            with open('tmdb.json') as f:
                return json.load(f)
        except IOError:
            # Best effort: a missing/unreadable file yields an empty corpus.
            pass
    # Previously this returned an undefined global and raised NameError.
    return {}

## Index to ES, Chapter 5 Settings

In [2]:
# Load the movie corpus.
movieDict = extract([])

# Analysis chain: English analysis by default, plus a custom analyzer that
# emits only two-word shingles (bigrams) of lowercased, stemmed tokens.
analysisSettings = {
    "analyzer": {
        "default": {
            "type": "english",
        },
        "english_bigrams": {
            "type": "custom",
            "tokenizer": "standard",
            "filter": ["standard", "lowercase", "porter_stem", "bigram_filter"],
        },
    },
    "filter": {
        "bigram_filter": {
            "type": "shingle",
            "max_shingle_size": 2,
            "min_shingle_size": 2,
            "output_unigrams": "false",
        },
    },
}

# Field mappings. The analyzer is spelled out on every field because, per the
# original author's note, the index default analyzer was not being applied.
# The cast and director name fields each get a "bigramed" sub-field; norms on
# those sub-fields are left at their defaults here.
mappingSettings = {
    "movie": {
        "properties": {
            "overview": {
                "type": "string",
                "analyzer": "english",
            },
            "title": {
                "type": "string",
                "analyzer": "english",
            },
            "cast": {
                "properties": {
                    "name": {
                        "type": "string",
                        "analyzer": "english",
                        "fields": {
                            "bigramed": {
                                "type": "string",
                                "analyzer": "english_bigrams",
                            },
                        },
                    },
                },
            },
            "directors": {
                "properties": {
                    "name": {
                        "type": "string",
                        "analyzer": "english",
                        "fields": {
                            "bigramed": {
                                "type": "string",
                                "analyzer": "english_bigrams",
                            },
                        },
                    },
                },
            },
        },
    },
}

reindex(analysisSettings, mappingSettings, movieDict)

Delete TMDB Index <200>
Create TMDB Index <200>
Indexing 3051 movies
{
 "poster_path": "/xYLmHi80zIlu3JDfJYfd1JY0Xl7.jpg", 
 "production_countries": [
  {
   "iso_3166_1": "US", 
   "name": "United States of America"
  }
 ], 
 "revenue": 300400432, 
 "overview": "Col. Troutman recruits ex-Green Beret John Rambo for a highly secret and dangerous mission. Teamed with freedom fighter Co Bao, Rambo goes deep into Vietnam to rescue POWs. Deserted by his own team, he's left in a hostile jungle to fight for his life, avenge the death of a woman and bring corrupt officials to justice.", 
 "video": false, 
 "id": 1369, 
 "genres": [
  {
   "id": 28, 
   "name": "Action"
  }, 
  {
   "id": 12, 
   "name": "Adventure"
  }, 
  {
   "id": 18, 
   "name": "Drama"
  }, 
  {
   "id": 53, 
   "name": "Thriller"
  }, 
  {
   "id": 10752, 
   "name": "War"
  }
 ], 
 "title": "Rambo: First Blood Part II", 
 "tagline": "What most people call hell, he calls home.", 
 "vote_count": 224, 
 "homepage": "", 
 "

# Last Query from Chapter 5

In [34]:
usersSearch = "star trek patrick stewart william shatner"
# Two optional clauses: a cross_fields match on the bigram name sub-fields,
# and a cross_fields match over the text and plain name fields.
query = {
    "query": {
        "bool": {
            "should": [
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": [
                            "directors.name.bigramed",
                            "cast.name.bigramed",
                        ],
                        "type": "cross_fields",
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": [
                            "overview",
                            "title",
                            "directors.name",
                            "cast.name",
                        ],
                        "type": "cross_fields",
                    },
                },
            ],
        },
    },
    "size": 5,
    "explain": True,
}
search(query)


Num	Relevance Score		Movie Title
1	1.6669365		Star Trek: Generations	6.5	1994-11-17
2	1.5123603		Star Trek V: The Final Frontier	5.4	1989-06-09
3	1.0779369		Star Trek: Nemesis	6.3	2002-12-12
4	0.9057324		Star Trek: The Motion Picture	6.0	1979-12-06
5	0.8793935		Star Trek: Insurrection	6.3	1998-12-10


# 7.2.1, Listing 1 Base Query

In [35]:
usersSearch = "william shatner patrick stewart"
# Base query: a single cross_fields match over text and name fields.
query = {
    "query": {
        "multi_match": {
            "query": usersSearch,
            "fields": ["overview", "title", "directors.name", "cast.name"],
            "type": "cross_fields",
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	0.79947156		Star Trek V: The Final Frontier	5.4	1989-06-09
2	0.67931885		Star Trek: Generations	6.5	1994-11-17
3	0.4375222		The Wild	5.0	2006-04-13
4	0.38154808		Dark Skies	6.0	2013-02-21
5	0.32485005		Showtime	5.3	2002-03-14


# 7.2.3, Listing 2 Boosting with An Additional Boolean Clause

In [36]:
usersSearch = "william shatner patrick stewart"
# Base cross_fields match plus an extra optional clause that matches the
# phrase "star trek" in the title.
query = {
    "query": {
        "bool": {
            "should": [
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
                {
                    "match_phrase": {
                        "title": {
                            "query": "star trek",
                        },
                    },
                },
            ],
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	4.363374		Star Trek	7.3	2009-05-07
2	4.0461645		Star Trek: Generations	6.5	1994-11-17
3	3.7096446		Star Trek V: The Final Frontier	5.4	1989-06-09
4	3.6913855		Star Trek: Nemesis	6.3	2002-12-12
5	3.653065		Star Trek: The Motion Picture	6.0	1979-12-06


# 7.2.3, Adjusted Boost Weight on Boolean Query (no listing number; modification of the listing above)

In [37]:
usersSearch = "william shatner patrick stewart"
# Same shape as the boolean boost query, with the title phrase clause
# given a boost of 0.1.
query = {
    "query": {
        "bool": {
            "should": [
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
                {
                    "match_phrase": {
                        "title": {
                            "query": "star trek",
                            "boost": 0.1,
                        },
                    },
                },
            ],
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	1.1662666		Star Trek V: The Final Frontier	5.4	1989-06-09
2	1.0990597		Star Trek: Generations	6.5	1994-11-17
3	0.6702043		Star Trek: Nemesis	6.3	2002-12-12
4	0.62388283		Star Trek: The Motion Picture	6.0	1979-12-06
5	0.6117288		Star Trek II: The Wrath of Khan	7.1	1982-06-03


# 7.2.5, Listing 3 -- Multiplicative Boosting on Title Star Trek match

In [38]:
usersSearch = "william shatner patrick stewart"
# function_score wrapper: the base cross_fields match is scored as usual,
# and a weight of 2.5 is applied to documents whose title matches the
# phrase "star trek".
query = {
    "query": {
        "function_score": {
            "query": {
                "multi_match": {
                    "query": usersSearch,
                    "fields": ["overview", "title", "directors.name", "cast.name"],
                    "type": "cross_fields",
                },
            },
            "functions": [
                {
                    "weight": 2.5,
                    "filter": {
                        "query": {
                            "match_phrase": {
                                "title": "star trek",
                            },
                        },
                    },
                },
            ],
        },
    },
    "size": 50,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	1.9986789		Star Trek V: The Final Frontier	5.4	1989-06-09
2	1.6982971		Star Trek: Generations	6.5	1994-11-17
3	0.6236526		Star Trek: Nemesis	6.3	2002-12-12
4	0.60909384		Star Trek II: The Wrath of Khan	7.1	1982-06-03
5	0.5075782		Star Trek IV: The Voyage Home	6.7	1986-11-25
6	0.5075782		Star Trek: The Motion Picture	6.0	1979-12-06
7	0.5075782		Star Trek III: The Search for Spock	6.3	1984-05-31
8	0.4375222		The Wild	5.0	2006-04-13
9	0.40988445		Star Trek: Insurrection	6.3	1998-12-10
10	0.40988445		Star Trek: First Contact	6.9	1996-11-21
11	0.40606257		Star Trek VI: The Undiscovered Country	6.6	1991-12-05
12	0.38154808		Dark Skies	6.0	2013-02-21
13	0.32485005		Showtime	5.3	2002-03-14
14	0.32485005		Osmosis Jones	5.4	2001-08-07
15	0.29482967		Bill & Ted's Bogus Journey	5.8	1991-07-19
16	0.26210624		Miss Congeniality 2: Armed and Fabulous	5.3	2005-03-11
17	0.2606765		Conspiracy Theory	6.2	1997-08-07
18	0.23332866		Drive Angry	5.5	2011-02-24
19	0.22700992	

# 7.3, Listing 4 -- Using a Filter Instead of A Boost

In [4]:
usersSearch = "william shatner patrick stewart"
# The title phrase is applied as a bool filter rather than as a boost,
# alongside the base cross_fields match.
query = {
    "query": {
        "bool": {
            "should": [
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
            ],
            "filter": [
                {
                    "query": {
                        "match_phrase": {
                            "title": "star trek",
                        },
                    },
                },
            ],
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	0.79947156		Star Trek V: The Final Frontier	5.4	1989-06-09
2	0.67931885		Star Trek: Generations	6.5	1994-11-17
3	0.24946104		Star Trek: Nemesis	6.3	2002-12-12
4	0.24363753		Star Trek II: The Wrath of Khan	7.1	1982-06-03
5	0.20303127		Star Trek IV: The Voyage Home	6.7	1986-11-25


# 7.4.2, Listings 5&7 Sentinel Tokens (includes exact name matching)

In [40]:
SENTINEL_BEGIN = 'SENTINEL_BEGIN'
SENTINEL_END = 'SENTINEL_END'


def transform(esDoc):
    """Add sentinel-wrapped exact-match fields to ``esDoc`` in place.

    ``title_exact_match`` is the title surrounded by the begin/end sentinel
    tokens; ``names_exact_match`` holds one such wrapped string per cast
    member followed by one per director.
    """
    def wrapInSentinels(text):
        return SENTINEL_BEGIN + ' ' + text + ' ' + SENTINEL_END

    esDoc['title_exact_match'] = wrapInSentinels(esDoc['title'])
    wrappedNames = esDoc['names_exact_match'] = []
    wrappedNames.extend(wrapInSentinels(person['name'])
                        for person in esDoc['cast'] + esDoc['directors'])
        


        
# Rebuild the index; reindex passes every movie through the current
# module-level transform before sending it to Elasticsearch.
reindex(analysisSettings, mappingSettings, movieDict)

Delete TMDB Index <200>
Create TMDB Index <200>
Indexing 3051 movies
{
 "poster_path": "/xYLmHi80zIlu3JDfJYfd1JY0Xl7.jpg", 
 "production_countries": [
  {
   "iso_3166_1": "US", 
   "name": "United States of America"
  }
 ], 
 "revenue": 300400432, 
 "overview": "Col. Troutman recruits ex-Green Beret John Rambo for a highly secret and dangerous mission. Teamed with freedom fighter Co Bao, Rambo goes deep into Vietnam to rescue POWs. Deserted by his own team, he's left in a hostile jungle to fight for his life, avenge the death of a woman and bring corrupt officials to justice.", 
 "title_exact_match": "SENTINEL_BEGIN Rambo: First Blood Part II SENTINEL_END", 
 "video": false, 
 "id": 1369, 
 "genres": [
  {
   "id": 28, 
   "name": "Action"
  }, 
  {
   "id": 12, 
   "name": "Adventure"
  }, 
  {
   "id": 18, 
   "name": "Drama"
  }, 
  {
   "id": 53, 
   "name": "Thriller"
  }, 
  {
   "id": 10752, 
   "name": "War"
  }
 ], 
 "title": "Rambo: First Blood Part II", 
 "tagline": "What m

# 7.4.2, Listing 6 -- Testing Exact Title Matching

In [41]:
usersSearch = "star trek"
# Phrase match against the sentinel-wrapped title field, with the user's
# search wrapped in the same begin/end sentinels.
query = {
    "query": {
        "match_phrase": {
            "title_exact_match": {
                "query": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
            },
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	7.03938		Star Trek	7.3	2009-05-07


# 7.4.3, Listing 8 Boolean Boost on Exact Title Matching

In [42]:
usersSearch = "good will hunting"
# Exact-title phrase clause (boost 1000) combined with the base
# cross_fields match; coord is disabled on the bool query.
query = {
    "query": {
        "bool": {
            "disable_coord": True,
            "should": [
                {
                    "match_phrase": {
                        "title_exact_match": {
                            "query": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
                            "boost": 1000,
                        },
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
            ],
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	7.7802196		Good Will Hunting	7.4	1997-12-05
2	0.0016732097		The Hunt	7.9	2012-10-25
3	0.0012773767		Good Night, and Good Luck.	6.4	2005-09-16
4	0.0011133142		As Good as It Gets	6.6	1997-12-19
5	0.0005879917		Saw V	6.1	2008-10-23


## No Listing -- Adding a query mentions name boost

### First Attempt, search bigramed fields without modification

In [43]:
usersSearch = "star trek patrick stewart"
# Three optional clauses: exact-title phrase (boost 1000), the base
# cross_fields match, and a cross_fields match on the bigram name
# sub-fields (boost 100).
query = {
    "query": {
        "bool": {
            "disable_coord": True,
            "should": [
                {
                    "match_phrase": {
                        "title_exact_match": {
                            "query": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
                            "boost": 1000,
                        },
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["directors.name.bigramed", "cast.name.bigramed"],
                        "type": "cross_fields",
                        "boost": 100,
                    },
                },
            ],
        },
    },
    "size": 20,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	0.010597555		Star Trek: Insurrection	6.3	1998-12-10
2	0.010597555		Star Trek: First Contact	6.9	1996-11-21
3	0.009586617		Gnomeo & Juliet	5.9	2011-01-13
4	0.009586617		Excalibur	6.7	1981-04-10
5	0.0093802195		Star Trek: Nemesis	6.3	2002-12-12
6	0.0093802195		Star Trek: Generations	6.5	1994-11-17
7	0.008384489		Conspiracy Theory	6.2	1997-08-07
8	0.008384489		The Wolverine	6.4	2013-07-25
9	0.008384489		Dune	6.5	1984-12-14
10	0.008384489		X-Men	6.5	2000-07-14
11	0.008384489		Robin Hood: Men in Tights	6.2	1993-07-28
12	0.0071982313		X-Men: Days of Future Past	7.7	2014-05-23
13	0.00718236		X2: X-Men United	6.5	2003-04-27
14	0.00718236		TMNT	6.0	2007-03-22
15	0.00718236		The Prince of Egypt	6.7	1998-12-15
16	0.00718236		X-Men: The Last Stand	6.1	2006-05-26
17	0.006091906		Ted	6.3	2012-06-29
18	0.0047915326		Chicken Little	5.4	2005-11-04
19	0.00057481206		Star Trek	7.3	2009-05-07
20	0.00045984966		Star Trek: The Motion Picture	6.0	1979-12-06


In [49]:
# Disable norms on BOTH bigram name sub-fields. The name-boost query searches
# 'directors.name.bigramed' as well as 'cast.name.bigramed', so leaving norms
# enabled on the directors sub-field would keep the two fields inconsistent.
for personField in ('cast', 'directors'):
    bigramedField = mappingSettings['movie']['properties'] \
                                   [personField]['properties'] \
                                   ['name']['fields']['bigramed']
    bigramedField['norms'] = {'enabled': False}

reindex(analysisSettings, mappingSettings, movieDict)

Delete TMDB Index <200>
Create TMDB Index <200>
Indexing 3051 movies
{
 "poster_path": "/xYLmHi80zIlu3JDfJYfd1JY0Xl7.jpg", 
 "production_countries": [
  {
   "iso_3166_1": "US", 
   "name": "United States of America"
  }
 ], 
 "revenue": 300400432, 
 "overview": "Col. Troutman recruits ex-Green Beret John Rambo for a highly secret and dangerous mission. Teamed with freedom fighter Co Bao, Rambo goes deep into Vietnam to rescue POWs. Deserted by his own team, he's left in a hostile jungle to fight for his life, avenge the death of a woman and bring corrupt officials to justice.", 
 "title_exact_match": "SENTINEL_BEGIN Rambo: First Blood Part II SENTINEL_END", 
 "video": false, 
 "id": 1369, 
 "genres": [
  {
   "id": 28, 
   "name": "Action"
  }, 
  {
   "id": 12, 
   "name": "Adventure"
  }, 
  {
   "id": 18, 
   "name": "Drama"
  }, 
  {
   "id": 53, 
   "name": "Thriller"
  }, 
  {
   "id": 10752, 
   "name": "War"
  }
 ], 
 "title": "Rambo: First Blood Part II", 
 "tagline": "What m

### Rerunning with Norms Off For Bigrams

In [46]:
usersSearch = "star trek patrick stewart"
# Same three-clause query as the first name-boost attempt: exact-title
# phrase (boost 1000), base cross_fields match, and bigram name match
# (boost 100).
query = {
    "query": {
        "bool": {
            "disable_coord": True,
            "should": [
                {
                    "match_phrase": {
                        "title_exact_match": {
                            "query": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
                            "boost": 1000,
                        },
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["directors.name.bigramed", "cast.name.bigramed"],
                        "type": "cross_fields",
                        "boost": 100,
                    },
                },
            ],
        },
    },
    "size": 20,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	0.03920228		Star Trek: Insurrection	6.3	1998-12-10
2	0.03920228		Star Trek: First Contact	6.9	1996-11-21
3	0.03917096		Star Trek: Nemesis	6.3	2002-12-12
4	0.03917096		Star Trek: Generations	6.5	1994-11-17
5	0.03820324		Gnomeo & Juliet	5.9	2011-01-13
6	0.03820324		Excalibur	6.7	1981-04-10
7	0.03818829		X-Men: Days of Future Past	7.7	2014-05-23
8	0.038187582		Conspiracy Theory	6.2	1997-08-07
9	0.038187582		The Wolverine	6.4	2013-07-25
10	0.038187582		Dune	6.5	1984-12-14
11	0.038187582		X-Men	6.5	2000-07-14
12	0.038171925		X2: X-Men United	6.5	2003-04-27
13	0.038171925		TMNT	6.0	2007-03-22
14	0.038171925		The Prince of Egypt	6.7	1998-12-15
15	0.038171925		X-Men: The Last Stand	6.1	2006-05-26
16	0.03815442		Chicken Little	5.4	2005-11-04
17	0.00056567905		Star Trek	7.3	2009-05-07
18	0.0004525432		Star Trek: The Motion Picture	6.0	1979-12-06
19	0.0004525432		Star Trek Into Darkness	7.5	2013-05-16
20	0.0003959753		Star Trek VI: The Undiscovered Country	6.6	1

# 7.4.4.1 Exact Name Matching Function Query Skeleton using TF*IDF (no listing number)

In [14]:
# function_score skeleton: phrase match on the sentinel-wrapped names
# field, with an empty list of scoring functions.
query = {
    "query": {
        "function_score": {
            "query": {
                "match_phrase": {
                    "names_exact_match": "%s william shatner %s" % (SENTINEL_BEGIN, SENTINEL_END),
                },
            },
            "functions": [],
        },
    },
}
search(query)

Num	Relevance Score		Movie Title
1	1.9612461		Star Trek V: The Final Frontier	5.4	1989-06-09
2	1.733513		Showtime	5.3	2002-03-14
3	1.733513		Osmosis Jones	5.4	2001-08-07
4	1.3868104		Star Trek IV: The Voyage Home	6.7	1986-11-25
5	1.3868104		Star Trek II: The Wrath of Khan	7.1	1982-06-03
6	1.3868104		Miss Congeniality 2: Armed and Fabulous	5.3	2005-03-11
7	1.2134591		Over the Hedge	6.2	2006-04-22
8	1.2134591		Escape from Planet Earth	5.9	2013-02-14
9	1.2134591		Star Trek: The Motion Picture	6.0	1979-12-06
10	1.2134591		Star Trek III: The Search for Spock	6.3	1984-05-31


# 7.4.4.1 Exact Name Matching Function, Ignoring TF*IDF

In [16]:
# Same exact-name phrase match, wrapped in constant_score with a boost of
# 1000.0; the function list is still empty.
query = {
    "query": {
        "function_score": {
            "query": {
                "constant_score": {
                    "query": {
                        "match_phrase": {
                            "names_exact_match": "%s william shatner %s" % (SENTINEL_BEGIN, SENTINEL_END),
                        },
                    },
                    "boost": 1000.0,
                },
            },
            "functions": [],
        },
    },
}
search(query)

Num	Relevance Score		Movie Title
1	1000.0		Over the Hedge	6.2	2006-04-22
2	1000.0		Showtime	5.3	2002-03-14
3	1000.0		Escape from Planet Earth	5.9	2013-02-14
4	1000.0		Star Trek VI: The Undiscovered Country	6.6	1991-12-05
5	1000.0		Star Trek V: The Final Frontier	5.4	1989-06-09
6	1000.0		Star Trek IV: The Voyage Home	6.7	1986-11-25
7	1000.0		Star Trek: The Motion Picture	6.0	1979-12-06
8	1000.0		Star Trek II: The Wrath of Khan	7.1	1982-06-03
9	1000.0		Star Trek III: The Search for Spock	6.3	1984-05-31
10	1000.0		Miss Congeniality	6.0	2000-12-14


# 7.4.4.1, Listing 9 Turning User Rating into A Signal

In [16]:
# match_all scored by a field_value_factor function on vote_average with
# the sqrt modifier.
query = {
    "query": {
        "function_score": {
            "query": {
                "match_all": {},
            },
            "functions": [
                {
                    "field_value_factor": {
                        "field": "vote_average",
                        "modifier": "sqrt",
                    },
                },
            ],
        },
    },
}
search(query)

Num	Relevance Score		Movie Title
1	2.9325757		Whiplash	8.6	2014-10-10
2	2.9154758		Feast	8.5	2014-11-07
3	2.8982754		Paperman	8.4	2012-11-02
4	2.8982754		Mommy	8.4	2014-09-19
5	2.8982754		Interstellar	8.4	2014-11-05
6	2.8982754		Avengers: Age of Ultron	8.4	2015-05-01
7	2.8809721		The Tale of the Princess Kaguya	8.3	2013-11-23
8	2.8809721		Presto	8.3	2008-06-26
9	2.8635643		The Shawshank Redemption	8.2	1994-09-14
10	2.8635643		Wolf Children	8.2	2012-08-29


# 7.4.4.2 Listing 10, Turning Recency of Release Into A Signal (first try)

In [17]:
# match_all scored by a gauss decay function on release_date: origin "now",
# scale 900 days, decay 0.5.
query = {
    "query": {
        "function_score": {
            "query": {
                "match_all": {},
            },
            "functions": [
                {
                    "gauss": {
                        "release_date": {
                            "origin": "now",
                            "scale": "900d",
                            "decay": 0.5,
                        },
                    },
                },
            ],
        },
    },
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	0.99437076		Resident Evil: Rising	6.1	2016-01-01
2	0.9773075		Avengers: Age of Ultron	8.4	2015-05-01
3	0.9690168		Furious 7	7.8	2015-04-03
4	0.9667524		Home	7.2	2015-03-27
5	0.9667524		It Follows	7.5	2015-03-27
6	0.96441245		Insurgent	7.3	2015-03-20
7	0.9619975		Frozen Fever	7.0	2015-03-13
8	0.9619975		Cinderella	7.3	2015-03-13
9	0.9595081		Chappie	7.0	2015-03-06
10	0.9569449		Jupiter Ascending	5.6	2015-02-27


# 7.4.4.2 Listing 10, Adjusted Gaussian Decay (set scale to 15 years)

In [18]:
# Same gauss decay on release_date, with the scale widened to 5500 days.
query = {
    "query": {
        "function_score": {
            "query": {
                "match_all": {},
            },
            "functions": [
                {
                    "gauss": {
                        "release_date": {
                            "origin": "now",
                            "scale": "5500d",
                            "decay": 0.5,
                        },
                    },
                },
            ],
        },
    },
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	0.99984884		Resident Evil: Rising	6.1	2016-01-01
2	0.99938554		Avengers: Age of Ultron	8.4	2015-05-01
3	0.9991576		Furious 7	7.8	2015-04-03
4	0.999095		Home	7.2	2015-03-27
5	0.999095		It Follows	7.5	2015-03-27
6	0.9990302		Insurgent	7.3	2015-03-20
7	0.9989631		Frozen Fever	7.0	2015-03-13
8	0.9989631		Cinderella	7.3	2015-03-13
9	0.9988938		Chappie	7.0	2015-03-06
10	0.9988223		Jupiter Ascending	5.6	2015-02-27


# 7.4.4.3 -- Complete Name Boost

In [19]:
usersSearch = "patrick stewart"
# Constant-score exact-name match (boost 1000.0) combined with two scoring
# functions: gauss decay on release_date and sqrt of vote_average.
query = {
    "query": {
        "function_score": {
            "query": {
                "constant_score": {
                    "query": {
                        "match_phrase": {
                            "names_exact_match": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
                        },
                    },
                    "boost": 1000.0,
                },
            },
            "functions": [
                {
                    "gauss": {
                        "release_date": {
                            "origin": "now",
                            "scale": "5500d",
                            "decay": 0.5,
                        },
                    },
                },
                {
                    "field_value_factor": {
                        "field": "vote_average",
                        "modifier": "sqrt",
                    },
                },
            ],
        },
    },
    "explain": True,
    "size": 20,
}
search(query)

Num	Relevance Score		Movie Title
1	2.7586055		X-Men: Days of Future Past	7.7	2014-05-23
2	2.4921865		The Wolverine	6.4	2013-07-25
3	2.4285414		Ted	6.3	2012-06-29
4	2.2674968		Gnomeo & Juliet	5.9	2011-01-13
5	1.9581478		TMNT	6.0	2007-03-22
6	1.8874506		X-Men: The Last Stand	6.1	2006-05-26
7	1.7185271		Chicken Little	5.4	2005-11-04
8	1.5862336		X2: X-Men United	6.5	2003-04-27
9	1.5173253		Star Trek: Nemesis	6.3	2002-12-12
10	1.2530301		X-Men	6.5	2000-07-14
11	1.0896553		The Prince of Egypt	6.7	1998-12-15
12	1.0551405		Star Trek: Insurrection	6.3	1998-12-10
13	0.9067166		Conspiracy Theory	6.2	1997-08-07
14	0.88268507		Star Trek: First Contact	6.9	1996-11-21
15	0.6707222		Star Trek: Generations	6.5	1994-11-17
16	0.55149966		Robin Hood: Men in Tights	6.2	1993-07-28
17	0.13964182		Dune	6.5	1984-12-14
18	0.06799161		Excalibur	6.7	1981-04-10


# Not Shown In Chapter: The Whole Shebang

In [21]:
usersSearch = "patrick stewart"
# Everything combined in one bool query (coord disabled):
#   1. exact-title phrase match, boost 1000
#   2. constant-score exact-name match (boost 1000.0) scored through gauss
#      decay on release_date and sqrt of vote_average
#   3. the base cross_fields match over text and name fields
query = {
    "query": {
        "bool": {
            "disable_coord": True,
            "should": [
                {
                    "match_phrase": {
                        "title_exact_match": {
                            "query": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
                            "boost": 1000,
                        },
                    },
                },
                {
                    "function_score": {
                        "query": {
                            "constant_score": {
                                "query": {
                                    "match_phrase": {
                                        "names_exact_match": "%s %s %s" % (SENTINEL_BEGIN, usersSearch, SENTINEL_END),
                                    },
                                },
                                "boost": 1000.0,
                            },
                        },
                        "functions": [
                            {
                                "gauss": {
                                    "release_date": {
                                        "origin": "now",
                                        "scale": "5500d",
                                        "decay": 0.5,
                                    },
                                },
                            },
                            {
                                "field_value_factor": {
                                    "field": "vote_average",
                                    "modifier": "sqrt",
                                },
                            },
                        ],
                    },
                },
                {
                    "multi_match": {
                        "query": usersSearch,
                        "fields": ["overview", "title", "directors.name", "cast.name"],
                        "type": "cross_fields",
                    },
                },
            ],
        },
    },
    "size": 5,
    "explain": True,
}
search(query)

Num	Relevance Score		Movie Title
1	2764.44		X-Men: Days of Future Past	7.7	2014-05-23
2	2497.8433		The Wolverine	6.4	2013-07-25
3	2433.9927		Ted	6.3	2012-06-29
4	2274.0195		Gnomeo & Juliet	5.9	2011-01-13
5	1962.7107		TMNT	6.0	2007-03-22
