In [1]:
import requests
import json

In [2]:
def extract(path='../chapter-3/tmdb.json'):
    """Load the TMDB movie dump from a JSON file.

    Args:
        path: Location of the JSON file. Defaults to the chapter-3 dump that
            this function previously hard-coded, so ``extract()`` behaves as
            before.

    Returns:
        The parsed JSON document. ``reindex`` in this notebook consumes it as
        a dict of movie records.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # ``with`` guarantees the handle is closed. The former ``if f:`` guard was
    # always true, because ``open`` raises instead of returning a falsy value.
    # JSON text is UTF-8, so do not depend on the platform's locale encoding.
    with open(path, encoding='utf-8') as f:
        return json.load(f)

In [131]:
def reindex(analysis_settings=None, mapping_settings=None, movie_dict=None):
    """Drop and recreate the ``tmdb`` index, then bulk-index ``movie_dict``.

    Args:
        analysis_settings: Value placed under ``settings.index.analysis`` in
            the index-creation body. ``None`` (or omitted) means ``{}``.
        mapping_settings: Optional ``mappings`` body. It is left out of the
            request when falsy.
        movie_dict: Mapping whose values are movie documents. Each document
            must have an ``"id"`` key, which becomes the document ``_id``.
            ``None`` (or omitted) means no documents.

    Returns:
        The ``requests`` response of the final ``_bulk`` call. Elasticsearch's
        Bulk API can answer HTTP 200 even when individual items failed, so
        callers that care should inspect ``resp.json()["errors"]``.

    Raises:
        KeyError: If a movie document has no ``"id"`` key.
    """
    # ``None`` sentinels replace the former mutable ``{}`` defaults and keep an
    # explicit ``None`` from being serialised as ``"analysis": null``.
    if analysis_settings is None:
        analysis_settings = {}
    if movie_dict is None:
        movie_dict = {}

    settings = {
        "settings": {
            "number_of_shards": 1,
            "index": {
                "analysis": analysis_settings,
            }
        }
    }

    if mapping_settings:
        settings["mappings"] = mapping_settings

    # Log strings are kept verbatim so they match the outputs already recorded
    # in this notebook.
    print("using stettings\n", settings)
    resp = requests.delete("http://localhost:9200/tmdb")
    print(f"previouse index removal: {resp.status_code}")
    resp = requests.put("http://localhost:9200/tmdb", json=settings)
    print("index creation", resp.json())

    # The bulk body is newline-delimited JSON: one action line followed by one
    # document line per movie, each terminated by "\n". Collect the lines and
    # join once instead of growing a string inside the loop.
    bulk_lines = []
    for movie in movie_dict.values():
        add_cmd = {
            "index": {"_index": "tmdb", "_id": movie["id"]}
        }
        bulk_lines.append(json.dumps(add_cmd))
        bulk_lines.append(json.dumps(movie))
    bulk_movies = "".join(line + "\n" for line in bulk_lines)

    resp = requests.post(
        "http://localhost:9200/_bulk",
        data=bulk_movies,
        headers={'Content-Type': 'application/json'}
    )
    return resp

In [19]:
def search(query):
    """Run ``query`` against the ``tmdb`` index and print the ranked hits.

    Args:
        query: Elasticsearch query DSL body; sent as the JSON request body.

    Returns:
        None. A tab-separated table (rank, document id, score, title) is
        printed to stdout, one row per hit in the response.

    Raises:
        requests.HTTPError: If Elasticsearch answers with an error status.
    """
    # Use the type-less search endpoint. The typed ``/tmdb/_doc/_search`` form
    # is deprecated in Elasticsearch 7 and rejected by later versions.
    url = 'http://localhost:9200/tmdb/_search'
    resp = requests.get(url, json=query)
    # Fail with the HTTP error rather than an opaque KeyError on 'hits' below.
    resp.raise_for_status()

    search_hits = resp.json()['hits']

    print("Num\tid\tRelevance\tMovie title")

    for idx, hit in enumerate(search_hits['hits']):
        print(f"{idx}\t{hit['_id']}\t{hit['_score']}\t{hit['_source']['title']}")

In [5]:
# Load the TMDB movie dump once; later cells pass it to reindex().
movie_dict = extract()

In [6]:
# Baseline analysis config: register an analyzer named "default" of type
# "english". It is passed to reindex() as `analysis_settings`.
analysis = {"analyzer": {"default": {"type": "english"}}}

In [7]:
# Baseline index: only the analysis settings are supplied. With
# mapping_settings=None, reindex() sends no "mappings" section.
reindex(analysis_settings=analysis, mapping_settings=None, movie_dict=movie_dict)

<Response [200]>

In [8]:
# Free-text search string; the query body in the next cell embeds it.
user_search = "basketball with cartoon aliens"

In [9]:
# Match the search text against title and overview in a single multi_match.
# "title^0.1" uses Elasticsearch's per-field boost syntax to scale the title
# contribution down.
query = {"query": {"multi_match": {
    "query": user_search,
    "fields": ["title^0.1", "overview"],
}}}

In [15]:
# Run the current query and print the ranked hits table.
search(query)

Num	Relevance	Movie title
0	12.882349	Space Jam
1	7.5384703	Grown Ups
2	7.4996777	Speed Racer
3	7.244087	Semi-Pro
4	7.1626425	The Flintstones
5	6.943389	Coach Carter
6	6.7653713	White Men Can't Jump
7	5.845222	Meet Dave
8	5.800564	Aliens vs Predator: Requiem
9	5.440302	Bedazzled


In [11]:
# Document id to inspect; the document fetched below reports "id": 2300 and
# the title "Space Jam".
space_jam_id = 2300
# Base URL of the `tmdb` index on the local Elasticsearch node.
IDX_URI = "http://localhost:9200/tmdb"

In [12]:
# Fetch the stored document by id from the index's `_doc` endpoint.
resp = requests.get(f"{IDX_URI}/_doc/{space_jam_id}")

In [13]:
# Decode the JSON response; the next cell reads the movie from its "_source" key.
space_jam_doc = resp.json()

In [14]:
# Pretty-print the indexed movie document. `indent=True` only worked because
# bool is an int subclass (True == 1); spell out the one-space indent instead.
# The printed text is unchanged.
print(json.dumps(space_jam_doc['_source'], indent=1))

{
 "poster_path": "/9T9ucCk6wO0crRBUIkBJMRAVcKp.jpg",
 "production_countries": [
  {
   "iso_3166_1": "US",
   "name": "United States of America"
  }
 ],
 "revenue": 230000000,
 "overview": "Michael Jordan agrees to help the Looney Tunes play a basketball game against alien slavers to determine their freedom.",
 "video": false,
 "id": 2300,
 "genres": [
  {
   "id": 16,
   "name": "Animation"
  },
  {
   "id": 35,
   "name": "Comedy"
  },
  {
   "id": 18,
   "name": "Drama"
  },
  {
   "id": 14,
   "name": "Fantasy"
  },
  {
   "id": 10751,
   "name": "Family"
  }
 ],
 "title": "Space Jam",
 "tagline": "Get ready to jam.",
 "vote_count": 275,
 "homepage": "",
 "belongs_to_collection": null,
 "original_language": "en",
 "status": "Released",
 "spoken_languages": [
  {
   "iso_639_1": "cs",
   "name": "\u010cesk\u00fd"
  },
  {
   "iso_639_1": "fr",
   "name": "Fran\u00e7ais"
  },
  {
   "iso_639_1": "pl",
   "name": "Polski"
  },
  {
   "iso_639_1": "en",
   "name": "English"
  }
 ],
 "

In [16]:
# Next search text: a person's name. The query below searches it across the
# title, overview, cast and director fields.
user_search = "patrick stewart"

In [25]:
# Search the text in title, overview and the cast/director name fields.
# "directors.name^0.1" scales the director contribution down via the
# per-field boost syntax. The multi_match type is set to "best_fields".
query = {"query": {"multi_match": {
    "query": user_search,
    "fields": [
        "title",
        "overview",
        "cast.name",
        "directors.name^0.1",
    ],
    "type": "best_fields",
}}}

In [26]:
# Run the name query and print the ranked hits. In the recorded output the
# top hit is "Hannah Montana: The Movie", ahead of the Star Trek titles.
search(query)

Num	id	Relevance	Movie title
0	18126	7.1845765	Hannah Montana: The Movie
1	426	7.1596975	Vertigo
2	200	6.9732013	Star Trek: Insurrection
3	510	6.9093037	One Flew Over the Cuckoo's Nest
4	199	6.71099	Star Trek: First Contact
5	11527	6.5468693	Excalibur
6	45772	6.5468693	Gnomeo & Juliet
7	127585	6.465148	X-Men: Days of Future Past
8	4547	6.3152065	Panic Room
9	8834	6.2415876	Conspiracy Theory


In [126]:
# Analysis config for the second index build:
#   * "default"         - analyzer of type "english".
#   * "english_bigrams" - standard tokenizer, then lowercase, porter_stem and
#                         the custom "bigram_filter" defined below.
#   * "bigram_filter"   - shingle filter with min and max shingle size 2 and
#                         unigram output disabled.
analysis_settings = dict(
    analyzer=dict(
        default=dict(type="english"),
        english_bigrams=dict(
            tokenizer="standard",
            filter=["lowercase", "porter_stem", "bigram_filter"],
        ),
    ),
    filter=dict(
        bigram_filter=dict(
            type="shingle",
            min_shingle_size=2,
            max_shingle_size=2,
            output_unigrams=False,
        ),
    ),
)

In [136]:
# `cast.name` and `directors.name` get the same mapping: a text field analyzed
# with "english", plus a "bigramed" sub-field analyzed with the
# "english_bigrams" analyzer from the analysis settings. The comprehension
# builds a separate mapping dict for each of the two parent fields.
mapping_settings = {
    "properties": {
        person_field: {
            "properties": {
                "name": {
                    "type": "text",
                    "analyzer": "english",
                    "fields": {
                        "bigramed": {"type": "text", "analyzer": "english_bigrams"},
                    },
                },
            },
        }
        for person_field in ("cast", "directors")
    }
}

In [137]:
# Rebuild the index with the bigram analysis settings and the cast/director
# mappings. Assigning the bulk response to `r` keeps it from being echoed as
# the cell result.
r = reindex(analysis_settings, mapping_settings, movie_dict)

using stettings
 {'settings': {'number_of_shards': 1, 'index': {'analysis': {'analyzer': {'default': {'type': 'english'}, 'english_bigrams': {'tokenizer': 'standard', 'filter': ['lowercase', 'porter_stem', 'bigram_filter']}}, 'filter': {'bigram_filter': {'type': 'shingle', 'min_shingle_size': 2, 'max_shingle_size': 2, 'output_unigrams': False}}}}}, 'mappings': {'properties': {'cast': {'properties': {'name': {'type': 'text', 'analyzer': 'english', 'fields': {'bigramed': {'type': 'text', 'analyzer': 'english_bigrams'}}}}}, 'directors': {'properties': {'name': {'type': 'text', 'analyzer': 'english', 'fields': {'bigramed': {'type': 'text', 'analyzer': 'english_bigrams'}}}}}}}}
previouse index removal: 200
index creation {'acknowledged': True, 'shards_acknowledged': True, 'index': 'tmdb'}


In [150]:
# Same multi_match as before, but the people fields now target the
# "bigramed" sub-fields declared in the mapping.
query = {"query": {"multi_match": {
    "query": user_search,
    "fields": [
        "title",
        "overview",
        "cast.name.bigramed",
        "directors.name.bigramed",
    ],
}}}

In [148]:
# Run the bigram-field query and print the ranked hits.
search(query)

Num	id	Relevance	Movie title
0	18126	7.1845765	Hannah Montana: The Movie
1	510	6.9093037	One Flew Over the Cuckoo's Nest
2	27573	6.228061	The Bounty Hunter
3	200	6.058046	Star Trek: Insurrection
4	199	5.910869	Star Trek: First Contact
5	11527	5.7706733	Excalibur
6	45772	5.7706733	Gnomeo & Juliet
7	11836	5.443809	The SpongeBob SquarePants Movie
8	4951	5.443809	10 Things I Hate About You
9	193	5.3873396	Star Trek: Generations


In [149]:
# New search text combining a title phrase with a person's name. A query
# dict built earlier still holds the old text until it is rebuilt.
user_search = "star trek patrick stewart"

In [151]:
# `user_search` was just reassigned, but the existing `query` dict captured the
# previous string when it was built. Rebuild it here so a top-to-bottom run
# displays and searches the new text, which is what the recorded output shows.
query = {
    "query": {
        "multi_match": {
            "query": user_search,
            "fields": ["title", "overview", "cast.name.bigramed", "directors.name.bigramed"]
        }
    }
}
query

{'query': {'multi_match': {'query': 'star trek patrick stewart',
   'fields': ['title',
    'overview',
    'cast.name.bigramed',
    'directors.name.bigramed']}}}

In [152]:
# Run the combined title + person query and print the ranked hits.
search(query)

Num	id	Relevance	Movie title
0	18126	10.878148	Hannah Montana: The Movie
1	13475	10.760254	Star Trek
2	193	9.007851	Star Trek: Generations
3	200	9.007851	Star Trek: Insurrection
4	201	9.007851	Star Trek: Nemesis
5	54138	9.007851	Star Trek Into Darkness
6	10567	8.1140175	Dinosaur
7	50780	8.019917	The Beaver
8	152	7.7462955	Star Trek: The Motion Picture
9	199	7.7462955	Star Trek: First Contact


In [159]:
# Tuned version of the combined query: "cast.name.bigramed^5" boosts the cast
# bigram field via the per-field boost syntax, the multi_match type is
# "best_fields", and "tie_breaker" is set to 0.4.
query = {"query": {"multi_match": {
    "query": user_search,
    "fields": [
        "title",
        "overview",
        "cast.name.bigramed^5",
        "directors.name.bigramed",
    ],
    "type": "best_fields",
    "tie_breaker": 0.4,
}}}

In [160]:
# Run the tuned query and print the ranked hits. In the recorded output the
# four top hits are Star Trek titles.
search(query)

Num	id	Relevance	Movie title
0	200	33.893368	Star Trek: Insurrection
1	199	32.65286	Star Trek: First Contact
2	193	31.895023	Star Trek: Generations
3	201	30.539837	Star Trek: Nemesis
4	11527	28.853365	Excalibur
5	45772	28.853365	Gnomeo & Juliet
6	8834	26.936697	Conspiracy Theory
7	8005	26.353167	Robin Hood: Men in Tights
8	36657	25.794384	X-Men
9	76170	25.794384	The Wolverine
