In [92]:
import requests
import json

In [93]:
def pretty(parsed):
    print(json.dumps(parsed, indent=4, sort_keys=True))

In [94]:
def extract():
    f = open('tmdb.json')
    if f:
        return json.loads(f.read())

In [95]:
print("Extracting data");
data = extract();
print("Extraction complete");

Extracting data
Extraction complete


In [142]:
def addMappings(mappings={}):
    headers = {
        "Content-Type": "application/json"
    }
    resp = requests.put("http://localhost:9200/tmdb",headers=headers, data=json.dumps(mappings))
    pretty(resp.json())

def seedData(movieDict={}): 
    headers = {
        "Content-Type": "application/json"
    }
    bulkMovies = ""
    
    for id, movie in movieDict.items():
        addCmd = {"index": {"_index": "tmdb",
                           "_type": "movie",
                           "_id": movie["id"]}}
        bulkMovies += json.dumps(addCmd) + "\n" + json.dumps(movie) + "\n"
    resp = requests.post("http://localhost:9200/_bulk",headers=headers, data=bulkMovies)
    print("Indexing complete")

def reindex(analysisSettings={}, mappingSettings={}):
    settings = {
        "settings": {
            "number_of_shards": 1,
            "index": {
                "analysis": analysisSettings,
            }
        },
    }
    if mappingSettings:
        settings['mappings'] = mappingSettings
        
    headers = {
        "Content-Type": "application/json"
    }
    
    resp = requests.delete("http://localhost:9200/tmdb")
    print("Delete: ", resp)
    print("settings: ", json.dumps(settings))
    resp = requests.put("http://localhost:9200/tmdb",headers=headers, data=json.dumps(settings))
    print("Put settings: ", resp.status_code, resp.json())
    
    
    

In [143]:
mappings = {
    "properties": {
        "title": {
            "type": "text",
            "analyzer": "english"
        },
        "overview": {
            "type": "text",
            "analyzer": "english"
        }
    }
}

In [144]:
reindex(mappingSettings=mappings)
# addMappings(mappings=mappings)
seedData(movieDict=data)

Delete:  <Response [200]>
settings:  {"settings": {"number_of_shards": 1, "index": {"analysis": {}}}, "mappings": {"properties": {"title": {"type": "text", "analyzer": "english"}, "overview": {"type": "text", "analyzer": "english"}}}}
Put settings:  200 {'acknowledged': True, 'shards_acknowledged': True, 'index': 'tmdb'}
Indexing complete


In [138]:
def search(query):
    headers = {
        "Content-Type": "application/json"
    }
    url = "http://localhost:9200/tmdb/movie/_search"
    httpResp = requests.get(url, headers=headers, data=json.dumps(query))
    searchHits = json.loads(httpResp.text)['hits']
    print("Num\tRelevance Score\t\tMovie Title")
    for idx, hit in enumerate(searchHits["hits"]):
        print("%s\t%s\t\t%s" %
        (idx + 1, hit['_score'], hit['_source']["title"]))

In [139]:
userSearch = 'basketball with cartoon aliens'
query = {
    "query": {
        "multi_match": {
            "query": userSearch,
            "fields": ["title^10", "overview"]
        }
    }
}

In [140]:
search(query)

Num	Relevance Score		Movie Title


In [141]:
headers = {
    "Content-Type": "application/json"
}
userSearch = 'Alien'
query = {
    "query": {
        "multi_match": {
            "query": userSearch,
            "type": "best_fields",
            "fields": ["title^0.1", "overview"]
        }
    }
}
httpResp = requests.get('http://localhost:9200/tmdb/movie/_validate/query?explain', headers=headers, data=json.dumps(query))
pretty(httpResp.json())

{
    "_shards": {
        "failed": 0,
        "successful": 1,
        "total": 1
    },
    "explanations": [
        {
            "explanation": "+(overview:alien | (title:alien)^0.1) #MatchNoDocsQuery(\"\")",
            "index": "tmdb",
            "valid": true
        }
    ],
    "valid": true
}
