# Reranking with Solr LTR Model

In [1]:
import json
import os
import random
import urllib
import urllib.parse

import requests

In [2]:
# Base URL of the Solr core that every request in this notebook is built on.
SOLR_URL = "http://localhost:8983/solr/tmdbindex/"

# Candidate queries; one of these is picked at random further down.
QUERY_LIST = [
    "murder", "musical", "biography", "police",
    "world war ii", "comedy", "superhero", "nazis",
    "romance", "martial arts", "extramarital", "spy",
    "vampire", "magic", "wedding", "sport",
    "prison", "teacher", "alien", "dystopia",
]

# Requests ask Solr for TOP_N * 2 rows.
TOP_N = 10

In [3]:
def rating2label(rating):
    """ convert 0-10 continuous rating to 1-5 categorical labels

    Each label covers a two-point band: [0, 2) -> 1, [2, 4) -> 2, ...,
    [8, 10] -> 5. The result is clamped to the 1-5 range so that a rating
    of exactly 10 maps to 5 (not 6) and out-of-range input cannot produce
    a label outside the documented scale.

    :param rating: numeric rating, expected to lie in 0-10.
    :return: integer label between 1 and 5 inclusive.
    """
    label = int(rating // 2) + 1
    # Without the clamp, rating == 10 would give 6 and negatives would give < 1.
    return max(1, min(5, label))


def get_rating_string(rating):
    """ render an integer rating as filled stars followed by hollow stars

    :param rating: number of filled stars to draw.
    :return: string of `rating` filled stars and `5 - rating` hollow stars.
    """
    filled = u"\u2605" * rating
    hollow = u"\u2606" * (5 - rating)
    return filled + hollow


# Quick visual check: a literal label, then a label derived from a raw rating.
print(
    get_rating_string(3),
    get_rating_string(rating2label(6.4)),
    sep="\n",
)

★★★☆☆
★★★★☆


In [4]:
# Pick one query at random. random.choice only ever returns an existing
# element, whereas random.randint(0, len(QUERY_LIST)) includes the upper
# bound and could index one past the end of the list (IndexError).
query = random.choice(QUERY_LIST)
# Multi-word queries are wrapped in double quotes before being sent to Solr.
if len(query.split()) > 1:
    query = "\"" + query + "\""

## Top 20 results without LTR

In [5]:
def render_results(docs, query, top_n):
    """ print a header and one formatted line per result document

    Uses only its arguments: the documents come from `docs` and the count
    shown in the header comes from `top_n`, so the output does not depend
    on notebook-level variables left over from other cells.

    :param docs: iterable of result dicts; each must provide the keys
        "id", "rating_f", "score" and "title_t".
    :param query: query string shown in the header line.
    :param top_n: number of results announced in the header line.
    :return: None; output is written with print().
    """
    print("top {:d} results for {:s}".format(top_n, query))
    print("---")
    for doc in docs:
        doc_id = int(doc["id"])
        stars = get_rating_string(rating2label(float(doc["rating_f"])))
        score = float(doc["score"])
        title = doc["title_t"]
        print("{:s} {:06d} {:.3f} {:s}".format(stars, doc_id, score, title))


# Baseline request: edismax query over the title and description fields,
# with no reranking applied.
payload = {
    "q": query,
    "defType": "edismax",
    "qf": "title_t description_t",
    "pf": "title_t description_t",
    "mm": 2,
    "fl": "id,title_t,rating_f,score",
    "rows": TOP_N * 2
}
params = urllib.parse.urlencode(payload, quote_via=urllib.parse.quote_plus)
search_url = SOLR_URL + "select?" + params
# A timeout keeps the cell from hanging forever if Solr is unreachable.
resp = requests.get(search_url, timeout=10)
# Fail here with a clear HTTP error rather than later with a confusing
# JSON decode error or KeyError on an error response.
resp.raise_for_status()
resp_json = resp.json()
docs = resp_json["response"]["docs"]
render_results(docs, query, TOP_N * 2)

top 20 results for "world war ii"
---
★★★★☆ 039485 14.994 Hotel Sahara
★★★★☆ 143335 14.659 The Gathering Storm
★★★☆☆ 166610 14.659 The Ducktators
★★★★☆ 030298 14.497 The Secret of Santa Vittoria
★★★★☆ 043313 14.339 The Teahouse of the August Moon
★★★☆☆ 035954 14.339 Cornered
★★★☆☆ 074474 14.339 Varian's War
★★★☆☆ 165300 14.184 Hotel Berlin
★★★★☆ 029032 14.184 The Secret Invasion
★★★☆☆ 034945 14.184 The Conspirators
★★★★☆ 004820 14.032 Never So Few
★★★☆☆ 343070 14.004 Flight World War II
★★★★☆ 027367 13.883 Mrs. Miniver
★★★★☆ 022905 13.875 The Rape of Europa
★★★★☆ 011589 13.738 Kelly's Heroes
★★★★☆ 051044 13.738 Carmen Jones
★★★★☆ 044480 13.738 Education for Death
★★★★☆ 048882 13.738 Podranki
★★★★☆ 018884 13.596 Nuremberg
★☆☆☆☆ 118443 13.596 Nothing Too Good for a Cowboy


## Top 20 results with LTR (top 10 reranked)

In [6]:
# Same edismax request as the baseline, plus an `rq` rerank query that asks
# the LTR model to rescore the first TOP_N hits. The rerank depth is taken
# from TOP_N instead of a literal 10 so it stays in step with `rows`.
payload = {
    "q": query,
    "defType": "edismax",
    "qf": "title_t description_t",
    "pf": "title_t description_t",
    "mm": 2,
    # Doubled braces are literal { } in str.format; the rendered value is
    # {!ltr model=myLambdaMARTModel reRankDocs=<TOP_N> efi.query=<query>}
    "rq": "{{!ltr model=myLambdaMARTModel reRankDocs={:d} efi.query={:s}}}".format(TOP_N, query),
    "fl": "id,title_t,rating_f,score",
    "rows": TOP_N * 2
}
params = urllib.parse.urlencode(payload, quote_via=urllib.parse.quote_plus)
search_url = SOLR_URL + "select?" + params
# A timeout keeps the cell from hanging forever if Solr is unreachable.
resp = requests.get(search_url, timeout=10)
# Fail here with a clear HTTP error (e.g. model not uploaded) rather than
# later with a confusing JSON decode error or KeyError.
resp.raise_for_status()
resp_json = resp.json()
docs = resp_json["response"]["docs"]
render_results(docs, query, TOP_N * 2)

top 20 results for "world war ii"
---
★★★★☆ 030298 -1.897 The Secret of Santa Vittoria
★★★★☆ 143335 -2.010 The Gathering Storm
★★★☆☆ 074474 -2.055 Varian's War
★★★☆☆ 034945 -2.166 The Conspirators
★★★★☆ 029032 -2.174 The Secret Invasion
★★★☆☆ 035954 -2.281 Cornered
★★★☆☆ 165300 -2.281 Hotel Berlin
★★★★☆ 039485 -2.352 Hotel Sahara
★★★☆☆ 166610 -2.611 The Ducktators
★★★★☆ 043313 -2.683 The Teahouse of the August Moon
★★★★☆ 004820 14.032 Never So Few
★★★☆☆ 343070 14.004 Flight World War II
★★★★☆ 027367 13.883 Mrs. Miniver
★★★★☆ 022905 13.875 The Rape of Europa
★★★★☆ 011589 13.738 Kelly's Heroes
★★★★☆ 051044 13.738 Carmen Jones
★★★★☆ 044480 13.738 Education for Death
★★★★☆ 048882 13.738 Podranki
★★★★☆ 018884 13.596 Nuremberg
★☆☆☆☆ 118443 13.596 Nothing Too Good for a Cowboy
