# Reranking (no Index support)

First we run the rerank subcommand to generate the top 10 reranked results. Note we have added a normalization parameter here to make sure all the features are normalized (so less chance of blowups).

    cd <scripts_dir>
    java -jar RankLib-2.10.jar \
        -load ../data/diy_lambdamart_model.txt \
        -rank ../data/diy_features_test.txt \
        -norm zscore \
        -score ../data/diy_lambdamart_scores.txt

Terminal output is as follows:

    Discard orig. features
    Model file:	../data/diy_lambdamart_model.txt
    Feature normalization: zscore
    Model:		LambdaMART
    Reading feature file [../data/diy_features_test.txt]... [Done.]            
    (5 ranked lists, 410 entries read)


In [1]:
import json
import operator
import os
import random
import requests
import urllib

In [2]:
DATA_DIR = "../../data"
SCORE_FILE = os.path.join(DATA_DIR, "diy_lambdamart_scores.txt")

SOLR_URL = "http://localhost:8983/solr/tmdbindex/"

QID_TO_QUERY = {
    16: "spy",
    17: "extramarital",
    18: "world war ii",
    19: "musical",
    20: "alien"
}
TOP_N = 10

In [3]:
def rating2label(rating):
    """ convert 0-10 continuous rating to 1-5 categorical labels """
    return int(rating // 2) + 1


def get_rating_string(rating):
    rating_string = []
    for i in range(rating):
        rating_string.append(u"\u2605")
    for i in range(5 - rating):
        rating_string.append(u"\u2606")
    return "".join(rating_string)


print(get_rating_string(3))
print(get_rating_string(rating2label(6.4)))

★★★☆☆
★★★★☆


## Top 20 without LTR

In [4]:
qid = random.randint(16, 20)
query = QID_TO_QUERY[qid]

In [5]:
def render_results(docs, query, top_n):
    print("top {:d} results for {:s}".format(top_n * 2, query))
    print("---")
    for doc in docs[0:top_n * 2]:
        doc_id = int(doc["id"])
        stars = get_rating_string(rating2label(float(doc["rating_f"])))
        score = float(doc["score"])
        title = doc["title_t"]
        print("{:s} {:06d} {:.3f} {:s}".format(stars, doc_id, score, title))


payload = {
    "q": query,
    "defType": "edismax",
    "qf": "title_t description_t",
    "pf": "title_t description_t",
    "mm": 2,
    "fl": "id,title_t,rating_f,score",            
    "rows": TOP_N * 5
}
params = urllib.parse.urlencode(payload, quote_via=urllib.parse.quote_plus)
search_url = SOLR_URL + "select?" + params
resp = requests.get(search_url)
resp_json = json.loads(resp.text)
docs = resp_json["response"]["docs"]
render_results(docs, query, TOP_N)

top 20 results for spy
---
★★★★☆ 238713 9.455 Spy
★★★☆☆ 010535 7.967 Spy Hard
★★★☆☆ 010054 7.967 Spy Kids
★★★★☆ 001535 7.967 Spy Game
★★★☆☆ 008427 7.967 I Spy
★★☆☆☆ 031377 7.967 Spy School
★★★★☆ 177234 7.967 Spy Chasers
★★★★★ 370722 7.967 London Spy
★★★☆☆ 038417 7.749 The Thief
★★★☆☆ 161620 7.663 Wonder Woman
★★★☆☆ 052991 7.495 My Son John
★★★★☆ 073420 7.495 Dishonored
★★★★☆ 137587 7.495 Reilly: Ace of Spies
★★★☆☆ 051787 7.491 British Intelligence
★★★★☆ 000213 7.414 North by Northwest
★★★☆☆ 183932 7.414 Mare Nostrum
★★★★☆ 224600 7.335 The Executioner
★★★☆☆ 056288 7.329 Spy Kids: All the Time in the World
★★★☆☆ 041135 7.257 Operation: Endgame
★★★☆☆ 028426 7.257 Invisible Agent


## Top 20 with LTR

In [6]:
fscores = open(SCORE_FILE, "r")
rows = []
for line in fscores:
    line = line.strip()
    rqid, doc_id, score = line.split("\t")
    if int(rqid) != qid:
        continue
    rows.append((int(doc_id), float(score)))
reranked_rows = sorted(rows, key=operator.itemgetter(1), reverse=True)[0:TOP_N]

In [7]:
reranked_docs = []
# LTR layer
for doc_id, score in reranked_rows:
    doc = docs[doc_id]
    doc["score"] = score
    reranked_docs.append(doc)
# rest of the results
doc_ids_to_remove = set([x[0] for x in reranked_rows])
for doc in docs:
    doc_id = int(doc["id"])
    if doc_id in doc_ids_to_remove:
        continue
    reranked_docs.append(doc)
    
render_results(reranked_docs, query, TOP_N)

top 20 results for spy
---
★★★★☆ 238713 0.200 Spy
★★☆☆☆ 111310 0.028 Operator 13
★★★☆☆ 010535 -0.146 Spy Hard
★★★☆☆ 010054 -0.146 Spy Kids
★★★★☆ 001535 -0.146 Spy Game
★★★☆☆ 008427 -0.146 I Spy
★★☆☆☆ 031377 -0.146 Spy School
★★★★☆ 177234 -0.146 Spy Chasers
★★★★★ 370722 -0.146 London Spy
★★★☆☆ 038417 -0.146 The Thief
★★★★☆ 238713 0.200 Spy
★★★☆☆ 010535 -0.146 Spy Hard
★★★☆☆ 010054 -0.146 Spy Kids
★★★★☆ 001535 -0.146 Spy Game
★★★☆☆ 008427 -0.146 I Spy
★★☆☆☆ 031377 -0.146 Spy School
★★★★☆ 177234 -0.146 Spy Chasers
★★★★★ 370722 -0.146 London Spy
★★★☆☆ 038417 -0.146 The Thief
★★★☆☆ 161620 7.663 Wonder Woman
