### **LIBRARIES**

In [106]:
import random    
import os        
import csv       
import json      
import requests  
import urllib    

### **DATA PATHS, GLOBAL VARIABLES**

In [107]:
# Solr core that serves every search request in this notebook
SOLR_URL = "http://localhost:8983/solr/core1"

# Number of top-ranked results to request and display
N = 10

# Location of the validation/test feature file
DATA_DIR = "data/"
FEATURE_QUERY_VALIDATE_TEST = os.path.join(DATA_DIR, "feature_query_validate_test.csv")

### **GET A RANDOM QUERY FROM THE VALIDATION_TEST DATASET**

In [108]:
def select_random_query(myseed, path=None):
    """Pick one pseudo-random row from the validation/test feature CSV.

    The rows are shuffled with a private random generator seeded with
    `myseed`, and the first row of the shuffled list is returned. The same
    seed therefore always selects the same row, and the state of the global
    `random` module is left untouched.

    Args:
        myseed: Seed for the private random generator.
        path: CSV file to read. Defaults to FEATURE_QUERY_VALIDATE_TEST.

    Returns:
        dict mapping each CSV column name to that row's string value.

    Raises:
        IndexError: if the CSV file contains no data rows.
    """
    if path is None:
        path = FEATURE_QUERY_VALIDATE_TEST

    # newline="" lets the csv module handle line endings itself, which keeps
    # quoted fields containing line breaks intact.
    with open(path, newline="") as f:
        queries = [dict(row) for row in csv.DictReader(f, skipinitialspace=True)]

    if not queries:
        raise IndexError(f"no queries found in {path!r}")

    # A dedicated generator yields the same shuffle as seeding the module-level
    # one, without resetting random state used elsewhere in the notebook.
    rng = random.Random(myseed)
    rng.shuffle(queries)

    return queries[0]

### **HELPER FUNCTIONS**

In [145]:
def render_results(docs, query, ans, topic, top_n):
    """Print a header for the query followed by one line per returned document.

    Args:
        docs: Iterable of result documents (dicts) holding the keys "id",
            "qb_question", "qb_answer" and "qb_topic_id".
        query: Query text shown in the header.
        ans: Expected answer shown in the header.
        topic: Expected topic id shown in the header.
        top_n: Number shown in the header; it does not limit how many
            documents are printed.

    Returns:
        None. Everything is written to standard output.
    """

    def _first_value(value):
        # Fields may arrive as a list (multi-valued) or as a plain scalar
        # (single-valued); show the first element in the former case and the
        # value itself in the latter.
        if isinstance(value, (list, tuple)):
            return value[0]
        return value

    print(f"Top {top_n} results for the query: {query} (answer: {ans}, topic: {topic})\n")

    for doc in docs:
        doc_id = int(doc["id"])
        qb_question = _first_value(doc["qb_question"])
        qb_answer = _first_value(doc["qb_answer"])
        qb_topic_id = _first_value(doc["qb_topic_id"])
        print(f"doc_id: {doc_id} \t {qb_question} ({qb_answer})(topic: {qb_topic_id})")

In [146]:
def query_solr(payload, timeout=30):
    """Send `payload` to the Solr `/query` endpoint and return the matched docs.

    Args:
        payload: dict of Solr request parameters; it is URL-encoded into the
            query string of a GET request against SOLR_URL.
        timeout: Seconds to wait for Solr before giving up, so an unreachable
            server cannot hang the notebook indefinitely.

    Returns:
        The list found under response -> docs in Solr's JSON reply.

    Raises:
        requests.HTTPError: if Solr answers with an error status. The message
            carries the start of the response body.
        requests.RequestException: on connection problems or timeout.
    """
    # Let requests build the query string instead of encoding it by hand.
    resp = requests.get(SOLR_URL + "/query", params=payload, timeout=timeout)

    # Fail with the server's own explanation rather than a KeyError on the
    # missing "response" key further down.
    if not resp.ok:
        raise requests.HTTPError(
            f"Solr returned HTTP {resp.status_code}: {resp.text[:500]}",
            response=resp,
        )

    return resp.json()["response"]["docs"]

### **PAYLOAD TO QUERY SOLR BY DEFAULT BM25 (ENTIRE SENTENCE or SUBSTRING)**

In [147]:
# NO MODEL: Default Solr (entire sentence)

def run_query_default_qa(q):
    """Search Solr with the entire sentence, without reranking, and print the hits.

    Reads 'qb_answer', 'qb_topic_id' and 'qa' from `q`, sends an edismax
    request on the `qa` field through `query_solr`, prints a label line and
    hands the documents to `render_results`, whose return value is returned.
    """
    expected_answer = q['qb_answer']
    topic_id = q['qb_topic_id']
    sentence = q['qa']

    request = dict(
        q=sentence,
        defType="edismax",
        qf="qa",
        fl="id, qb_question, qb_answer, qb_topic_id",
        rows=N,
    )
    hits = query_solr(request)

    print("Default Solr results (query: entire sentence):")
    return render_results(hits, sentence, expected_answer, topic_id, N)

In [148]:
# NO MODEL: Default Solr (substring)

def run_query_default_ss(q):
    """Search Solr with the substring, without reranking, and print the hits.

    Reads 'qb_answer', 'qb_topic_id' and 'ss' from `q`, sends an edismax
    request on the `ss` field through `query_solr`, prints a label line and
    hands the documents to `render_results`, whose return value is returned.
    """
    expected_answer, topic_id = q['qb_answer'], q['qb_topic_id']
    substring = q['ss']

    request = {}
    request["q"] = substring
    request["defType"] = "edismax"
    request["qf"] = "ss"
    request["fl"] = "id, qb_question, qb_answer, qb_topic_id"
    request["rows"] = N

    hits = query_solr(request)
    print("Default Solr results (query: substring):")
    return render_results(hits, substring, expected_answer, topic_id, N)

### **PAYLOAD TO QUERY SOLR USING LAMBDAMART BASELINE MODELS**
#### Baseline models only have one feature: BM25 for entire sentence (Model 1) or BM25 for substring (Model 2)

#### When we enable LTR, Solr reranks the original results from default BM25 (i.e., the results returned when no LTR model is used).
#### By default, Solr reranks the top 200 results.
#### We can change how many results are reranked with the `reRankDocs` parameter; for example, to rerank only the top 10 results:
`"rq": f'{{!ltr model=lambdamart_model1 reRankDocs=10 ...}}',`

In [149]:
# with LTR BASELINE MODEL 1 (entire sentence)

def run_query_lambdamart_baseline_model1(q):
    """Search with the entire sentence and rerank with the Model 1 baseline.

    Reads 'qb_answer', 'qb_topic_id' and 'qa' from `q`. The request is an
    edismax search on the `qa` field whose `rq` parameter names the LTR model
    `lambdamart_model1_baseline`. The documents returned by `query_solr` are
    printed via `render_results`, whose return value is returned.
    """
    expected_answer = q['qb_answer']
    topic_id = q['qb_topic_id']
    sentence = q['qa']

    rerank = "{!ltr model=lambdamart_model1_baseline}"
    request = dict(
        q=sentence,
        defType="edismax",
        qf="qa",
        rq=rerank,
        fl="id, qb_question, qb_answer, qb_topic_id",
        rows=N,
    )

    hits = query_solr(request)
    print("LTR Baseline Model 1 results:")
    return render_results(hits, sentence, expected_answer, topic_id, N)

In [150]:
# with LTR BASELINE MODEL 2 (substring)

def run_query_lambdamart_baseline_model2(q):
    """Search with the substring and rerank with the Model 2 baseline.

    Reads 'qb_answer', 'qb_topic_id' and 'ss' from `q`. The request is an
    edismax search on the `ss` field whose `rq` parameter names the LTR model
    `lambdamart_model2_baseline`. The documents returned by `query_solr` are
    printed via `render_results`, whose return value is returned.
    """
    expected_answer, topic_id = q['qb_answer'], q['qb_topic_id']
    substring = q['ss']

    request = {
        "q": substring,
        "defType": "edismax",
        "qf": "ss",
        "rq": "{!ltr model=" + "lambdamart_model2_baseline" + "}",
        "fl": "id, qb_question, qb_answer, qb_topic_id",
        "rows": N,
    }
    hits = query_solr(request)

    print("LTR Baseline Model 2 results:")
    return render_results(hits, substring, expected_answer, topic_id, N)

### **PAYLOAD TO QUERY SOLR USING LAMBDAMART MODELS 1 AND 2**

In [151]:
# with LTR MODEL 1

def run_query_lambdamart_model1(q):
    """Search with the entire sentence and rerank with LTR Model 1.

    Reads 'qb_answer', 'qb_topic_id', 'qa' and the four 'qa_*' feature columns
    from `q`. The feature values are passed to the `lambdamart_model1` model
    as the external feature parameters efi.q2 .. efi.q5 inside `rq`. The
    documents returned by `query_solr` are printed via `render_results`,
    whose return value is returned.
    """
    expected_answer = q['qb_answer']
    topic_id = q['qb_topic_id']

    sentence = q['qa']
    external_features = [
        q["qa_pos"],
        q["qa_pos_bigram"],
        q["qa_pos_trigram"],
        q["qa_parse_tree"],
    ]

    # The local params inside rq are separated by a fixed run of nine spaces.
    gap = " " * 9
    efi_params = gap.join(
        f'efi.q{number}="{value}"'
        for number, value in enumerate(external_features, start=2)
    )
    rerank = "{!ltr model=lambdamart_model1" + gap + efi_params + "}"

    request = dict(
        q=sentence,
        defType="edismax",
        qf="qa",
        rq=rerank,
        fl="id, qb_question, qb_answer, qb_topic_id",
        rows=N,
    )

    hits = query_solr(request)
    print("LTR Model 1 results:")
    return render_results(hits, sentence, expected_answer, topic_id, N)

In [152]:
# with LTR MODEL 2

def run_query_lambdamart_model2(q):
    """Search with the substring and rerank with LTR Model 2.

    Reads 'qb_answer', 'qb_topic_id', 'ss' and 27 feature columns from `q`.
    The feature values are passed to the `lambdamart_model2` model as the
    external feature parameters efi.q2 .. efi.q28 inside `rq`. The documents
    returned by `query_solr` are printed via `render_results`, whose return
    value is returned.

    Args:
        q: dict for one query row holding every column named above.

    Raises:
        KeyError: if `q` lacks one of the required columns.
    """
    ans = q['qb_answer']
    topic = q['qb_topic_id']

    query = q['ss']

    # Position in this list fixes the efi number: the first entry is efi.q2,
    # the last one efi.q28.
    feature_columns = [
        "ss_pos", "ss_pos_bigram", "ss_pos_trigram", "ss_parse_tree",
        "before", "before_last", "before_last_pos",
        "before_pos", "before_pos_bigram", "before_pos_trigram", "before_parse_tree",
        "after", "after_first", "after_first_pos",
        "after_pos", "after_pos_bigram", "after_pos_trigram", "after_parse_tree",
        "ans", "ans_first", "ans_last",
        "ans_pos", "ans_first_pos", "ans_last_pos",
        "ans_is_first", "ans_is_last", "ans_length",
    ]
    # These columns are sent lower-cased.
    lowercased = {
        "before_last_pos", "after_first_pos",
        "ans_pos", "ans_first_pos", "ans_last_pos",
    }

    def _efi_value(column):
        value = q[column]
        if column in lowercased:
            value = value.lower()
        # The value sits inside a double-quoted local param, so a literal
        # backslash or double quote in the text must be backslash-escaped or
        # it would end the value early.
        return str(value).replace("\\", "\\\\").replace('"', '\\"')

    efi_params = " ".join(
        f'efi.q{number}="{_efi_value(column)}"'
        for number, column in enumerate(feature_columns, start=2)
    )

    payload = {
        "q": query,
        "defType": "edismax",
        "qf": "ss",
        "rq": "{!ltr model=lambdamart_model2 " + efi_params + "}",
        "fl": "id, qb_question, qb_answer, qb_topic_id",
        "rows": N,
    }

    docs = query_solr(payload)
    print("LTR Model 2 results:")
    return render_results(docs, query, ans, topic, N)

### **TESTS**

In [153]:
# Display the full record that seed 1 selects from the validation/test CSV.
select_random_query(1)

{'id': '9120',
 'qb_question': 'She was going for play and asked him if he wants to *.',
 'qb_answer': 'come along',
 'qb_topic_id': '3',
 'qa': 'She was going for play and asked him if he wants to come along.',
 'qa_pos': 'PRP VBD VBG IN NN CC VBD PRP IN PRP VBZ TO VB NN',
 'qa_pos_bigram': 'PRP_VBD VBD_VBG VBG_IN IN_NN NN_CC CC_VBD VBD_PRP PRP_IN IN_PRP PRP_VBZ VBZ_TO TO_VB VB_NN',
 'qa_pos_trigram': 'PRP_VBD_VBG VBD_VBG_IN VBG_IN_NN IN_NN_CC NN_CC_VBD CC_VBD_PRP VBD_PRP_IN PRP_IN_PRP IN_PRP_VBZ PRP_VBZ_TO VBZ_TO_VB TO_VB_NN',
 'qa_parse_tree': 'S_NP_VP_. NP_PRP VP_VP_CC_VP VP_VBD_VP VP_VBG_PP PP_IN_NP NP_NN VP_VBD_NP_SBAR NP_PRP SBAR_IN_S S_NP_VP NP_PRP VP_VBZ_S S_VP VP_TO_VP VP_VB_ADVP ADVP_RB',
 'ss': 'if he wants to come along',
 'ss_pos': 'IN PRP VBZ TO VB IN',
 'ss_pos_bigram': 'IN_PRP PRP_VBZ VBZ_TO TO_VB VB_IN',
 'ss_pos_trigram': 'IN_PRP_VBZ PRP_VBZ_TO VBZ_TO_VB TO_VB_IN',
 'ss_parse_tree': 'SBAR_IN_S S_NP_VP NP_PRP VP_VBZ_S S_VP VP_TO_VP VP_VB_ADVP ADVP_RB',
 'before': 'if 

In [154]:
query = select_random_query(1)

# Run every retrieval variant on the same query: first the three
# entire-sentence variants, then the three substring variants, with a blank
# line between consecutive result listings.
_runners = (
    run_query_default_qa,
    run_query_lambdamart_baseline_model1,
    run_query_lambdamart_model1,
    run_query_default_ss,
    run_query_lambdamart_baseline_model2,
    run_query_lambdamart_model2,
)
for _position, _runner in enumerate(_runners):
    if _position > 0:
        print('')
    _runner(query)

Default Solr results (query: entire sentence):
Top 10 results for the query: She was going for play and asked him if he wants to come along. (answer: come along, topic: 3)

doc_id: 350 	 Please * if you want to audition for the part. (come along)(topic: 3)
doc_id: 369 	 She * to me and asked for directions. (came up)(topic: 3)
doc_id: 348 	 We are going to the movies. Do you want to *? (come along)(topic: 3)
doc_id: 347 	 I waited for an hour and then four buses * at once. (come along)(topic: 3)
doc_id: 175 	 She stayed up and waited for him till * midnight. (after)(topic: 1)
doc_id: 224 	 You cannot go for the mountain hike * I come along. (unless)(topic: 2)
doc_id: 216 	 She asked me * I was interested to join the party. (whether)(topic: 2)
doc_id: 349 	 You can * if you want but you must watch the proceedings in silence. (come along)(topic: 3)
doc_id: 346 	 You cannot go to the party unless I *. (come along)(topic: 3)
doc_id: 604 	 I * to the conclusion that he is a liar. Do not tru