In [2]:
import json

with open('queries_train.json', 'rt') as f:
  queries = json.load(f)

In [3]:
def average_precision(true_list, predicted_list, k=40):
    true_set = frozenset(true_list)
    predicted_list = predicted_list[:k]
    precisions = []
    for i,doc_id in enumerate(predicted_list):        
        if doc_id in true_set:
            prec = (len(precisions)+1) / (i+1)            
            precisions.append(prec)
    if len(precisions) == 0:
        return 0.0
    return round(sum(precisions)/len(precisions),3)

In [41]:
# Testing Functions
import numpy as np
def intersection(l1,l2):      
    return list(set(l1)&set(l2))

def recall_at_k(true_list,predicted_list,k=40):
    return round(len(intersection(true_list, predicted_list[:k]))/len(true_list), 3)

def precision_at_k(true_list,predicted_list,k=40):    
    return  round(len(intersection(true_list, predicted_list[:k]))/k, 3)

def r_precision(true_list,predicted_list):
    r = len(true_list)
    k_prec = len(intersection(true_list,predicted_list[:r]))
    return round(k_prec/r, 3)

def f_score(true_list,predicted_list,k=40):
    recall = recall_at_k(true_list, predicted_list, k)
    precision = precision_at_k(true_list, predicted_list, k)
    
    if recall == 0 and precision == 0:
      return 0.0

    return round(2*precision*recall/(recall + precision), 3)

def avg_eval_speed(res):
  return np.mean([tup[1] for tup in res])

def map_at_40(res):
  my_count = 0
  my_sum = 0
  for i in res:
    if i[2] is not None:
      my_sum += i[2]
    my_count +=1 
  return my_sum/my_count

def eval(rec_lst, f_lst, r_lst, res):
  print("recall@k: ", np.mean(rec_lst))
  print("f-score: ", np.mean(f_lst))
  print("r-precision: ", np.mean(r_lst))
  print("avg_eval_speed: ", avg_eval_speed(res))
  print("Map@k: ", map_at_40(res))

In [60]:
import requests
from time import time
# url = 'http://35.232.59.3:8080'
# place the domain you got from ngrok or GCP IP below. 
url = 'http://e0e7-34-72-108-240.ngrok.io'
qs_res, rec_lst, r_prec, f_scores= [], [], [], []
for q, true_wids in queries.items():
  duration, ap = None, None
  t_start = time()
  res = requests.get(url + '/search', {'query': q}, timeout=35)
  duration = time() - t_start
  if res.status_code == 200:
    pred_wids, _ = zip(*res.json())
    # calculate evalueation measures
    ap = average_precision(true_wids, pred_wids)
    rec_lst.append(recall_at_k(true_wids, pred_wids))
    r_prec.append(r_precision(true_wids, pred_wids))
    f_scores.append(f_score(true_wids, pred_wids))
  qs_res.append((q, duration, ap))


In [61]:
qs_res

[('python', 0.388350248336792, 0.665),
 ('data science', 2.7326111793518066, 0.198),
 ('migraine', 0.10274600982666016, 0.897),
 ('chocolate', 0.3867337703704834, 0.411),
 ('how to make pasta', 2.622701644897461, 0.617),
 ('Does pasta have preservatives?', 0.12704777717590332, 0.203),
 ('how google works', 2.8306946754455566, 0.649),
 ('what is information retrieval', 2.179422378540039, 0.573),
 ('NBA', 0.35607194900512695, 0.376),
 ('yoga', 0.1344144344329834, 0.667),
 ('how to not kill plants', 1.7413597106933594, 0.136),
 ('masks', 0.33974552154541016, 0.566),
 ('black friday', 2.3144137859344482, 0.198),
 ('why do men have nipples', 1.8068804740905762, 0.705),
 ('rubber duck', 0.6931555271148682, 0.112),
 ('michelin', 0.29958176612854004, 0.368),
 ('what to watch', 0.6023693084716797, 0.196),
 ('best marvel movie', 4.848108291625977, 0.343),
 ('how tall is the eiffel tower', 2.2207260131835938, 0.664),
 ('where does vanilla flavoring come from', 2.1208407878875732, 0.598),
 ('best 

In [55]:
# Evaluate
eval(rec_lst, r_prec, f_scores, qs_res)

recall@k:  0.13993333333333335
f-score:  0.21473333333333336
r-precision:  0.18720000000000006
avg_eval_speed:  1.4596919695536295
Map@k:  0.38383333333333347
