# Run test queries with BM25 using `pyserini`

- We run the test queries against the indexes built for the laptop and restaurant domains.

## Google Colab setups

This part is only executed if this notebook is running under Google Colab. **Please change the working directory path below in advance!**

In [1]:
# Use Google Colab
use_colab = True

# Is this notebook running on Colab?
# If so, then google.colab package (github.com/googlecolab/colabtools)
# should be available in this environment

# Previous version used importlib, but we could do the same thing with
# just attempting to import google.colab
try:
    from google.colab import drive
    colab_available = True
except:
    colab_available = False

if use_colab and colab_available:
    # If there are packages I need to install separately, do it here
    !pip install pyserini==0.9.4.0 jsonlines==1.2.0

    # Mount Google Drive
    drive.mount('/content/drive')

    # cd to the appropriate working directory under my Google Drive
    # (IMPORTANT: THIS PATH MUST MATCH EXACTLY TO WHERE THIS NOTEBOOK IS LOCATED
    # IN YOUR GOOGLE DRIVE!!)
    %cd '/content/drive/My Drive/CS646_Final_Project/BM25'

    # List the directory contents
    !ls

## Import packages

In [2]:
import os
import pathlib

from pyserini.search import SimpleSearcher

## Run BM25 on test queries (Laptop)

In [3]:
# Inputs and output for the laptop-domain run: the index directory, the file
# of test queries (read line by line), and the file the ranked hits go to.
# All paths are relative to the current working directory.
index_path_laptop = os.path.join('.', 'index', 'laptop_test')
queries_path_laptop = 'test_queries_laptop.txt'
results_path_laptop = 'test_results_laptop.txt'

In [4]:
# Create the pyserini searcher over the laptop index at index_path_laptop.
# The index directory must already exist (it is not built in this notebook).
searcher_laptop = SimpleSearcher(index_path_laptop)

In [5]:
# Run every laptop test query and write the ranked hits, one line per hit:
#   <query number> Q0 <docid> <rank> <score> bm25
print(results_path_laptop)

# Make sure the output directory exists. The results file is opened in 'w'
# mode below, which truncates any previous run, so re-running this cell never
# appends to stale results and no shell `rm`/`touch` is needed.
pathlib.Path(results_path_laptop).parent.mkdir(parents=True, exist_ok=True)

# Both files are opened once, under distinct names, so the results handle
# never shadows the query file that is being iterated.
with open(queries_path_laptop) as queries_file, \
        open(results_path_laptop, 'w') as results_file:
    # The query number is the 1-based line number in the queries file.
    for q_num, query in enumerate(queries_file, start=1):
        # Drop the trailing newline before searching. A blank line has no
        # terms to search for: it writes no result lines but still consumes
        # its query number, so numbering stays aligned with the file.
        query = query.strip()
        if not query:
            continue

        hits = searcher_laptop.search(q=query, k=1000)

        # Ranks are 1-based, in the order the searcher returned the hits.
        for rank, hit in enumerate(hits, start=1):
            results_file.write(
                '%d Q0 %s %d %.8f bm25\n' % (q_num, hit.docid, rank, hit.score)
            )

test_results_laptop.txt


## Run BM25 on test queries (Restaurant)

In [6]:
# Inputs and output for the restaurant-domain run: the index directory, the
# file of test queries (read line by line), and the file the ranked hits go to.
# All paths are relative to the current working directory.
index_path_restaurant = os.path.join('.', 'index', 'restaurant_test')
queries_path_restaurant = 'test_queries_restaurant.txt'
results_path_restaurant = 'test_results_restaurant.txt'

In [7]:
# Create the pyserini searcher over the restaurant index at
# index_path_restaurant. The index directory must already exist (it is not
# built in this notebook).
searcher_restaurant = SimpleSearcher(index_path_restaurant)

In [8]:
# Run every restaurant test query and write the ranked hits, one line per hit:
#   <query number> Q0 <docid> <rank> <score> bm25
print(results_path_restaurant)

# Make sure the output directory exists. The results file is opened in 'w'
# mode below, which truncates any previous run, so re-running this cell never
# appends to stale results and no shell `rm`/`touch` is needed.
pathlib.Path(results_path_restaurant).parent.mkdir(parents=True, exist_ok=True)

# Both files are opened once, under distinct names, so the results handle
# never shadows the query file that is being iterated.
with open(queries_path_restaurant) as queries_file, \
        open(results_path_restaurant, 'w') as results_file:
    # The query number is the 1-based line number in the queries file.
    for q_num, query in enumerate(queries_file, start=1):
        # Drop the trailing newline before searching. A blank line has no
        # terms to search for: it writes no result lines but still consumes
        # its query number, so numbering stays aligned with the file.
        query = query.strip()
        if not query:
            continue

        hits = searcher_restaurant.search(q=query, k=1000)

        # Ranks are 1-based, in the order the searcher returned the hits.
        for rank, hit in enumerate(hits, start=1):
            results_file.write(
                '%d Q0 %s %d %.8f bm25\n' % (q_num, hit.docid, rank, hit.score)
            )

test_results_restaurant.txt
