In [72]:
# RUN: 'pip install jsonlines' on: ModuleNotFoundError: No module named 'jsonlines'
import json
import jsonlines
from tqdm import tqdm
import pandas as pd
import pickle
import re

# RUN: 'pip install whoosh' on: ModuleNotFoundError: No module named 'whoosh'
import os, os.path

#from whoosh import index
import whoosh
from whoosh.index import create_in
from whoosh.fields import *
from whoosh.writing import AsyncWriter

from datetime import datetime
from pytz import timezone

from IPython.display import clear_output

######################################

class Indexer:
    '''Builds a Whoosh index from the Washington Post JSON-lines collection.

    Typical use: set the index directory and the data path with the setters,
    then call index(). The attributes below are class-level defaults that the
    setters override per instance.
    '''

    index_created = False   # set to True by setIndexCreatedTrue()
    index_directory = ""    # directory that holds the Whoosh index files
    data_path = ""          # path to the .jl data file

    # Non-greedy match of anything between '<' and '>', used to strip markup.
    _TAG_PATTERN = re.compile('<.*?>')

    def __init__(self):
        print('Indexer has been initiated.')

    def indexCreated(self):
        '''Returns True once the index has been flagged as created.'''
        return self.index_created

    def openIndex(self):
        '''Placeholder, not implemented.'''
        pass

    ## Count documents
    def countDocuments(self):
        '''Counts the records in the data file and returns the total.

        A running total is printed every 10000 records; lines that cannot be
        read as a JSON object are skipped by the reader.
        '''
        print("Counting documents, this might take a while...")
        counter = 0

        with jsonlines.open(self.data_path) as reader:
            for _ in tqdm(reader.iter(type=dict, skip_invalid=True)):
                counter += 1
                if counter % 10000 == 0:
                    print("Current count is: " + str(counter))

        print("Last count is: " + str(counter))
        print("Counting done.")
        return counter

    def printDocumentData(self):
        '''Prints every 10000th object of the data file.'''
        with jsonlines.open(self.data_path) as reader:
            counter = 0
            for obj in tqdm(reader.iter(type=dict, skip_invalid=True)):
                counter += 1
                if (counter % 10000) == 0:
                    print("Object number:" + str(counter))
                    print(type(obj))
                    # The record is printed as the plain dict: no Document
                    # class is defined anywhere in this file.
                    print(obj)

    def extractDocumentContents(self, contents):
        '''Joins the "sanitized_html" parts of an article into one plain string.

        contents is expected to be a list of dicts with 'type' and 'content'
        keys. Each usable part is followed by a newline and all markup tags are
        removed from the result. Entries that are not dicts, have another type,
        or carry no text are skipped, so one malformed entry no longer wipes
        out the text collected from the other entries.
        '''
        parts = []

        for item in contents or []:
            if not isinstance(item, dict) or item.get('type') != "sanitized_html":
                continue
            text = item.get('content')
            if isinstance(text, str):
                parts.append(text + "\n")

        return self._TAG_PATTERN.sub('', "".join(parts))

    def extractDocumentDate(self, epochTimestamp):
        '''Converts a UNIX epoch timestamp to a datetime in the 'EST' timezone.

        Only the first 10 characters of the value are used, which turns a
        13-digit millisecond timestamp into seconds.
        Returns datetime(1,1,1) when the value is empty or cannot be converted;
        index() uses that value to recognise wrongly formatted articles.
        '''
        if epochTimestamp == '':
            return datetime(1, 1, 1)

        try:
            seconds = int(str(epochTimestamp)[0:10])
            return datetime.fromtimestamp(seconds, timezone('EST'))
        except (TypeError, ValueError, OverflowError, OSError) as error:
            print("ERROR: In extractDocumentDate()")
            print("ERROR: ", type(error))
            return datetime(1, 1, 1)

    def setIndexDirectory(self, directory):
        self.index_directory = directory

    def getIndexLocation(self):
        return self.index_directory

    def setDataPath(self, file_location):
        self.data_path = file_location

    def getDataPath(self):
        return self.data_path

    def setIndexCreatedTrue(self):
        self.index_created = True

    def _downloadJudgedDocIds(self):
        '''Downloads the TREC 2018 relevance file and returns its document ids as a set.'''
        # Imported here because the file's import block does not load urllib.request.
        import urllib.request

        print("Downloading query results.")
        print("This might take a few seconds...")

        judged_ids = set()
        true_scores_url = "https://trec.nist.gov/data/core/qrels2018.txt"
        with urllib.request.urlopen(true_scores_url) as true_scores_file:
            for line in true_scores_file:
                fields = line.decode("utf-8").split()
                if len(fields) > 2:
                    judged_ids.add(fields[2])   # third column holds the document id

        print("Done downloading query results.")
        return judged_ids

    def _reportProgress(self, counter, fault_counter):
        '''Replaces the notebook cell output with the current indexing totals.'''
        clear_output(wait=True)

        print("Looping over data. Indexing each article.")
        print("This might take a few minutes...")
        print("Indexed " + str(counter) + " articles")

        if fault_counter > 0:
            print("Found " + str(fault_counter) + " wrongly formatted articles")

    def index(self, full_index=True):
        '''Creates the index in index_directory from the file at data_path.

        full_index -- True (default) indexes every article and commits every
                      10000 articles. False only indexes articles whose id
                      occurs in the TREC 2018 relevance file (downloaded first)
                      and commits every 100 articles.

        Articles whose publication date cannot be converted are skipped and
        counted as wrongly formatted. Documents are committed in batches, and
        the remaining partial batch is committed after the loop so that the
        last articles are not lost.
        '''
        # https://whoosh.readthedocs.io/en/latest/api/writing.html
        # https://appliedmachinelearning.blog/2018/07/31/developing-a-fast-indexing-and-full-text-search-engine-with-whoosh-a-pure-python-library/
        # TODO: critically look at different fields, which are important?, which need to be shown to the user? etc.

        # Both variables have to be set
        assert self.index_directory and self.data_path, \
            "index directory and data path have to be set before indexing"

        # Define fields using whoosh's 'Schema' | https://whoosh.readthedocs.io/en/latest/schema.html#
        # Can add field boost here
        schema = Schema(doc_id=whoosh.fields.ID(unique=True, stored=True),
                        article_url=whoosh.fields.STORED,
                        title=whoosh.fields.TEXT(stored=True),
                        author=whoosh.fields.ID,
                        published_date=whoosh.fields.DATETIME,
                        contents=whoosh.fields.TEXT)

        myindex = whoosh.index.create_in(self.index_directory, schema)

        if full_index:
            wanted_ids = None
            batch_size = 10000
        else:
            wanted_ids = self._downloadJudgedDocIds()
            batch_size = 100

        counter = 0
        fault_counter = 0

        # Loop over data
        print("Looping over data. Indexing each article.")
        print("This might take a few minutes...")

        writer = whoosh.writing.AsyncWriter(myindex)
        try:
            with jsonlines.open(self.data_path) as reader:
                for obj in tqdm(reader.iter(type=dict, skip_invalid=True)):
                    if wanted_ids is not None and obj['id'] not in wanted_ids:
                        continue

                    retreived_date = self.extractDocumentDate(obj['published_date'])
                    if retreived_date == datetime(1, 1, 1):
                        fault_counter = fault_counter + 1
                        continue

                    writer.add_document(doc_id=obj['id'],
                                        article_url=obj['article_url'],
                                        title=obj['title'],
                                        author=obj['author'],
                                        published_date=retreived_date,
                                        contents=self.extractDocumentContents(obj['contents']))

                    counter = counter + 1
                    if counter % batch_size == 0:
                        # Commit the batch and continue with a fresh writer.
                        writer.commit()
                        writer = whoosh.writing.AsyncWriter(myindex)
                        self._reportProgress(counter, fault_counter)

            # Commit whatever was added after the last full batch.
            writer.commit()
        except BaseException:
            # Drop the pending batch before passing the error on (this also
            # covers an interrupted notebook cell).
            writer.cancel()
            raise

        print("Looping complete.")
        print("Index created!")
        self.setIndexCreatedTrue()
        
    


# indexer = Indexer()
# #indexer.countDocuments()
# #indexer.printDocumentData()
# #indexer.createDocumentDataStore()
# indexer.index()

In [73]:
###############################################
from whoosh.qparser import QueryParser
from whoosh import scoring
from whoosh.index import open_dir

class Ranking:
    '''This class contains functions that are used to create a ranking based on different algorithms'''

    show_n_results = 3      # number of hits returned by searchTermFrequency()
    index_directory = ""    # directory of the index that is searched

    def __init__(self):
        print("Ranking class has been initiated.")

    def indexCreated(self):
        '''Returns True when an index directory is set and it contains an index.'''
        if not self.index_directory:
            return False
        # Local import: only open_dir is imported from whoosh.index at file level.
        from whoosh.index import exists_in
        return exists_in(self.index_directory)

    def createIndex(self):
        '''Placeholder, not implemented.'''
        pass

    def setIndexDirectory(self, directory):
        self.index_directory = directory

    def openIndex(self):
        '''Opens and returns the index stored in index_directory.'''
        assert self.index_directory, "index directory has to be set before searching"
        return open_dir(self.index_directory)

    def resultsToList(self, results):
        '''Returns the stored fields of every hit as a list of dictionaries.'''
        return [result.fields() for result in results]

    def _search(self, weighting, user_query, limit):
        '''Runs user_query against the "title" field with the given weighting.

        The searcher is closed again before the results are returned.
        NOTE(review): the callers in this file only read hit.docnum and
        hit.score afterwards; reading stored fields from the returned hits
        presumably needs an open searcher -- confirm before relying on it.
        '''
        index = self.openIndex()

        with index.searcher(weighting=weighting) as searcher:
            parsed_query = QueryParser("title", index.schema).parse(user_query)
            return searcher.search(parsed_query, limit=limit)

    def searchTermFrequency(self, user_query, indexer):
        '''Returns results for a given query based on the Term Frequency search algorithm. Returned value is a list of dictionaries.

        The index location is taken from indexer and at most show_n_results
        hits are returned.
        '''
        #TODO check if index has been created
        # TODO: get fieldnames and schema from indexer -> add getters in Indexer class
        # TODO: add parameter that dictates search algorithm
        self.setIndexDirectory(indexer.getIndexLocation())
        index = self.openIndex()

        with index.searcher(weighting=scoring.Frequency) as searcher:
            parsed_query = QueryParser("title", index.schema).parse(user_query)
            results = searcher.search(parsed_query, limit=self.show_n_results)
            # The stored fields are read while the searcher is still open.
            return self.resultsToList(results)

    def searchTermFrequencyReturnResults(self, user_query, limit=10):
        '''Returns the Whoosh results of a Term Frequency search on the title field.

        limit is the maximum number of hits (None for all hits).
        '''
        #TODO check if index has been created
        return self._search(scoring.Frequency, user_query, limit)

    @staticmethod
    def searchTF_IDF():
        '''Returns results for a given query based on the TF-IDF search algorithm. Returned value is a list of dictionaries.

        Placeholder, not implemented. Declared static so that it can be called
        on the class as well as on an instance.
        '''
        pass

    def searchBM25FReturnResults(self, user_query, limit=10):
        '''Returns the Whoosh results of a BM25F search on the title field.

        limit is the maximum number of hits (None for all hits).
        '''
        # The field-specific B uses the field name of the schema built in
        # Indexer.index(), which is 'contents'.
        weighting = scoring.BM25F(B=0.75, contents_B=1.0, K1=1.5)
        return self._search(weighting, user_query, limit)

    @staticmethod
    def searchBM25F():
        '''Returns results for a given query based on the BM25F search algorithm. Returned value is a list of dictionaries.

        Placeholder, not implemented. Declared static so that it can be called
        on the class as well as on an instance.
        '''
        pass
        
    

In [74]:
################################################
import urllib

class Evaluation:
    '''Used to evaluate the ranking results'''

    # TODO pass queries on to ranking and obtain ranking results
    # TODO compare ranking results to TREC evaluation
    # TODO display results, how?
    # TODO test different algorithms

    queries = []
    # No Ranking object is created while the class is being defined anymore;
    # nothing in this class uses the attribute.
    ranking = None

    TOPICS_URL = "https://trec.nist.gov/data/core/topics2018.txt"
    QRELS_URL = "https://trec.nist.gov/data/core/qrels2018.txt"
    RESULTS_FILE = "results.txt"

    @staticmethod
    def _parseTopics(lines):
        '''Extracts (title, number) pairs from the lines of a topics file.

        A title is the text between <title> and </title>, on the same line or
        spread over several lines (joined with a space). A number is the text
        of a <num> line without the tags and the "Number: " label. Titles and
        numbers are paired in the order in which they appear.
        '''
        titles, numbers = [], []
        title_parts = None   # None while outside a <title> element

        for line in lines:
            if "<num>" in line:
                num = line.replace("<num>", "").replace("</num>", "")
                numbers.append(num.replace("Number: ", "").strip())

            text = line
            if "<title>" in text:
                title_parts = []
                text = text.split("<title>", 1)[1]

            if title_parts is None:
                continue

            closing = "</title>" in text
            if closing:
                text = text.split("</title>", 1)[0]
            if text.strip():
                title_parts.append(text.strip())
            if closing:
                titles.append(" ".join(title_parts))
                title_parts = None

        return list(zip(titles, numbers))

    @staticmethod
    def _scorePredictions(predictions, true_scores):
        '''Returns (correct, incorrect) for the predictions that have a judgement.

        Both arguments are lists of column lists. A prediction is matched to
        the first judgement with the same first and third column; it counts as
        correct when the fourth columns are equal. Rows with fewer than four
        columns are ignored.
        NOTE(review): addResults() writes the ranking score in the fourth
        column, so equality with the judgement column looks unlikely -- confirm
        the intended measure.
        '''
        judgements = {}
        for row in true_scores:
            if len(row) >= 4:
                # setdefault keeps the first judgement of a (topic, document) pair
                judgements.setdefault((row[0], row[2]), row[3])

        correct_preds, incorrect_preds = 0, 0
        for pred in predictions:
            if len(pred) < 4:
                continue
            key = (pred[0], pred[2])
            if key not in judgements:
                continue
            if pred[3] == judgements[key]:
                correct_preds += 1
            else:
                incorrect_preds += 1

        return correct_preds, incorrect_preds

    def load_queries(self):
        '''Downloads the TREC 2018 topics and returns a list of (title, number) tuples.

        The list is also kept in self.queries.
        '''
        # Imported here because a plain "import urllib" does not load the request submodule.
        import urllib.request

        with urllib.request.urlopen(self.TOPICS_URL) as query_file:
            lines = [line.decode("utf-8") for line in query_file]

        self.queries = self._parseTopics(lines)
        return self.queries

    def addResults(self, results_formatted, results, query, number):
        '''Appends one "<number> 0 <docnum> <score>" line per hit to results_formatted.

        Prints the number of hits for the query and returns results_formatted.
        NOTE(review): hit.docnum is written in the document column; the final
        message of compareResults() suggests these values do not match the
        document ids of the relevance file yet.
        '''
        count = 0
        for hit in results:
            results_formatted.append(f"{number} 0 {hit.docnum} {hit.score}")
            count += 1
        print("for query {} there are {} hits".format(query, count))
        return results_formatted

    def writeResults(self, results):
        '''Writes every result line to results.txt, one per line.'''
        with open(self.RESULTS_FILE, "w") as results_file:
            results_file.writelines(result + "\n" for result in results)

    def compareResults(self):
        '''Compares results.txt with the TREC 2018 relevance file and prints the outcome.

        Prints the share of matched predictions whose fourth column equals the
        judgement as a percentage, or a notice when no prediction could be
        matched at all.
        '''
        # Imported here because a plain "import urllib" does not load the request submodule.
        import urllib.request

        # read in results.txt
        with open(self.RESULTS_FILE, "r") as results_file:
            predictions = [line.split() for line in results_file]

        # read in true scores file
        with urllib.request.urlopen(self.QRELS_URL) as true_scores_file:
            true_scores = [line.decode("utf-8").split() for line in true_scores_file]

        # compare results.txt (preds) with true scores
        correct_preds, incorrect_preds = self._scorePredictions(predictions, true_scores)

        if correct_preds == incorrect_preds == 0:
            print("No matches found, because doc id's do not yet work")
        else:
            # '%' formatting multiplies the fraction by 100
            print("Performance: {:.2%}".format(correct_preds / (correct_preds + incorrect_preds)))
        


Ranking class has been initiated.


In [75]:
##################################################
class UserInterface:
    '''Class used to handle user interaction'''
    # TODOS:
    # implement methods that asks the user to set different options

    def __init__(self):
        '''This is the constructor method of the UI'''
        print("UserInterface has been initiated.")

    @staticmethod
    def _isAffirmative(answer):
        '''Returns True when answer is "y" or "yes", ignoring case and surrounding spaces.'''
        return answer.strip().lower() in ('y', 'yes')

    def _askYesNo(self, question):
        '''Prints question, reads an answer and returns True when it is affirmative.'''
        print(question)
        return self._isAffirmative(input("Answer [y/n]: "))

    def getUserQuery(self):
        '''Retrieves the query from the user and returns it'''
        userInput = input("Please enter a query: ")
        return userInput

    def indexAlreadyCreated(self, directory_location):
        '''Tells the user which directory holds the index.'''
        print("Index is set.")
        print("Index files are stored in directory:" + directory_location)
        return

    def shouldCreateIndex(self):
        '''Asks whether a new index has to be created.'''
        return self._askYesNo("Do you wish to create a new index?")

    def indexIsNotSet(self):
        '''Tells the user that no index has been set.'''
        print("Index has not been set.")
        return

    def getIndexDirectory(self, default_directory):
        '''Asks for the index directory; an empty answer selects default_directory.'''
        userInput = input("Please enter a directory name for the index (default = /"+ default_directory + "): ")
        if userInput == "":
            return default_directory
        return userInput

    def getDataPath(self, default_data_path):
        '''Asks for the path of the data file; an empty answer selects default_data_path.'''
        print("Please enter the path to the TREC_Washington_Post_collection.v2.jl file.")
        print("The default location is: /"+ default_data_path + "): ")
        userInput = input()
        if userInput == "":
            return default_data_path
        return userInput

    def creatingIndex(self, index_directory, data_path):
        '''Placeholder, not implemented.'''
        pass

    def shouldAddExistingIndex(self):
        '''Asks whether an existing index has to be used.'''
        return self._askYesNo("Do you wish to add an existing index?")

    def stopSearchEngine(self):
        print("Search Engine is stopped.")

    def shouldTerminateSearchEngine(self):
        '''Asks whether the search engine has to be stopped.'''
        return self._askYesNo("Do you wish to stop the search engine?")

    def printResults(self, results):
        '''Prints every result on its own line.'''
        print("\nPrinting results:")
        for result in results:
            print(result)
    

In [76]:
# Block used to run everything:
# TODO expand this 'Controller' class that handles information flow
# TODO add querying functionality
# TODO implement choice of search algorithm
# TODO implement evaluation mode
import time

class SearchEngine:
    '''This class embodies the search engine and acts as a controller class'''

    # Collaborators, created in run()
    UI = None
    Indexer = None
    Ranking = None

    RUNNING = False
    STOPPED = False           # set when the user chooses to stop during setIndex()
    EVALUATION_MODE = True    # run the TREC queries once and stop
    USER_MODE = False         # ask the user for queries

    user_query = ""
    DEFAULT_data_path = 'WP-corpus/data/TREC_Washington_Post_collection.v2.jl'
    DEFAULT_index_directory = "indexdir"

    def __init__(self):
        print("Search Engine has been initiated.")

    def stopSearchEngine(self, UI):
        '''Stops the main loop and lets the UI report it.'''
        self.RUNNING = False
        UI.stopSearchEngine()

    def _configureIndexer(self, Indexer, UI):
        '''Asks for the index directory and the data path and stores both in Indexer.

        Returns (index_directory, data_path).
        '''
        index_directory = UI.getIndexDirectory(self.DEFAULT_index_directory)
        Indexer.setIndexDirectory(index_directory)

        data_path = UI.getDataPath(self.DEFAULT_data_path)
        Indexer.setDataPath(data_path)

        return index_directory, data_path

    def setIndex(self, Indexer, UI):
        '''Makes sure an index is available, asking the user how to obtain one.

        The user can create a new index, use an existing one, or stop the
        search engine (which sets STOPPED). When none of these is chosen the
        index stays unset.
        '''
        if Indexer.indexCreated():
            UI.indexAlreadyCreated(Indexer.getIndexLocation())
            return

        UI.indexIsNotSet()

        if UI.shouldCreateIndex(): # This is for creating a new index
            index_directory, data_path = self._configureIndexer(Indexer, UI)
            # Makes the index directory if it doesn't exist
            os.makedirs(index_directory, exist_ok=True)

            UI.creatingIndex(index_directory, data_path)
            Indexer.index()

        elif UI.shouldAddExistingIndex(): # This is for adding an existing index
            # TODO communicate chosen directories and chosen data file
            index_directory, data_path = self._configureIndexer(Indexer, UI)

            # An existing index is only accepted when the directory really
            # contains one; an empty directory is no longer created here.
            from whoosh.index import exists_in
            if not (os.path.isdir(index_directory) and exists_in(index_directory)):
                print("No index found in directory: " + index_directory)
                return

            Indexer.setIndexCreatedTrue()

        elif UI.shouldTerminateSearchEngine():
            self.STOPPED = True

    def _runEvaluation(self):
        '''Runs all TREC queries, writes the results and compares them to the judgements.'''
        # This is used to evaluate SE against the TREC relevance judgements
        self.Ranking.setIndexDirectory(self.Indexer.getIndexLocation())

        evaluation = Evaluation()
        queries = evaluation.load_queries()
        print("Queries loaded")

        results_formatted = []
        for (query, number) in tqdm(queries):
            results = self.Ranking.searchBM25FReturnResults(query)
            results_formatted = evaluation.addResults(results_formatted, results, query, number)

        evaluation.writeResults(results_formatted)
        evaluation.compareResults()

        print("Done")
        self.RUNNING = False

    def _runUserQuery(self):
        '''Asks the user for one query and prints its results.'''
        #TODO ask for search algorithm
        print("Entering USER_MODE")
        user_query = self.UI.getUserQuery()
        print("Entered query: " + user_query)

        results = self.Ranking.searchTermFrequency(user_query, self.Indexer)
        # Nothing to iterate when the search returned no list.
        self.UI.printResults(results or [])

    def run(self):
        '''This function starts the search engine'''
        print('Search Engine started.')

        self.UI = UserInterface()
        self.Indexer = Indexer()
        self.Ranking = Ranking()

        self.STOPPED = False   # a previous run may have left the flag set
        self.RUNNING = True
        while self.RUNNING:
            if not self.Indexer.indexCreated():
                #TODO Inform user that an index needs to be set
                self.setIndex(self.Indexer, self.UI)
                if self.STOPPED: # Stop SE if user did not want to continue
                    self.stopSearchEngine(self.UI)
                    break
                if not self.Indexer.indexCreated():
                    # Still no index: ask again instead of searching without one.
                    print("Starting over.")
                    time.sleep(1)
                    continue
            #TODO ask user to choose a mode

            if self.EVALUATION_MODE:
                self._runEvaluation()

            if self.USER_MODE:
                self._runUserQuery()

            if self.RUNNING:
                print("Starting over.")
                time.sleep(1)

In [77]:
# Start the interactive search engine only when this code is executed directly
# (a notebook cell or a script), not when it is imported.
if __name__ == "__main__":
    mySearchEngine = SearchEngine()
    mySearchEngine.run()

590070it [11:19, 761.25it/s]

Looping over data. Indexing each article.
This might take a few minutes...
Indexed 590000 articles
Found 9 wrongly formatted articles


595025it [11:26, 866.91it/s]


Looping complete.
Index created!


  0%|                                                                                           | 0/50 [00:00<?, ?it/s]

Queries loaded


  2%|█▋                                                                                 | 1/50 [00:03<02:43,  3.33s/it]

for query Women in Parliaments there are 0 hits


  4%|███▎                                                                               | 2/50 [00:03<01:57,  2.45s/it]

for query Black Bear Attacks there are 0 hits


  6%|████▉                                                                              | 3/50 [00:04<01:25,  1.83s/it]

for query Airport Security there are 10 hits


  8%|██████▋                                                                            | 4/50 [00:04<01:03,  1.37s/it]

for query Wildlife Extinction there are 0 hits


 10%|████████▎                                                                          | 5/50 [00:04<00:47,  1.06s/it]

for query Health and Computer Terminals there are 0 hits


 12%|█████████▉                                                                         | 6/50 [00:05<00:37,  1.18it/s]

for query human smuggling there are 1 hits


 14%|███████████▌                                                                       | 7/50 [00:05<00:29,  1.43it/s]

for query transportation tunnel disasters there are 0 hits


 16%|█████████████▎                                                                     | 8/50 [00:05<00:24,  1.71it/s]

for query piracy there are 10 hits


 18%|██████████████▉                                                                    | 9/50 [00:06<00:21,  1.92it/s]

for query hydrogen energy there are 1 hits


 20%|████████████████▍                                                                 | 10/50 [00:06<00:18,  2.12it/s]

for query euro opposition there are 0 hits


 22%|██████████████████                                                                | 11/50 [00:06<00:18,  2.17it/s]

for query mercy killing there are 0 hits


 24%|███████████████████▋                                                              | 12/50 [00:07<00:15,  2.40it/s]

for query automobile recalls there are 0 hits


 26%|█████████████████████▎                                                            | 13/50 [00:07<00:15,  2.45it/s]

for query Amazon rain forest there are 1 hits


 28%|██████████████████████▉                                                           | 14/50 [00:07<00:13,  2.60it/s]

for query tropical storms there are 4 hits


 30%|████████████████████████▌                                                         | 15/50 [00:08<00:14,  2.39it/s]

for query Cuba, sugar, exports there are 0 hits


 32%|██████████████████████████▏                                                       | 16/50 [00:08<00:13,  2.43it/s]

for query art, stolen, forged there are 0 hits


 34%|███████████████████████████▉                                                      | 17/50 [00:09<00:12,  2.59it/s]

for query law enforcement, dogs there are 0 hits


 36%|█████████████████████████████▌                                                    | 18/50 [00:09<00:11,  2.75it/s]

for query UV damage, eyes there are 0 hits


 38%|███████████████████████████████▏                                                  | 19/50 [00:09<00:10,  2.83it/s]

for query Greek, philosophy, stoicism there are 0 hits


 40%|████████████████████████████████▊                                                 | 20/50 [00:10<00:10,  2.98it/s]

for query inventions, scientific discoveries there are 0 hits


 42%|██████████████████████████████████▍                                               | 21/50 [00:10<00:09,  3.08it/s]

for query heroic acts there are 0 hits


 44%|████████████████████████████████████                                              | 22/50 [00:10<00:09,  3.03it/s]

for query women clergy there are 3 hits


 46%|█████████████████████████████████████▋                                            | 23/50 [00:11<00:08,  3.17it/s]

for query human stampede there are 0 hits


 48%|███████████████████████████████████████▎                                          | 24/50 [00:11<00:07,  3.27it/s]

for query food stamps increase there are 0 hits


 50%|█████████████████████████████████████████                                         | 25/50 [00:11<00:07,  3.35it/s]

for query college education advantage there are 0 hits


 52%|██████████████████████████████████████████▋                                       | 26/50 [00:11<00:07,  3.42it/s]

for query Africa polio vaccination there are 0 hits


 54%|████████████████████████████████████████████▎                                     | 27/50 [00:12<00:07,  2.93it/s]

for query women driving in Saudi Arabia there are 0 hits


 56%|█████████████████████████████████████████████▉                                    | 28/50 [00:12<00:08,  2.61it/s]

for query declining middle class in U.S. there are 0 hits


 58%|███████████████████████████████████████████████▌                                  | 29/50 [00:13<00:07,  2.78it/s]

for query "Women on 20s" there are 0 hits


 60%|█████████████████████████████████████████████████▏                                | 30/50 [00:13<00:07,  2.73it/s]

for query eating invasive species there are 1 hits


 62%|██████████████████████████████████████████████████▊                               | 31/50 [00:13<00:07,  2.70it/s]

for query computers and paralyzed people there are 0 hits


 64%|████████████████████████████████████████████████████▍                             | 32/50 [00:14<00:07,  2.54it/s]

for query Chavez medical treatment in Cuba there are 0 hits


 66%|██████████████████████████████████████████████████████                            | 33/50 [00:14<00:07,  2.27it/s]

for query Boston marathon bombing verdict there are 0 hits


 68%|███████████████████████████████████████████████████████▊                          | 34/50 [00:15<00:06,  2.32it/s]

for query protect Earth from asteroids there are 0 hits


 70%|█████████████████████████████████████████████████████████▍                        | 35/50 [00:15<00:06,  2.26it/s]

for query diabetes and toxic chemicals there are 0 hits


 72%|███████████████████████████████████████████████████████████                       | 36/50 [00:16<00:05,  2.34it/s]

for query car hacking there are 1 hits


 74%|████████████████████████████████████████████████████████████▋                     | 37/50 [00:16<00:05,  2.23it/s]

for query social media and teen suicide there are 0 hits


 76%|██████████████████████████████████████████████████████████████▎                   | 38/50 [00:17<00:05,  2.26it/s]

for query marijuana potency there are 1 hits


 78%|███████████████████████████████████████████████████████████████▉                  | 39/50 [00:17<00:04,  2.38it/s]

for query China one-child impact there are 0 hits


 80%|█████████████████████████████████████████████████████████████████▌                | 40/50 [00:17<00:04,  2.30it/s]

for query Jason Rezaian released from Iran there are 0 hits


 82%|███████████████████████████████████████████████████████████████████▏              | 41/50 [00:18<00:04,  2.22it/s]

for query federal minimum wage increase there are 0 hits


 84%|████████████████████████████████████████████████████████████████████▉             | 42/50 [00:18<00:03,  2.39it/s]

for query Alan Gross released by Cuba there are 0 hits


 86%|██████████████████████████████████████████████████████████████████████▌           | 43/50 [00:19<00:02,  2.34it/s]

for query eggs in a healthy diet there are 0 hits


 88%|████████████████████████████████████████████████████████████████████████▏         | 44/50 [00:19<00:02,  2.48it/s]

for query U.S. age demographics there are 0 hits


 90%|█████████████████████████████████████████████████████████████████████████▊        | 45/50 [00:19<00:01,  2.56it/s]

for query bacterial infection mortality rate there are 0 hits


 92%|███████████████████████████████████████████████████████████████████████████▍      | 46/50 [00:20<00:01,  2.66it/s]

for query email scams there are 0 hits


 94%|█████████████████████████████████████████████████████████████████████████████     | 47/50 [00:20<00:01,  2.79it/s]

for query Sony cyberattack there are 5 hits


 96%|██████████████████████████████████████████████████████████████████████████████▋   | 48/50 [00:20<00:00,  2.79it/s]

for query control of MRSA there are 0 hits


 98%|████████████████████████████████████████████████████████████████████████████████▎ | 49/50 [00:21<00:00,  2.80it/s]

for query Bezos purchases Washington Post there are 0 hits


100%|██████████████████████████████████████████████████████████████████████████████████| 50/50 [00:21<00:00,  2.30it/s]

for query ethanol and food prices there are 0 hits





No matches found, because doc id's do not yet work
Done
Starting over.
