In [13]:
import json

def load_index(filename):
    """Load the search index from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII words and titles load the same way on every machine.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

def load_tfidf(filename):
    """Load the TF-IDF data from a JSON file.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON document, exactly as ``json.load`` produces it.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII words and titles load the same way on every machine.
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)

def search_tfidf(tfidf_data, article_id, query_word):
    """Return the TF-IDF weight of ``query_word`` in one article.

    Args:
        tfidf_data: Iterable of records, each with a ``'title'`` key and a
            ``'tfidf'`` mapping of word to weight.
        article_id: Title of the article to look up.
        query_word: Word whose weight is wanted.

    Returns:
        The weight stored for ``query_word`` in the first record whose
        title equals ``article_id``; ``0.0`` when the article or the word
        is absent.
    """
    matching = (entry for entry in tfidf_data if entry['title'] == article_id)
    hit = next(matching, None)
    if hit is None:
        # No record carries this title.
        return 0.0
    return hit['tfidf'].get(query_word, 0.0)

def search(index, tfidf_data, query):
    """Evaluate a one-word or ``<word> <OP> <word>`` query and print the hits.

    Matching articles are printed one per line as ``title (score)``, ordered
    by descending TF-IDF weight of the *first* query word; equal scores are
    ordered by title so the output is reproducible between runs.

    Args:
        index: Mapping from a word to a collection of article ids.
        tfidf_data: Iterable of records, each with a ``'title'`` key and a
            ``'tfidf'`` mapping of word to weight.
        query: Either a single word, or ``word1 OP word2`` where ``OP`` is
            ``AND``, ``OR`` or ``NOT`` (``NOT`` means "word1 but not word2").
            Tokens after the third are ignored.

    Returns:
        The set of matching article ids. Malformed queries (empty, missing
        second operand, unknown operator) print a hint and return an empty
        set.
    """
    words = query.split()

    # An empty query or a dangling "<word> <OP>" has nothing to evaluate;
    # report it instead of failing with an IndexError.
    if not words or len(words) == 2:
        print('Query must be "<word>" or "<word> AND|OR|NOT <word>".')
        return set()

    if len(words) == 1:
        # Always hand back a set, whatever container the index stores.
        result = set(index.get(words[0], ()))
    else:
        left = set(index.get(words[0], ()))
        right = set(index.get(words[2], ()))
        op = words[1]
        if op == 'AND':
            result = left & right
        elif op == 'OR':
            result = left | right
        elif op == 'NOT':
            result = left - right
        else:
            print(f"Unknown operator {op!r}; expected AND, OR or NOT.")
            return set()

    # Build the title lookup once rather than rescanning tfidf_data for every
    # hit. setdefault keeps the first record when a title appears twice.
    first_by_title = {}
    if result:
        for article in tfidf_data:
            first_by_title.setdefault(article['title'], article)

    results_with_tfidf = []
    for article_id in result:
        article = first_by_title.get(article_id)
        score = 0.0 if article is None else article['tfidf'].get(words[0], 0.0)
        results_with_tfidf.append((article_id, score))

    # Highest score first; the title is the tie-breaker because set iteration
    # order is not stable across interpreter runs.
    sorted_results = sorted(
        results_with_tfidf, key=lambda pair: (-pair[1], str(pair[0]))
    )

    for article_id, tfidf_value in sorted_results:
        print(f"{article_id} ({tfidf_value})")
    return result

if __name__ == "__main__":
    # Read both data files first, then answer a single interactive query.
    index, tfidf_data = load_index("index.json"), load_tfidf("tf-idf.json")
    query = input("Enter your query: ")
    search(index, tfidf_data, query)

Enter your query:  born NOT garnet


Joseph Gilman (guard) (0.09429915322785601)
Robert Mitwerandu (0.07749882182874171)
Robin Roussel (0.04990485502692381)
Artem Bessalov (0.0225437915661147)
Chester railway station (0.002913677369347455)
