In [1]:

# Toy corpus keyed by document ID; IDs are assigned sequentially from 1 in
# the order the texts are listed.
documents = dict(enumerate(
    (
        "The cat chased the dog around the garden",
        "She was sitting in the garden last night",
        "I read the book the night before",
    ),
    start=1,
))
import re


def preprocess(text):
    """Return the lowercase word tokens of *text* with punctuation removed.

    Every character that is neither a word character (``\\w``) nor
    whitespace is deleted, then the result is split on runs of whitespace.
    An empty or whitespace-only string yields an empty list.
    """
    return re.sub(r'[^\w\s]', '', text.lower()).split()

# Tokenize every document once, keeping the same document IDs as keys.
preprocessed_docs = {
    doc_id: preprocess(content) for doc_id, content in documents.items()
}

print(preprocessed_docs)

# Inverted index: token -> list of IDs of the documents that contain it,
# in the order the documents are visited.
inverted_index = {}

for doc_id, tokens in preprocessed_docs.items():
    # dict.fromkeys() collapses repeated tokens within one document while
    # keeping first-occurrence order. Each doc_id is a unique dict key and is
    # therefore visited exactly once, so no membership scan of the posting
    # list is needed to keep it free of duplicate IDs.
    for token in dict.fromkeys(tokens):
        inverted_index.setdefault(token, []).append(doc_id)


print("Inverted Index:")
for term, doc_list in inverted_index.items():
    print(f"{term} → {doc_list}")

# Boolean OR query: a document matches if it contains at least one term.
query_terms = ["garden", "night"]

# One posting set per query term. Index keys were lowercased during
# preprocessing, so terms are lowercased before lookup; a term that is not
# in the index contributes an empty set.
postings = [set(inverted_index.get(term.lower(), [])) for term in query_terms]

# set().union(*postings) also handles an empty query (result: empty set).
result_docs = set().union(*postings)

query_label = " OR ".join(query_terms)
print(f"\nDocuments matching '{query_label}':", result_docs)


{1: ['the', 'cat', 'chased', 'the', 'dog', 'around', 'the', 'garden'], 2: ['she', 'was', 'sitting', 'in', 'the', 'garden', 'last', 'night'], 3: ['i', 'read', 'the', 'book', 'the', 'night', 'before']}
Inverted Index:
the → [1, 2, 3]
cat → [1]
chased → [1]
dog → [1]
around → [1]
garden → [1, 2]
she → [2]
was → [2]
sitting → [2]
in → [2]
last → [2]
night → [2, 3]
i → [3]
read → [3]
book → [3]
before → [3]

Documents matching 'garden OR night': {1, 2, 3}
