In [2]:
# Toy corpus for the demo: one string per document. A document's id is its
# position in this list (0-based).
documents = [
    "Information retrieval is the process of obtaining information from a large collection.",
    "Inverted file is an index structure used to retrieve documents efficiently.",
    "Information retrieval systems use inverted file for fast searching of documents.",
    "The inverted index stores mapping from content to its locations in the database.",
]

In [3]:
# Inverted index, initially empty: maps a term (str) to the list of ids of the
# documents in which that term occurs.
inverted_index = dict()

In [4]:
# Build the inverted index: for every document, record its id under each
# distinct term it contains. Terms are whitespace-separated tokens that are
# lower-cased and have surrounding '.' and ',' removed.
for doc_id, text in enumerate(documents):
    for token in text.lower().split():
        word = token.strip('.,')
        if not word:
            # The token consisted only of punctuation (e.g. "..." or a stray
            # ","); indexing it would create a meaningless empty-string term.
            continue
        # Fetch the posting list for this term, creating it on first sight.
        postings = inverted_index.setdefault(word, [])
        # Membership check (rather than "is it the last id?") so that a term
        # repeated in one document, or a re-run of this cell against an
        # already-populated index, never records the same id twice.
        if doc_id not in postings:
            postings.append(doc_id)

In [5]:
# Dump the index in alphabetical term order, one "term -> [doc ids]" row per
# term, with the term left-aligned in a 15-character column.
print("Inverted File / Index:\n")
for term, postings in sorted(inverted_index.items()):
    print(f"{term:15} -> {postings}")

Inverted File / Index:

a               -> [0]
an              -> [1]
collection      -> [0]
content         -> [3]
database        -> [3]
documents       -> [1, 2]
efficiently     -> [1]
fast            -> [2]
file            -> [1, 2]
for             -> [2]
from            -> [0, 3]
in              -> [3]
index           -> [1, 3]
information     -> [0, 2]
inverted        -> [1, 2, 3]
is              -> [0, 1]
its             -> [3]
large           -> [0]
locations       -> [3]
mapping         -> [3]
obtaining       -> [0]
of              -> [0, 2]
process         -> [0]
retrieval       -> [0, 2]
retrieve        -> [1]
searching       -> [2]
stores          -> [3]
structure       -> [1]
systems         -> [2]
the             -> [0, 3]
to              -> [1, 3]
use             -> [2]
used            -> [1]


In [6]:
# Interactive single-term lookup against the inverted index.
# The raw input is normalised the same way index keys are produced
# (lower-cased, surrounding '.' and ',' removed), and surrounding whitespace
# is dropped first; otherwise entries such as " large" or "collection." would
# be reported as missing even though the term is indexed.
query = input("\nEnter a term to search: ").strip().lower().strip('.,')
if query in inverted_index:
    print(f"\nDocuments containing '{query}':")
    for doc_id in inverted_index[query]:
        # Ids are 0-based list positions; show them 1-based to the user.
        print(f"Doc {doc_id+1}: {documents[doc_id]}")
else:
    print("\nTerm not found in any document.")


Enter a term to search:  large



Documents containing 'large':
Doc 1: Information retrieval is the process of obtaining information from a large collection.
