In [None]:
# Information Storage and Retrieval Practical
# Program: Document Retrieval using Inverted Files
# CO4 – Distributed and Multimedia IR

import os
import string

# Step 1: Folder containing text files
folder = "inverted"   # Folder with your .txt files

# Maps every ASCII punctuation character to a space so that punctuation
# separates tokens ("machine-learning" -> "machine", "learning") instead of
# gluing them together. Built once and reused for documents and queries.
_PUNCT_TO_SPACE = str.maketrans(
    string.punctuation, " " * len(string.punctuation)
)


def tokenize(text):
    """Return the lowercase, punctuation-free tokens of *text*.

    The same normalisation is applied to documents and to queries, so a
    query such as "Machine." matches the indexed term "machine".
    """
    return text.lower().translate(_PUNCT_TO_SPACE).split()


def list_documents(folder):
    """Return the sorted names of the regular ``.txt`` files in *folder*.

    ``os.listdir`` yields names in arbitrary order; sorting makes the
    posting lists (and therefore the program output) reproducible.
    Directories whose name happens to end in ``.txt`` are skipped because
    they cannot be opened as text files.

    Raises:
        FileNotFoundError: if *folder* does not exist.
    """
    return sorted(
        name
        for name in os.listdir(folder)
        if name.endswith('.txt')
        and os.path.isfile(os.path.join(folder, name))
    )


def build_inverted_index(folder, files):
    """Build a term -> list-of-documents mapping for *files* in *folder*.

    Each document appears at most once per term, in the order given by
    *files*. Terms are stored in alphabetical order so that iterating the
    index is deterministic between runs.
    """
    index = {}
    for name in files:
        with open(os.path.join(folder, name), 'r', encoding='utf-8') as f:
            # set() keeps one posting per document no matter how often the
            # term occurs in it.
            terms = set(tokenize(f.read()))
        for term in terms:
            index.setdefault(term, []).append(name)
    return dict(sorted(index.items()))


def search(index, query):
    """Return the sorted documents that contain every term of *query*.

    The query is tokenised exactly like the documents. A query with no
    usable term (empty or punctuation only) matches nothing; a query with
    several terms returns only the documents containing all of them.
    """
    terms = tokenize(query)
    if not terms:
        return []
    postings = [set(index.get(term, ())) for term in terms]
    return sorted(set.intersection(*postings))


if __name__ == "__main__":
    # Step 2: Build the Inverted Index
    files = list_documents(folder)
    inverted_index = build_inverted_index(folder, files)

    print("✅ Inverted Index created successfully!")
    print("-" * 70)

    # Step 3: Display part of the inverted index
    for term, docs in list(inverted_index.items())[:10]:  # show first 10 terms
        print(f"{term} --> {docs}")

    print("-" * 70)

    # Step 4: Query processing
    query = input("Enter a query term: ").lower().strip()
    matches = search(inverted_index, query)

    if matches:
        print(f"Documents containing '{query}':")
        for doc in matches:
            print(f" - {doc}")
    else:
        print(f"No documents found containing the term '{query}'.")


✅ Inverted Index created successfully!
----------------------------------------------------------------------
revolutionizing --> ['doc2.txt']
models --> ['doc2.txt', 'doc1.txt']
analytics --> ['doc2.txt']
outcomes --> ['doc2.txt']
player --> ['doc2.txt']
the --> ['doc2.txt', 'doc4.txt']
fitness --> ['doc2.txt']
predict --> ['doc2.txt']
match --> ['doc2.txt']
team --> ['doc2.txt']
----------------------------------------------------------------------
Enter a query term: machine
Documents containing 'machine':
 - doc2.txt
 - doc3.txt
 - doc1.txt
