In [8]:
# Define the documents
document1 = "The quick brown fox jumped over the lazy dog ."
document2 = "The lazy dog slept in the sun ."

# Step 1: Tokenize the documents
# Convert each document to lowercase and split it into words
tokens1 = document1.lower().split()
tokens2 = document2.lower().split()

# Combine the tokens into a list of unique terms
terms = list(set(tokens1 + tokens2))

# Step 2: Build the inverted index
# Create an empty dictionary to store the inverted index
inverted_index = {}

# For each term, find the documents that contain it
for term in terms:
    documents = []
    if term in tokens1:
        documents.append("Document 1")
    if term in tokens2:
        documents.append("Document 2")
    inverted_index[term] = documents

# Step 3: Print the inverted index
print("Inverted Index:")
for term, documents in inverted_index.items():
    print(term, "->", ", ".join(documents))

# Step 4: Search Query
query = input("\nEnter your search query: ").lower()  # Get the search query from the user
query_terms = query.split()  # Split query into individual terms

# Find the documents for the query
result_docs = set()  # To store the matching documents

# Iterate over the query terms and retrieve documents
for term in query_terms:
    if term in inverted_index:
        result_docs.update(inverted_index[term])  # Add documents that contain the query term

# Step 5: Display the results
if result_docs:
    print("\nDocuments matching the query:")
    for doc in result_docs:
        print(doc)
else:
    print("\nNo documents found for the query.")


Inverted Index:
dog -> Document 1, Document 2
the -> Document 1, Document 2
brown -> Document 1
slept -> Document 2
in -> Document 2
fox -> Document 1
lazy -> Document 1, Document 2
over -> Document 1
. -> Document 1, Document 2
jumped -> Document 1
quick -> Document 1
sun -> Document 2

Enter your search query: sun

Documents matching the query:
Document 2


# Explanation:
1. Inverted Index Construction: The inverted index is created the same way as before, mapping each word to the documents it appears in.
2. Search Input: We prompt the user to input a search query.

      -The query is converted to lowercase and split into individual terms.
3. Search Query Processing: We search the inverted index for each query term and find which documents contain it.
4. Result Display: If any matching documents are found, they are displayed. Otherwise, a message indicates no matches were found.# 

# How it Works:
- Inverted Index: The program builds an index of words (terms) and the documents where those words appear.
- Search: The user enters a search query, and the program checks which documents contain all the words in the query.
- Results: The documents containing all the terms from the query are displayed.

# Alternative short


In [5]:
# Define the documents
document1 = "The quick brown fox jumped over the lazy dog."
document2 = "The lazy dog slept in the sun."

# Step 1: Tokenize the documents
# Convert each document to lowercase and split it into words
tokens1 = document1.lower().split()
tokens2 = document2.lower().split()

# Combine the tokens into a list of unique terms
terms = list(set(tokens1 + tokens2))

# Step 2: Build the inverted index
# Create an empty dictionary to store the inverted index
inverted_index = {}

# For each term, find the documents that contain it
for term in terms:
	documents = []
	if term in tokens1:
		documents.append("Document 1")
	if term in tokens2:
		documents.append("Document 2")
	inverted_index[term] = documents

# Step 3: Print the inverted index
for term, documents in inverted_index.items():
	print(term, "->", ", ".join(documents))


dog -> Document 2
the -> Document 1, Document 2
brown -> Document 1
slept -> Document 2
in -> Document 2
fox -> Document 1
lazy -> Document 1, Document 2
sun. -> Document 2
over -> Document 1
jumped -> Document 1
quick -> Document 1
dog. -> Document 1


In [None]:
# Step 1: Define three documents with sample text
# Each document contains a text string of approximately 50 words.

doc1 = """In the quiet village, there was a small bakery where the aroma of fresh bread
filled the air every morning. The baker was known for his skill and dedication,
baking pastries, cakes, and breads for the villagers. People from nearby towns
would visit, drawn by the delicious smells and warm hospitality."""

doc2 = """In a bustling city, the streets were lined with markets where vendors
sold fresh produce, handmade goods, and spices. The city was a melting pot
of cultures and traditions, with people from all over the world. Every day was
an opportunity to meet someone new and learn something interesting."""

doc3 = """The countryside was calm and serene, with green fields stretching to the
horizon. Farmers worked the land, growing crops that would be harvested in the fall.
The village was close-knit, with everyone knowing each other and sharing in the
community’s joys and challenges. Nature provided a peaceful backdrop to daily life."""

documents = {"Document 1": doc1, "Document 2": doc2, "Document 3": doc3}

# Step 2: Create an inverted index
# An inverted index is a dictionary where each word maps to a list of documents containing that word.

from collections import defaultdict
import re

def build_inverted_index(documents):
    """
    This function builds an inverted index from a dictionary of documents.
    :param documents: A dictionary with document names as keys and text as values
    :return: Inverted index dictionary
    """
    inverted_index = defaultdict(list)
    
    for doc_name, text in documents.items():
        # Tokenize the text (split into words) and clean
        words = re.findall(r'\w+', text.lower())  # Convert to lowercase and extract words
        
        for word in set(words):  # Use set to avoid duplicate words in the same document
            inverted_index[word].append(doc_name)
    
    return dict(inverted_index)  # Convert defaultdict to regular dict for readability

# Build the inverted index
inverted_index = build_inverted_index(documents)
print("Inverted Index:\n", inverted_index)

# Step 3: Function to retrieve documents based on a search query
# The search will look for documents containing all words in the query.

def search_documents(query, inverted_index):
    """
    This function searches for documents containing all words in the query.
    :param query: A search query string
    :param inverted_index: Inverted index dictionary
    :return: List of document names containing all query words
    """
    query_words = re.findall(r'\w+', query.lower())  # Tokenize and clean the query
    relevant_docs = []
    
    for word in query_words:
        if word in inverted_index:
            if not relevant_docs:
                relevant_docs = set(inverted_index[word])  # Initialize with first word's docs
            else:
                relevant_docs = relevant_docs.intersection(inverted_index[word])  # Find common docs
        else:
            return []  # If any word in query is not found, no documents are relevant
    
    return list(relevant_docs)

# Step 4: Perform a search
query = "village bakery"
results = search_documents(query, inverted_index)
print(f"\nDocuments containing '{query}':\n", results)