In [None]:
# Toy corpus used by every cell below: document ID -> raw text.
documents = dict(
    doc1="The quick brown fox jumped over the lazy dog",
    doc2="The lazy dog slept in the sun",
    doc3="Quick brown animals are amazing",
    doc4="The sun is bright and amazing",
)

In [None]:
# Build the inverted index: lower-cased word -> list of IDs of the documents
# containing it, each ID listed once, in the order the documents are visited.
inverted_index = {}
for doc_id, text in documents.items():
    words = text.lower().split()
    for word in words:
        postings = inverted_index.setdefault(word, [])
        # A word may repeat inside one document; record the document only once.
        if doc_id not in postings:
            postings.append(doc_id)

In [None]:
# Dump the index alphabetically, one "word -> doc, doc, ..." line per term.
for word in sorted(inverted_index):
    docs = inverted_index[word]
    print(f"{word} -> {', '.join(docs)}")

amazing -> doc3, doc4
and -> doc4
animals -> doc3
are -> doc3
bright -> doc4
brown -> doc1, doc3
dog -> doc1, doc2
fox -> doc1
in -> doc2
is -> doc4
jumped -> doc1
lazy -> doc1, doc2
over -> doc1
quick -> doc1, doc3
slept -> doc2
sun -> doc2, doc4
the -> doc1, doc2, doc4


In [None]:
# Single-word search
def search_word(word):
    """Return the IDs of the documents that contain ``word``.

    The query is lower-cased before the lookup, matching the lower-casing
    applied to the text when ``inverted_index`` is built, so the search is
    case-insensitive.

    Args:
        word: The term to look up (a string).

    Returns:
        A new list of document IDs in the order stored in the index, or an
        empty list when the term is not indexed. The list is a copy, so
        callers can sort, extend or otherwise mutate it without corrupting
        ``inverted_index``.
    """
    # list(...) copies the posting list instead of handing out the index's
    # own mutable storage.
    return list(inverted_index.get(word.lower(), ()))

In [None]:
# Demo: look up one term, then print the full text of every matching document.
search_term = "lazy"
result = search_word(search_term)
print(f"Documents containing '{search_term}': {result}")
for doc_id in result:
    # The IDs returned by the search are used as keys into `documents`.
    print(f"{doc_id}: {documents[doc_id]}")

Documents containing 'lazy': ['doc1', 'doc2']
doc1: The quick brown fox jumped over the lazy dog
doc2: The lazy dog slept in the sun


In [None]:
# Multi-word search (AND)
def search_and(words):
    """Return the IDs of the documents that contain every term in ``words``.

    Args:
        words: A sequence of terms. Each one is looked up via
            ``search_word``.

    Returns:
        A new list of document IDs, in the order they appear in the result
        of looking up the first term. An empty list is returned when
        ``words`` is empty or when no document contains all of the terms.
        The list never aliases the list returned for any single term, even
        for a one-word query.
    """
    if not words:
        return []

    # Copy the first result so a one-word query does not return the very
    # list object produced by the lookup (which callers might then mutate).
    results = list(search_word(words[0]))
    for word in words[1:]:
        if not results:
            # The intersection is already empty; further terms cannot add to it.
            break
        # Set membership keeps each filtering pass linear in len(results).
        word_docs = set(search_word(word))
        results = [doc for doc in results if doc in word_docs]
    return results

In [None]:
# Demo: AND query, i.e. only documents containing every term are listed.
search_terms = ["the", "dog"]
and_result = search_and(search_terms)
print(f"Documents containing ALL words {search_terms}: {and_result}")
for doc_id in and_result:
    # The IDs returned by the search are used as keys into `documents`.
    print(f"{doc_id}: {documents[doc_id]}")

Documents containing ALL words ['the', 'dog']: ['doc1', 'doc2']
doc1: The quick brown fox jumped over the lazy dog
doc2: The lazy dog slept in the sun


In [None]:
def search_or(words):
    """Return the IDs of the documents that contain at least one of ``words``.

    Args:
        words: An iterable of terms. Each one is looked up via
            ``search_word``.

    Returns:
        A list of unique document IDs in first-seen order: terms are
        processed in the order given and, for each term, documents are taken
        in the order ``search_word`` returns them. An empty list is returned
        when ``words`` is empty or nothing matches.
    """
    # A dict is used as an insertion-ordered set. Collecting into a plain set
    # and converting it to a list would make the order depend on string
    # hashing, which can differ between interpreter runs.
    seen = {}
    for word in words:
        for doc in search_word(word):
            seen.setdefault(doc, None)
    return list(seen)

In [None]:
# Demo: OR query, i.e. documents containing any of the terms are listed.
# The terms live in one variable so the printed label always matches the
# query that was actually run.
or_terms = ["amazing", "bright"]
or_result = search_or(or_terms)
print(f"Documents containing ANY of {or_terms}: {or_result}")
for doc_id in or_result:
    # The IDs returned by the search are used as keys into `documents`.
    print(f"{doc_id}: {documents[doc_id]}")

Documents containing ANY of ['amazing', 'bright']: ['doc3', 'doc4']
doc3: Quick brown animals are amazing
doc4: The sun is bright and amazing


In [None]:
def create_position_index():
    pos_index = {}
    for doc_id, text in documents.items():
        words = text.lower().split()
        for pos, word in enumerate(words):
            if word not in pos_index:
                pos_index[word] = {}
            if doc_id not in pos_index[word]:
                pos_index[word][doc_id] = []
            pos_index[word][doc_id].append(pos)
    return pos_index

In [None]:
# Build the positional index (called with no arguments) and keep the result.
position_index = create_position_index()
# Each entry maps a document ID to the 0-based token positions of the word
# within that document's lower-cased, whitespace-split text.
print("Word 'the' positions:", position_index["the"])

Word 'the' positions: {'doc1': [0, 6], 'doc2': [0, 5], 'doc4': [0]}
