Krishna Sharma | AP22110010128

In [27]:
# Seed corpus: document id -> whitespace-separated text. Ids are assigned
# consecutively from 1 in the order the texts are listed.
documents = dict(enumerate(
    (
        "data algorithm structure",             # doc 1
        "database query system",                # doc 2
        "algorithm analysis optimization",      # doc 3
        "machine learning data model",          # doc 4
        "network protocol security",            # doc 5
        "operating system process scheduling",  # doc 6
        "computer architecture hardware",       # doc 7
        "data mining machine learning",         # doc 8
    ),
    start=1,
))

Dynamic Indexing

In [28]:
def build_initial_index(documents):
    """Build a term -> set-of-doc-ids inverted index from ``documents``.

    Each document's text is lower-cased and split on whitespace; every
    resulting token is mapped to the ids of the documents that contain it.
    The input documents and the resulting index (sorted by term) are
    printed before returning.

    Args:
        documents: Mapping of document id to its text.

    Returns:
        dict mapping each term to the set of document ids containing it.
    """
    postings = {}
    for doc_key, body in documents.items():
        for token in body.lower().split():
            # setdefault creates the posting set the first time a token is seen.
            postings.setdefault(token, set()).add(doc_key)

    print(f"Initial documents: {documents}")
    print(f"Initial index: {dict(sorted(postings.items()))}")
    return postings
    
# Build the index once from the seed corpus.
inverted_index = build_initial_index(documents=documents)

Initial documents: {1: 'data algorithm structure', 2: 'database query system', 3: 'algorithm analysis optimization', 4: 'machine learning data model', 5: 'network protocol security', 6: 'operating system process scheduling', 7: 'computer architecture hardware', 8: 'data mining machine learning'}
Initial index: {'algorithm': {1, 3}, 'analysis': {3}, 'architecture': {7}, 'computer': {7}, 'data': {8, 1, 4}, 'database': {2}, 'hardware': {7}, 'learning': {8, 4}, 'machine': {8, 4}, 'mining': {8}, 'model': {4}, 'network': {5}, 'operating': {6}, 'optimization': {3}, 'process': {6}, 'protocol': {5}, 'query': {2}, 'scheduling': {6}, 'security': {5}, 'structure': {1}, 'system': {2, 6}}


In [29]:
def add(doc_id, text, documents, inverted_index):
    """Add a document and keep the inverted index in sync.

    The text is stored under ``doc_id`` and each lower-cased,
    whitespace-separated token is added to the index. If ``doc_id`` is
    already present, the postings of its previous text are removed first so
    that words which only occurred in the old text do not keep pointing at
    this document.

    Both ``documents`` and ``inverted_index`` are modified in place, and the
    updated state is printed.

    Args:
        doc_id: Identifier of the document being added.
        text: Document text.
        documents: Mapping of document id to text (mutated).
        inverted_index: Mapping of term to set of document ids (mutated).

    Returns:
        None.
    """
    print(f"\nADDING Document {doc_id}: '{text}'")

    # Re-adding an existing id would otherwise leave stale postings behind
    # for every word that appears only in the replaced text.
    if doc_id in documents:
        for word in documents[doc_id].lower().split():
            postings = inverted_index.get(word)
            if postings is not None:
                postings.discard(doc_id)
                if not postings:
                    del inverted_index[word]

    documents[doc_id] = text

    for word in text.lower().split():
        inverted_index.setdefault(word, set()).add(doc_id)

    print(f"Updated documents: {documents}")
    print(f"Updated index: {dict(sorted(inverted_index.items()))}")

In [30]:
def update(doc_id, new_text, documents, inverted_index):
    """Replace the text of an existing document and re-index it.

    If ``doc_id`` is not in ``documents`` a "not found" message is printed
    and nothing is changed. Otherwise the postings of the old text are
    removed (terms left with no documents are dropped from the index), the
    new text is stored, and its tokens are indexed. Both containers are
    modified in place and the updated state is printed.

    Args:
        doc_id: Identifier of the document to update.
        new_text: Replacement text.
        documents: Mapping of document id to text (mutated).
        inverted_index: Mapping of term to set of document ids (mutated).

    Returns:
        None.
    """
    print(f"\nUPDATING Document {doc_id}: '{new_text}'")

    if doc_id not in documents:
        print(f"Document {doc_id} not found!")
        return

    previous_text = documents[doc_id]
    print(f"Old text: '{previous_text}'")

    # Un-index the old text, pruning terms that no longer match any document.
    for token in previous_text.lower().split():
        postings = inverted_index.get(token)
        if postings is None:
            continue
        postings.discard(doc_id)
        if not postings:
            del inverted_index[token]

    documents[doc_id] = new_text
    for token in new_text.lower().split():
        inverted_index.setdefault(token, set()).add(doc_id)

    print(f"Updated documents: {documents}")
    print(f"Updated index: {dict(sorted(inverted_index.items()))}")

In [31]:
def delete(doc_id, documents, inverted_index):
    """Remove a document and its postings from the index.

    If ``doc_id`` is not in ``documents`` a "not found" message is printed
    and nothing is changed. Otherwise the document is removed and its id is
    discarded from the posting set of every token in its text; terms left
    with no documents are dropped from the index. Both containers are
    modified in place and the updated state is printed.

    Args:
        doc_id: Identifier of the document to delete.
        documents: Mapping of document id to text (mutated).
        inverted_index: Mapping of term to set of document ids (mutated).

    Returns:
        None.
    """
    print(f"\nDELETING Document {doc_id}")

    if doc_id not in documents:
        print(f"Document {doc_id} not found!")
        return

    removed_text = documents[doc_id]
    print(f"Deleting: '{removed_text}'")

    del documents[doc_id]

    for token in removed_text.lower().split():
        postings = inverted_index.get(token)
        if postings is None:
            continue
        postings.discard(doc_id)
        if not postings:
            # No document contains this term any more.
            del inverted_index[token]

    print(f"Updated documents: {documents}")
    print(f"Updated index: {dict(sorted(inverted_index.items()))}")

In [32]:
# Index a new document under id 9.
add(
    doc_id=9,
    text="artificial intelligence neural networks",
    documents=documents,
    inverted_index=inverted_index,
)


ADDING Document 9: 'artificial intelligence neural networks'
Updated documents: {1: 'data algorithm structure', 2: 'database query system', 3: 'algorithm analysis optimization', 4: 'machine learning data model', 5: 'network protocol security', 6: 'operating system process scheduling', 7: 'computer architecture hardware', 8: 'data mining machine learning', 9: 'artificial intelligence neural networks'}
Updated index: {'algorithm': {1, 3}, 'analysis': {3}, 'architecture': {7}, 'artificial': {9}, 'computer': {7}, 'data': {8, 1, 4}, 'database': {2}, 'hardware': {7}, 'intelligence': {9}, 'learning': {8, 4}, 'machine': {8, 4}, 'mining': {8}, 'model': {4}, 'network': {5}, 'networks': {9}, 'neural': {9}, 'operating': {6}, 'optimization': {3}, 'process': {6}, 'protocol': {5}, 'query': {2}, 'scheduling': {6}, 'security': {5}, 'structure': {1}, 'system': {2, 6}}


In [33]:
# Replace the text of document 5 and re-index it.
update(
    doc_id=5,
    new_text="cybersecurity network protection firewall",
    documents=documents,
    inverted_index=inverted_index,
)


UPDATING Document 5: 'cybersecurity network protection firewall'
Old text: 'network protocol security'
Updated documents: {1: 'data algorithm structure', 2: 'database query system', 3: 'algorithm analysis optimization', 4: 'machine learning data model', 5: 'cybersecurity network protection firewall', 6: 'operating system process scheduling', 7: 'computer architecture hardware', 8: 'data mining machine learning', 9: 'artificial intelligence neural networks'}
Updated index: {'algorithm': {1, 3}, 'analysis': {3}, 'architecture': {7}, 'artificial': {9}, 'computer': {7}, 'cybersecurity': {5}, 'data': {8, 1, 4}, 'database': {2}, 'firewall': {5}, 'hardware': {7}, 'intelligence': {9}, 'learning': {8, 4}, 'machine': {8, 4}, 'mining': {8}, 'model': {4}, 'network': {5}, 'networks': {9}, 'neural': {9}, 'operating': {6}, 'optimization': {3}, 'process': {6}, 'protection': {5}, 'query': {2}, 'scheduling': {6}, 'structure': {1}, 'system': {2, 6}}


In [34]:
# Index a new document under id 10.
add(
    doc_id=10,
    text="database optimization performance tuning",
    documents=documents,
    inverted_index=inverted_index,
)


ADDING Document 10: 'database optimization performance tuning'
Updated documents: {1: 'data algorithm structure', 2: 'database query system', 3: 'algorithm analysis optimization', 4: 'machine learning data model', 5: 'cybersecurity network protection firewall', 6: 'operating system process scheduling', 7: 'computer architecture hardware', 8: 'data mining machine learning', 9: 'artificial intelligence neural networks', 10: 'database optimization performance tuning'}
Updated index: {'algorithm': {1, 3}, 'analysis': {3}, 'architecture': {7}, 'artificial': {9}, 'computer': {7}, 'cybersecurity': {5}, 'data': {8, 1, 4}, 'database': {2, 10}, 'firewall': {5}, 'hardware': {7}, 'intelligence': {9}, 'learning': {8, 4}, 'machine': {8, 4}, 'mining': {8}, 'model': {4}, 'network': {5}, 'networks': {9}, 'neural': {9}, 'operating': {6}, 'optimization': {10, 3}, 'performance': {10}, 'process': {6}, 'protection': {5}, 'query': {2}, 'scheduling': {6}, 'structure': {1}, 'system': {2, 6}, 'tuning': {10}}


In [35]:
# Remove document 7 and its postings.
delete(doc_id=7, documents=documents, inverted_index=inverted_index)


DELETING Document 7
Deleting: 'computer architecture hardware'
Updated documents: {1: 'data algorithm structure', 2: 'database query system', 3: 'algorithm analysis optimization', 4: 'machine learning data model', 5: 'cybersecurity network protection firewall', 6: 'operating system process scheduling', 8: 'data mining machine learning', 9: 'artificial intelligence neural networks', 10: 'database optimization performance tuning'}
Updated index: {'algorithm': {1, 3}, 'analysis': {3}, 'artificial': {9}, 'cybersecurity': {5}, 'data': {8, 1, 4}, 'database': {2, 10}, 'firewall': {5}, 'intelligence': {9}, 'learning': {8, 4}, 'machine': {8, 4}, 'mining': {8}, 'model': {4}, 'network': {5}, 'networks': {9}, 'neural': {9}, 'operating': {6}, 'optimization': {10, 3}, 'performance': {10}, 'process': {6}, 'protection': {5}, 'query': {2}, 'scheduling': {6}, 'structure': {1}, 'system': {2, 6}, 'tuning': {10}}


In [36]:
# Replace the text of document 3 and re-index it.
update(
    doc_id=3,
    new_text="advanced algorithm complexity analysis",
    documents=documents,
    inverted_index=inverted_index,
)


UPDATING Document 3: 'advanced algorithm complexity analysis'
Old text: 'algorithm analysis optimization'
Updated documents: {1: 'data algorithm structure', 2: 'database query system', 3: 'advanced algorithm complexity analysis', 4: 'machine learning data model', 5: 'cybersecurity network protection firewall', 6: 'operating system process scheduling', 8: 'data mining machine learning', 9: 'artificial intelligence neural networks', 10: 'database optimization performance tuning'}
Updated index: {'advanced': {3}, 'algorithm': {1, 3}, 'analysis': {3}, 'artificial': {9}, 'complexity': {3}, 'cybersecurity': {5}, 'data': {8, 1, 4}, 'database': {2, 10}, 'firewall': {5}, 'intelligence': {9}, 'learning': {8, 4}, 'machine': {8, 4}, 'mining': {8}, 'model': {4}, 'network': {5}, 'networks': {9}, 'neural': {9}, 'operating': {6}, 'optimization': {10}, 'performance': {10}, 'process': {6}, 'protection': {5}, 'query': {2}, 'scheduling': {6}, 'structure': {1}, 'system': {2, 6}, 'tuning': {10}}


Boolean Query Implementation

In [38]:
def boolean_query(query, inverted_index, doc_ids):
    """Evaluate a flat boolean query against the inverted index.

    The query is lower-cased and split on whitespace. It is read as
    ``operand (AND|OR operand)*`` where an operand is ``term`` or
    ``NOT term``. Operators are applied strictly left to right: there is no
    precedence between AND and OR and parentheses are not supported, so
    ``a OR b AND c`` is evaluated as ``(a OR b) AND c``.

    Tokens that do not fit this shape (a trailing operator, two terms in a
    row, ...) are skipped without raising. Terms missing from the index
    match no documents.

    Args:
        query: Query string, e.g. ``"algorithm AND NOT analysis"``.
        inverted_index: Mapping of term to set of document ids.
        doc_ids: Iterable of all document ids; the universe used for NOT.

    Returns:
        A new set of matching document ids. The returned set is never one
        of the index's own posting sets, so callers may mutate it freely.
    """
    tokens = query.lower().split()
    if not tokens:
        return set()

    all_docs = set(doc_ids)

    def _read_operand(pos):
        """Return (matching ids, next token position) for the operand at pos."""
        if tokens[pos] == 'not' and pos + 1 < len(tokens):
            negated = inverted_index.get(tokens[pos + 1], set())
            return all_docs - negated, pos + 2
        # Copy so a single-term query never hands out the index's own set.
        return set(inverted_index.get(tokens[pos], set())), pos + 1

    result, i = _read_operand(0)

    while i < len(tokens):
        operator = tokens[i]
        if operator in ('and', 'or') and i + 1 < len(tokens):
            term, i = _read_operand(i + 1)
            result = result & term if operator == 'and' else result | term
        else:
            # Not an operator followed by an operand: skip the token.
            i += 1

    return result

In [41]:
# List the documents in ascending id order.
print("\nCurrent Documents after Dynamic Operations:")
for doc_id in sorted(documents):
    text = documents[doc_id]
    print(f"  {doc_id}: '{text}'")


Current Documents after Dynamic Operations:
  1: 'data algorithm structure'
  2: 'database query system'
  3: 'advanced algorithm complexity analysis'
  4: 'machine learning data model'
  5: 'cybersecurity network protection firewall'
  6: 'operating system process scheduling'
  8: 'data mining machine learning'
  9: 'artificial intelligence neural networks'
  10: 'database optimization performance tuning'


In [42]:
# List the index alphabetically by term, with each posting set sorted.
print("\nCurrent Index after Dynamic Operations:")
for term in sorted(inverted_index):
    doc_set = inverted_index[term]
    print(f"  {term}: {sorted(doc_set)}")


Current Index after Dynamic Operations:
  advanced: [3]
  algorithm: [1, 3]
  analysis: [3]
  artificial: [9]
  complexity: [3]
  cybersecurity: [5]
  data: [1, 4, 8]
  database: [2, 10]
  firewall: [5]
  intelligence: [9]
  learning: [4, 8]
  machine: [4, 8]
  mining: [8]
  model: [4]
  network: [5]
  networks: [9]
  neural: [9]
  operating: [6]
  optimization: [10]
  performance: [10]
  process: [6]
  protection: [5]
  query: [2]
  scheduling: [6]
  structure: [1]
  system: [2, 6]
  tuning: [10]


In [43]:
# Basic single-operator queries: AND, OR and a leading NOT.
queries = ["algorithm AND data", "network OR security", "NOT machine"]

for q in queries:
    result = boolean_query(q, inverted_index, documents.keys())
    matching_ids = sorted(result)
    print(f"{q:20} -> {matching_ids}")
    for doc_id in matching_ids:
        print(f"    Doc {doc_id}: '{documents[doc_id]}'")

algorithm AND data   -> [1]
    Doc 1: 'data algorithm structure'
network OR security  -> [5]
    Doc 5: 'cybersecurity network protection firewall'
NOT machine          -> [1, 2, 3, 5, 6, 9, 10]
    Doc 1: 'data algorithm structure'
    Doc 2: 'database query system'
    Doc 3: 'advanced algorithm complexity analysis'
    Doc 5: 'cybersecurity network protection firewall'
    Doc 6: 'operating system process scheduling'
    Doc 9: 'artificial intelligence neural networks'
    Doc 10: 'database optimization performance tuning'


In [44]:
# Queries over terms that only exist because of the add/update calls.
queries = [
    "artificial AND intelligence",
    "cybersecurity OR firewall",
    "optimization AND performance",
]

print("\nQuery Set 2 - New Terms from Dynamic Updates:")
for q in queries:
    result = boolean_query(q, inverted_index, documents.keys())
    matching_ids = sorted(result)
    print(f"{q:25} -> {matching_ids}")
    for doc_id in matching_ids:
        print(f"    Doc {doc_id}: '{documents[doc_id]}'")


Query Set 2 - New Terms from Dynamic Updates:
artificial AND intelligence -> [9]
    Doc 9: 'artificial intelligence neural networks'
cybersecurity OR firewall -> [5]
    Doc 5: 'cybersecurity network protection firewall'
optimization AND performance -> [10]
    Doc 10: 'database optimization performance tuning'


In [45]:
# Multi-operator queries, including AND NOT.
queries = [
    "algorithm AND NOT analysis",
    "data OR intelligence AND NOT mining",
    "network AND cybersecurity OR database",
]

print("\nQuery Set 3 - Complex Operations:")
for q in queries:
    result = boolean_query(q, inverted_index, documents.keys())
    matching_ids = sorted(result)
    print(f"{q:35} -> {matching_ids}")
    for doc_id in matching_ids:
        print(f"    Doc {doc_id}: '{documents[doc_id]}'")


Query Set 3 - Complex Operations:
algorithm AND NOT analysis          -> [1]
    Doc 1: 'data algorithm structure'
data OR intelligence AND NOT mining -> [1, 4, 9]
    Doc 1: 'data algorithm structure'
    Doc 4: 'machine learning data model'
    Doc 9: 'artificial intelligence neural networks'
network AND cybersecurity OR database -> [2, 5, 10]
    Doc 2: 'database query system'
    Doc 5: 'cybersecurity network protection firewall'
    Doc 10: 'database optimization performance tuning'
