# Importing Modules

In [1]:
import re

# Postfix Building

In [2]:
def precedence(op):
    """Return the binding strength of a query token.

    'NOT' ranks 3, 'AND' ranks 2 and 'OR' ranks 1, so a larger number means
    the operator binds more tightly. Every other token (search terms,
    parentheses, ...) ranks 0.
    """
    # Listed from weakest to strongest so the position doubles as the rank.
    ranked_operators = ('OR', 'AND', 'NOT')
    for rank, name in enumerate(ranked_operators, start=1):
        if op == name:
            return rank
    return 0

def postfix(l):
    """Convert an infix Boolean query (as a token list) to postfix order.

    Parameters
    ----------
    l : list of str
        Query tokens in infix order. 'AND', 'OR' and 'NOT' are operators,
        '(' and ')' group sub-expressions, anything else is an operand.

    Returns
    -------
    list of str
        The same operands and operators in postfix (reverse Polish) order,
        without parentheses.

    Raises
    ------
    ValueError
        If the parentheses in the query are not balanced.
    """
    operators = ('AND', 'OR', 'NOT')
    pf = []
    stack = []
    for token in l:
        if token in operators:
            rank = precedence(token)
            while stack:
                top_rank = precedence(stack[-1])
                # Binary operators are left-associative, so an operator of
                # equal rank already on the stack is emitted first. NOT is a
                # prefix operator: an earlier NOT must stay on the stack until
                # its operand has been produced, otherwise "NOT NOT a" would
                # emit a NOT before any operand exists.
                if top_rank > rank or (top_rank == rank and token != 'NOT'):
                    pf.append(stack.pop())
                else:
                    # '(' ranks 0, so the scan always stops at a group start.
                    break
            stack.append(token)
        elif token == '(':
            stack.append(token)
        elif token == ')':
            # Flush everything belonging to the group that is being closed.
            while stack and stack[-1] != '(':
                pf.append(stack.pop())
            if not stack:
                raise ValueError("Unbalanced parentheses: ')' has no matching '('")
            stack.pop()  # discard the matching '('
        else:
            pf.append(token)
    while stack:
        pending = stack.pop()
        if pending == '(':
            raise ValueError("Unbalanced parentheses: '(' is never closed")
        pf.append(pending)
    return pf

In [3]:
def getPostings(s,d):
    """Resolve a query operand to its posting set.

    Parameters
    ----------
    s : str or set
        Either a search term, which is looked up in the index, or an
        already computed intermediate result, which is returned unchanged.
    d : dict
        Inverted index mapping a term to its posting set.

    Returns
    -------
    set
        The postings stored for the term, an empty set when the term is not
        in the index, or `s` itself when it is not a string.
    """
    if not isinstance(s, str):
        return s
    try:
        return d[s]
    except KeyError:
        # Only a missing term means "no postings"; any other failure (for
        # example an index that is not a mapping) must stay visible.
        return set()

# Query Evaluation

In [4]:
def queryEval(query,d,l):
    """Evaluate a Boolean query against the inverted index.

    Parameters
    ----------
    query : str
        Infix query whose tokens are separated by whitespace. 'AND', 'OR'
        and 'NOT' are operators; '(' and ')' group sub-expressions.
    d : dict
        Inverted index mapping a term to its posting set.
    l : iterable
        All document identifiers; 'NOT' is computed as the complement
        within this collection.

    Returns
    -------
    set
        The documents satisfying the query (empty for an empty query).

    Raises
    ------
    ValueError
        If an operator does not have enough operands.
    """
    pf = postfix(query.split())
    if len(pf) == 1:
        return getPostings(pf[0], d)

    stack = []

    def pop_operand(operator):
        # Fail with a readable message instead of an IndexError from list.pop.
        if not stack:
            raise ValueError(
                "Malformed query: operator {} is missing an operand".format(operator)
            )
        return getPostings(stack.pop(), d)

    for token in pf:
        if token == 'NOT':
            result = set(l).difference(pop_operand(token))
        elif token == 'AND':
            first = pop_operand(token)
            second = pop_operand(token)
            result = first.intersection(second)
        elif token == 'OR':
            first = pop_operand(token)
            second = pop_operand(token)
            result = first.union(second)
        else:
            # Operands stay as raw terms until an operator consumes them.
            stack.append(token)
            continue
        stack.append(result)

    if not stack:
        return set()
    # The top entry may still be an unresolved term (e.g. the query "a b"),
    # so resolve it to make sure a posting set, never a string, is returned.
    return getPostings(stack.pop(), d)

# Index Construction

In [7]:
# Build the inverted index: every term maps to the set of documents containing it.
docs = []      # one set of distinct terms per document, in the order of `l`
terms = set()  # vocabulary across all documents
l = input("Enter the documents : ").split()
# l = ["doc1.txt","doc2.txt","doc3.txt","doc4.txt"]

for doc_name in l:
    # The context manager closes the file as soon as it has been read.
    with open(doc_name, 'r') as handle:
        text = handle.read().lower()
    # Everything except lowercase letters and digits acts as a separator.
    content = re.sub("[^a-z0-9]"," ",text).split()
    terms.update(content)
    docs.append(set(content))

d = {}
# One pass over each document's terms instead of testing every vocabulary
# term against every document.
for doc_name, doc_terms in zip(l, docs):
    for term in doc_terms:
        d.setdefault(term, set()).add(doc_name)
print(d)


Enter the documents : doc1.txt doc2.txt doc3.txt doc4.txt
{'ink': {'doc4.txt', 'doc1.txt'}, 'things': {'doc1.txt'}, 'ideas': {'doc3.txt'}, 'the': {'doc3.txt'}, 'and': {'doc4.txt', 'doc1.txt', 'doc2.txt', 'doc3.txt'}, 'make': {'doc1.txt'}, 'on': {'doc3.txt'}, 'a': {'doc4.txt', 'doc2.txt'}, 'so': {'doc2.txt'}, 'with': {'doc4.txt', 'doc2.txt', 'doc3.txt'}, 'words': {'doc3.txt'}, 'beautiful': {'doc1.txt'}, 'such': {'doc1.txt'}, 'tell': {'doc4.txt'}, 'flow': {'doc3.txt'}, 'world': {'doc4.txt'}, 'sweet': {'doc2.txt'}, 'shape': {'doc3.txt'}, 'story': {'doc4.txt'}, 'together': {'doc2.txt'}, 'create': {'doc4.txt'}, 'meet': {'doc2.txt'}, 'paper': {'doc1.txt', 'doc2.txt', 'doc3.txt'}, 'bond': {'doc2.txt'}, 'out': {'doc3.txt'}, 'sheet': {'doc4.txt', 'doc1.txt'}, 'to': {'doc4.txt'}, 'pen': {'doc1.txt', 'doc2.txt', 'doc3.txt'}, 'take': {'doc3.txt'}}


# Query Reading

In [6]:
# Usage notes; the whole text is shown to the user as the input prompt.
rules = """
The inverted index have been created.
Rules for entering the query:
 1.There should be a single space between operand and operator
 2.There should be space before And after '(' , ')'
 3.Boolean operators should be in capital and words in small letters.
Enter the query:
"""
query = input(rules)
# For a non-interactive run, pass a literal query instead,
# e.g. queryEval("drug AND op",d,l)
resultDoc = queryEval(query,d,l)
# An empty result means that no document satisfies the query.
if resultDoc:
    print(resultDoc)
else:
    print("No relevant documents")


The inverted index have been created.
Rules for entering the query:
 1.There should be a single space between operand and operator
 2.There should be space before And after '(' , ')'
 3.Boolean operators should be in capital and words in small letters.
Enter the query:
ink AND sheet OR sweet
{'doc4.txt', 'doc1.txt', 'doc2.txt'}
