In [1]:
#import statements
import os
from collections.abc import Sequence
import math
import glob

In [2]:
class Bitset(Sequence):
    """A very simple fixed-width bitset backed by a Python integer.

    Note that, like with written numbers, the leftmost position is the
    MSB, and like normal sequences, that position has index 0.  Negative
    indices count from the end and out-of-range indices raise
    ``IndexError``, as for any other sequence.

    Usage:
        >>> b = Bitset(5)
        >>> b
        Bitset(101)
        >>> b[:]
        [True, False, True]
        >>> b[0] = False
        >>> b
        Bitset(001)
        >>> b << 1
        Bitset(010)
        >>> b >> 1
        Bitset(000)
        >>> b & 1
        Bitset(001)
        >>> b | 2
        Bitset(011)
        >>> b ^ 6
        Bitset(111)
        >>> ~b
        Bitset(110)
    """

    # Class-level defaults; __init__ overrides both on every instance.
    value = 0
    length = 0

    @classmethod
    def from_sequence(cls, seq):
        """Builds a bitset from a sequence of truthy/falsy items.

        Position 0 of ``seq`` becomes the MSB (index 0 of the bitset), and
        the bitset is as wide as ``seq``, so leading zero bits are kept.
        Each item is interpreted as ``bool(int(item))``.
        """
        n = 0
        for item in seq:
            n = (n << 1) | bool(int(item))
        return cls(n, len(seq))

    @staticmethod
    def _minimum_length(value):
        """Returns the number of bits needed for ``value`` (0 if not positive)."""
        try:
            number = int(value)
        except (TypeError, ValueError, OverflowError):
            return 0
        # bit_length() is exact; a floating-point log2 is not for large integers.
        return number.bit_length() if number > 0 else 0

    def __init__(self, value=0, length=0):
        """Creates a Bitset with the given integer value.

        Args:
            value: integer whose binary representation holds the bits.
            length: width in bits; when 0 (the default) the width is the
                minimum needed to represent ``value``.
        """
        self.value = value
        self.length = length or self._minimum_length(value)

    def _widened(self, value, other=None):
        """Wraps ``value`` in a new bitset at least as wide as the operands."""
        result = type(self)(value)
        result.length = max(self.length, result.length,
                            getattr(other, "length", 0))
        return result

    def __and__(self, other):
        """Bitwise AND with a Bitset or an integer.

        With a Bitset operand the result is as wide as the narrower
        operand; with a plain integer it keeps this bitset's width.
        """
        result = type(self)(self.value & int(other))
        result.length = min(self.length, getattr(other, "length", self.length))
        return result

    def __or__(self, other):
        """Bitwise OR with a Bitset or an integer."""
        return self._widened(self.value | int(other), other)

    def __invert__(self):
        """Flips every bit inside the current width.

        The result is masked to ``length`` bits so the stored value stays
        non-negative.
        """
        mask = (1 << self.length) - 1
        return type(self)(~self.value & mask, self.length)

    def __xor__(self, value):
        """Bitwise XOR with a Bitset or an integer."""
        return self._widened(self.value ^ int(value), value)

    def __lshift__(self, value):
        """Shifts left by ``value`` bits, widening the result if needed."""
        return self._widened(self.value << int(value))

    def __rshift__(self, value):
        """Shifts right by ``value`` bits; the width is preserved."""
        return self._widened(self.value >> int(value))

    def __eq__(self, other):
        """Compares by integer value against a Bitset or a plain number."""
        return self.value == getattr(other, "value", other)

    def __int__(self):
        return self.value

    def __repr__(self):
        bits = "".join("1" if bit else "0" for bit in self)
        return "{}({})".format(type(self).__name__, bits)

    def _bit_position(self, index):
        """Maps a sequence index (0 = MSB) to a bit offset (0 = LSB).

        Raises:
            IndexError: if ``index`` lies outside the bitset.
        """
        size = len(self)
        if index < 0:
            index += size
        if not 0 <= index < size:
            raise IndexError("Bitset index out of range")
        return size - index - 1

    def __getitem__(self, s):
        """Gets the specified position or slice.

        Index 0 is the MSB.  An integer index returns a bool; a slice
        returns a list of bools.
        """
        if isinstance(s, slice):
            return [bool(self.value & (1 << self._bit_position(position)))
                    for position in range(*s.indices(len(self)))]
        return bool(self.value & (1 << self._bit_position(s)))

    def __setitem__(self, s, value):
        """Sets the specified position/s to the truthiness of ``value``.

        Index 0 is the MSB.  With a slice, every selected position gets
        the same value.  The width never changes.  Returns ``self`` so an
        explicit ``bitset.__setitem__(i, v)`` call can be chained.
        """
        if isinstance(s, slice):
            positions = range(*s.indices(len(self)))
        else:
            positions = (s,)
        for position in positions:
            mask = 1 << self._bit_position(position)
            if value:
                self.value |= mask
            else:
                self.value &= ~mask
        return self

    def __iter__(self):
        """Iterates over the bits from MSB to LSB."""
        yield from self[:]

    def __len__(self):
        """Returns the width of the bitset in bits."""
        return self.length

In [3]:
# Folder that holds the text files to be indexed
APP_FOLDER = "./"

# Number of .txt files in that folder.  Counted with the same
# os.listdir + endswith(".txt") filter that the indexing cell uses, so the
# count always agrees with the files that actually get indexed (glob.glob1 is
# an undocumented helper with different matching rules, e.g. for dot-files).
txtCounter = sum(1 for entry in os.listdir(APP_FOLDER) if entry.endswith(".txt"))

In [4]:
# Build one positional index per .txt file in APP_FOLDER.
#
# dictlist[i] maps every upper-cased token of file_name[i] to a Bitset in
# which index k is set when the token is the k-th token of the file
# (tokens are numbered from 1; index 0 is never set).

# Maximum number of tokens that can be indexed per file
file_size = 30000


def new_position_bitset():
    """Returns an all-zero Bitset with indices 0..file_size."""
    # Passing the width explicitly avoids building 1 << file_size and then
    # clearing its top bit, which relies on the inferred width being exact.
    return Bitset(0, file_size + 1)


def split_token(word):
    """Splits one whitespace-delimited word into its index tokens.

    A word longer than one character whose last character is not an ASCII
    letter yields two tokens: the word without that character, and the
    character itself.  Every other word is a single token.
    """
    last_char = word[-1]
    is_letter = 'a' <= last_char <= 'z' or 'A' <= last_char <= 'Z'
    if len(word) != 1 and not is_letter:
        return [word[:-1], last_char]
    return [word]


def index_file(path):
    """Reads the file at ``path`` and returns its token -> Bitset index.

    Raises:
        ValueError: if the file holds more than ``file_size`` tokens.
    """
    positions = {}
    word_counter = 0
    with open(path, 'r') as file:
        for line in file:
            for word in line.split():
                # Tokens are stored upper-cased so lookups ignore case
                for token in split_token(word.upper()):
                    word_counter += 1
                    if word_counter > file_size:
                        raise ValueError(
                            "%s contains more than %d tokens; increase file_size"
                            % (path, file_size))
                    if token not in positions:
                        positions[token] = new_position_bitset()
                    positions[token][word_counter] = 1
    return positions


# dictlist and file_name grow in lockstep, so dictlist[i] always belongs to
# file_name[i] regardless of how many files the folder turns out to contain.
dictlist = []
file_name = []

# All files in the desired folder
for files in os.listdir(APP_FOLDER):
    # Only checking for .txt files
    if files.endswith(".txt"):
        dictlist.append(index_file(os.path.join(APP_FOLDER, files)))
        file_name.append(files)

# Index of the last file that was processed (-1 when none was found)
file_counter = len(file_name) - 1

In [None]:
# Taking Queries
#
# Reads queries from standard input until the sentinel "-1" is entered and
# reports every position at which the query's tokens occur consecutively.

query = input()
while query != "-1":
    # The index keys are upper-case, so normalise the query the same way
    query = query.upper()

    # Tokenise the query like the indexed text: the trailing non-letter
    # character of a multi-character word becomes a token of its own
    words_list = []
    for words in query.split():
        last_char = words[-1]
        is_letter = 'a' <= last_char <= 'z' or 'A' <= last_char <= 'Z'
        if len(words) != 1 and not is_letter:
            words_list.append(words[:-1])
            words_list.append(last_char)
        else:
            words_list.append(words)

    # A blank query has no tokens; without this guard the all-ones mask
    # below would be reported as a match at every position of every file.
    if words_list:
        # Checking for the sentence in every dictionary
        for dic_num, dic in enumerate(dictlist):
            flag = 1
            # Start from "every position matches" and narrow it per token
            ans_bitset = ~Bitset(0, file_size + 1)
            for word_num, word in enumerate(words_list):
                if word not in dic:
                    flag = 0
                    break
                # Shifting the n-th token's positions left by n lines them
                # up with the position of the query's first token
                ans_bitset = (dic[word] << word_num) & ans_bitset

            if flag == 1:
                for pos in range(file_size):
                    if ans_bitset[pos + 1]:
                        print("Match found from word",pos+1,"in file",file_name[dic_num])
    query = input()
    

IIIT
Match found from word 1 in file file2.txt
Match found from word 40 in file file2.txt
