#### A simple search

In [45]:
from collections import defaultdict
from json import dumps, load
from functools import reduce

# Text files whose words get indexed.
URLS = ['1.txt', '2.txt']
# File the serialized index is written to and read back from.
INDEX_FILE = 'index.txt'
# In-memory index: word -> {resource name -> occurrence count}.
INDEX_MAP = {}

def load_index(index_file):
    """Read a JSON-encoded index from ``index_file``.

    Returns the decoded object, or an empty dict when the file does not
    exist (e.g. nothing has been indexed yet). Any other error, such as
    malformed JSON, propagates to the caller.
    """
    try:
        with open(index_file) as handle:
            return load(handle)
    except FileNotFoundError:
        # No index on disk yet: start from an empty one.
        return {}

def write_index_to_index_file(index_file, index_map):
    """Write ``index_map`` to ``index_file`` as JSON, replacing its contents.

    Raises whatever ``dumps`` raises (e.g. ``TypeError``) when the map is
    not JSON-serializable; in that case the file is left untouched.
    """
    # Serialize before opening: opening in "w" mode truncates the file, so
    # encoding first keeps an existing index intact if encoding fails.
    payload = dumps(index_map)
    with open(index_file, "w") as file:
        file.write(payload)

def build_index_map(index_map, words, key):
    """Count each occurrence of ``words`` under resource ``key``.

    ``index_map`` is updated in place and maps word -> {key -> count}.
    All the given words are attributed to the same ``key``.
    """
    for token in words:
        # Fetch the per-resource counters for this word, creating the
        # entry the first time the word is seen.
        counters = index_map.setdefault(token, {})
        # First sighting for this resource counts as 1, later ones add 1.
        counters[key] = counters.get(key, 0) + 1

def read_build_index_file(filename):
    """Index every whitespace-separated word of ``filename``.

    Counts are accumulated into the module-level ``INDEX_MAP``, with the
    file name itself used as the resource key.
    """
    with open(filename) as source:
        for text_line in source:
            # split() with no separator already discards leading and
            # trailing whitespace, including the newline.
            tokens = text_line.split()
            build_index_map(INDEX_MAP, tokens, filename)
            
def search_word(word, index_map):
    """Return the resources containing ``word``, most occurrences first.

    Resources with equal counts keep the order in which they appear in
    the index entry. An unknown word yields an empty list.
    """
    if word not in index_map:
        return []
    counts = index_map[word]
    # Rank resource names by their occurrence count, highest first.
    return sorted(counts, key=counts.get, reverse=True)
    
def search_words_and(words, index_map):
    """Return the resources that contain every word in ``words`` (logical AND).

    The query terms are echoed to stdout. The result is a list sorted by
    resource name so the order is reproducible between runs. An empty
    ``words`` yields an empty list instead of raising.
    """
    print(words)
    matches = [set(search_word(word, index_map)) for word in words]
    if not matches:
        # No search terms: nothing to intersect, so report no results.
        return []
    return sorted(set.intersection(*matches))

def search_words_or(words, index_map):
    """Return the resources that contain at least one of ``words`` (logical OR).

    The result is a list sorted by resource name so the order is
    reproducible between runs. An empty ``words`` yields an empty list
    instead of raising.
    """
    matches = [set(search_word(word, index_map)) for word in words]
    # Starting from an empty set makes the zero-term case fall out naturally.
    return sorted(set().union(*matches))

# Index the words of every configured text file.
for url in URLS:
    read_build_index_file(url)

# Persist the index so it can be reused later.
write_index_to_index_file(INDEX_FILE, INDEX_MAP)

# Read the index back from disk and query the reloaded copy.
new_map = load_index(INDEX_FILE)
# A single-term lookup would be: search_word('In', new_map)

query = ['In', 'a']
print(search_words_and(query, new_map))
print(search_words_or(query, new_map))

['In', 'a']
['1.txt', '2.txt']
['1.txt', '2.txt']
