In [81]:
import time
import pathlib
import sys
import os
import json
from operator import itemgetter
from tqdm import tqdm_notebook as tqdm
from IPython.display import clear_output
from warp import *

# Module-wide settings used throughout the notebook.
# Tokens that are dropped from a keyword query before the index lookup.
stop_words = [
    "a", "o", "ao", "de", "da", "do", "um", "uma", "uns",
    "pra", "para", "por", "com", "gb",
]
# Relative path of the JSON file that stores the inverted index.
INVERTED_INDEX_PATH = "../../index/Books/inverted_index.json"

## Auxiliary Functions

In [85]:
def load_json_file(path_to_file):
    """
    Loads a json file and returns its parsed content.
    
    Parameters:
    path_to_file(str): Path to the json file
    
    Returns:
    dictionary: The information stored in the json file (a dictionary when the
    file's top-level value is a JSON object, as expected for the inverted index)
    """
    # Open the path supplied by the caller; the previous version ignored the
    # argument and always read the global INVERTED_INDEX_PATH.
    with open(path_to_file) as json_file:
        return json.load(json_file)

def rank_docs(docs_scores):
    """
    Orders the documents from the highest accumulated score to the lowest.
    
    Parameters:
    docs_scores(dictionary): Maps each document key to its score
    
    Returns:
    list: (key, score) tuples sorted by score in descending order; entries
    with equal scores keep the order they had in the dictionary
    """
    ranking = list(docs_scores.items())
    # Sort on the score (second tuple element), best match first.
    ranking.sort(key=lambda entry: entry[1], reverse=True)
    return ranking
    

## Search using a single string

In [86]:
def search_by_keywords():
    """
    Reads a free-text query from the user and prints the matching documents.
    
    The query is lower-cased, split into words and stripped of stop words.
    Each remaining word is looked up in the inverted index and the value of
    every posting is added to the score of its document (term-at-a-time).
    The resulting ranking, a list of (document, score) tuples in descending
    score order, is printed.
    
    Returns:
    None
    """
    QUERY = input("Digite o smartphone ou características do smartphone que deseja pesquisar: ")

    # split() without an argument splits on any run of whitespace, so repeated
    # spaces or tabs no longer yield empty or unsplit tokens.
    QUERY = [word for word in QUERY.lower().split() if word not in stop_words]
    inverted_index = load_json_file(INVERTED_INDEX_PATH)

    # dictionary to save the score of each page
    docs_scores = {}

    # adds the scores to each page ID (term-at-a-time)
    for word in QUERY:
        # words missing from the index contribute nothing
        for doc_sc in inverted_index.get(word, []):
            # doc_sc[0] is the page ID and doc_sc[1] the value added to its score
            docs_scores[doc_sc[0]] = docs_scores.get(doc_sc[0], 0) + doc_sc[1]

    # release the index before ranking; it is not needed any more
    del inverted_index

    print(rank_docs(docs_scores))

In [87]:
# Interactive run: prompts for a query and prints the ranked (document, score) list.
search_by_keywords()

Digite o smartphone ou características do smartphone que deseja pesquisar: iphone 32gb
[(397, 37), (180, 22), (367, 22), (372, 22), (377, 22), (384, 22), (385, 22), (390, 22), (159, 21), (166, 21), (105, 20), (121, 20), (130, 20), (177, 20), (178, 20), (360, 20), (361, 20), (368, 20), (379, 20), (387, 20), (162, 19), (338, 19), (369, 18), (371, 18), (375, 18), (378, 18), (383, 18), (386, 18), (391, 18), (407, 18), (414, 18), (421, 18), (433, 18), (164, 17), (350, 17), (366, 17), (381, 17), (388, 17), (393, 17), (395, 17), (402, 17), (428, 17), (101, 16), (122, 16), (123, 16), (129, 16), (131, 16), (134, 16), (139, 16), (146, 16), (148, 16), (151, 16), (153, 16), (156, 16), (158, 16), (175, 16), (358, 16), (392, 16), (417, 16), (430, 16), (102, 15), (103, 15), (106, 15), (107, 15), (108, 15), (111, 15), (112, 15), (115, 15), (117, 15), (119, 15), (125, 15), (126, 15), (133, 15), (135, 15), (136, 15), (138, 15), (140, 15), (141, 15), (142, 15), (145, 15), (147, 15), (150, 15), (152, 15),

## Search using attributes and values

In [76]:
def search_by_attributes():
    """
    Asks the user for the value (or range) of each smartphone attribute.
    
    The answers are collected in a local dictionary keyed by attribute name
    ("price", "model", "ram", "hd", "screen"). The dictionary is not used
    further yet and nothing is returned.
    
    Returns:
    None
    """
    # (attribute, prompt) pairs, listed in the order the questions are asked
    questions = (
        ("price", "Insira a faixa de preço do smartphone (Ex: R$1000,00 - R$2000,00): "),
        ("model", "Insira o modelo do smartphone: "),
        ("ram", "Informe a faixa de RAM do smartphone (Ex: 4GB - 8GB: "),
        ("hd", "Insira a faixa de capacidade de armazenamento desejada (Ex: 32GB - 64GB): "),
        ("screen", """Informe o menor e maior tamanho de tela (Ex: 4,7" - 5,2": """),
    )

    # every attribute starts out empty and is filled in as the user answers
    PRODUCT = {attribute: "" for attribute, _ in questions}
    for attribute, message in questions:
        PRODUCT[attribute] = input(message)

## User's Menu

In [35]:
# Interactive menu: keeps asking for a command until the user enters 0.
CMD = -1
while CMD != 0:
    print("1 - Para fazer uma busca através de palavras chaves")
    print("2 - Para uma busca através de atributos específicos")
    print("3 - Para limpar a tela")
    print("0 - Para sair")

    try:
        CMD = int(input())
    except ValueError:
        # Non-numeric input used to crash the loop; treat it like any other
        # invalid command and show the menu again.
        CMD = -1
        print("Comando inválido")
        continue

    if CMD == 0:
        # the loop condition ends the menu
        pass
    elif CMD == 1:
        search_by_keywords()
    elif CMD == 2:
        search_by_attributes()
    elif CMD == 3:
        clear_output()
    else:
        print("Comando inválido")

1 - Para fazer uma busca através de palavras chaves
2 - Para uma busca através de atributos específicos
3 - Para limpar a tela
0 - Para sair
0
