In [70]:
#importando Indice_Invertido.ipynb
import io, os, sys, types
from IPython import get_ipython
from nbformat import current
from IPython.core.interactiveshell import InteractiveShell


def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file backing the module ``fullname``.

    Only the last dotted component of ``fullname`` is used as the file name.
    Each directory in ``path`` is probed first with the name as given and then
    with its underscores replaced by spaces, so that ``import My_Notebook``
    can resolve to ``My Notebook.ipynb``.

    Parameters:
        fullname: dotted module name being imported.
        path: iterable of directories to search; when falsy, the current
            directory (``''``) is searched.

    Returns:
        The path of the first matching notebook file, or ``None`` when no
        candidate exists.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']
    # The underscore -> space fallback is applied to the file name only.
    # Rewriting the joined path would also alter directory names that contain
    # underscores and make the lookup miss notebooks stored there.
    spaced_name = name.replace("_", " ")
    for d in path:
        for candidate in (name, spaced_name):
            nb_path = os.path.join(d, candidate + ".ipynb")
            if os.path.isfile(nb_path):
                return nb_path
    return None


class NotebookLoader(object):
    """Legacy-protocol module loader that imports a Jupyter notebook as a module.

    The loader keeps a reference to the running ``InteractiveShell`` so that
    cell sources can be passed through IPython's input transformer before
    they are executed.
    """

    def __init__(self, path=None):
        """Remember the search ``path`` and grab the shared shell instance."""
        self.Shell = InteractiveShell.instance()
        self.path = path

    def load_module(self, fullname):
        """Import the notebook matching ``fullname`` and return it as a module.

        Parameters:
            fullname: dotted module name; it is resolved to a notebook file
                with ``find_notebook`` using this loader's search path.

        Returns:
            The new module, whose namespace holds whatever the notebook's
            Python code cells defined.

        Raises:
            ImportError: when no notebook file can be found for ``fullname``.
            Any exception raised while executing a cell is propagated; in that
            case the module is removed from ``sys.modules`` again.
        """
        path = find_notebook(fullname, self.path)
        if not path:
            # Fail with the exception type the import system expects instead
            # of letting io.open() choke on a None path.
            raise ImportError("no notebook found for module %r" % fullname,
                              name=fullname)

        print ("importando notebook de %s" % path)

        with io.open(path, 'r', encoding='utf-8') as f:
            nb = current.read(f, 'json')

        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        # Registered before the cells run, as the loader protocol requires.
        sys.modules[fullname] = mod

        # Point the shell at the module namespace while the cells execute;
        # the previous namespace is restored in the ``finally`` clause.
        save_user_ns = self.Shell.user_ns
        self.Shell.user_ns = mod.__dict__

        try:
            # Only the first worksheet is read.
            for cell in nb.worksheets[0].cells:
                if cell.cell_type == 'code' and cell.language == 'python':
                    # Turn IPython-specific input into plain executable Python.
                    code = self.Shell.input_transformer_manager.transform_cell(cell.input)
                    exec(code, mod.__dict__)
        except BaseException:
            # Do not leave a half-initialised module behind: a later
            # ``import`` would otherwise silently return the broken module.
            sys.modules.pop(fullname, None)
            raise
        finally:
            self.Shell.user_ns = save_user_ns
        return mod


class NotebookFinder(object):
    """Module finder that resolves imports to notebook files.

    Loaders are cached per search path so that each distinct path gets a
    single ``NotebookLoader``.
    """

    def __init__(self):
        # Maps a cache key derived from the search path to its loader.
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return a loader for ``fullname``, or ``None`` if no notebook matches.

        The cache key is the search path joined with the OS path separator;
        a falsy ``path`` is used as the key unchanged.
        """
        if not find_notebook(fullname, path):
            return

        cache_key = os.path.sep.join(path) if path else path

        loader = self.loaders.get(cache_key)
        if loader is None:
            loader = NotebookLoader(path)
            self.loaders[cache_key] = loader
        return loader

# Register the notebook finder at the end of the import system's finder list
# so that the import below can be resolved to an .ipynb file.
# NOTE(review): this append is unconditional, so every re-run of the cell adds
# one more NotebookFinder to sys.meta_path.
sys.meta_path.append(NotebookFinder())

# Imports the notebook under test as a regular module.
import Indice_Invertido

In [71]:
import math
import numpy as np
import re
import argparse
import os
import glob
from pathlib import Path
import time

# "assert" is silent when the condition holds and raises AssertionError when it fails

In [85]:
#testando a função "formataString" com todos os caracteres possíveis
def test_formataString(s):
    """Assert that ``formataString(s)`` yields the hard-coded token list.

    The expected list below corresponds to one specific fixture string, so
    ``s`` must be that fixture for the assertion to hold.
    """
    expected_tokens = [
        'abc', 'defs', 'lmnopq', 'fdfdj', 'jfkjdfdd', '2gh3h4vggl',
        '058', '23848l9', '9ssd8fd43', 'as', '379', 'd',
        'gf', 'aaa', 'bbdd', 'sja', '3kj3', '984jd',
        'cc', 'akjs', 'jsa', 'dskk', 'llll', 'sks',
        'aall', 'eee', 'be', 'aa', 'hh', '3237l',
    ]
    actual_tokens = Indice_Invertido.Indice_Invertido.formataString(s)
    assert actual_tokens == expected_tokens
    

# Fixture string for test_formataString: mixed case, accented letters, digits
# and punctuation spread over several lines.
# The single backslash is spelled "\\" so the literal keeps exactly one
# backslash without relying on the invalid escape sequence "\ ".
strTest = '''ABC defs lmNOpQ áfdFDj íJFKjdêfddã 2gh3h4vgÁg~L 058 23848l9 9ÁSSD´8fd43
'!as!é379'] /d|gf\\ #aaa#bbdd %sja &3kj3 (984jd )cc *akjs +JSA ds-KK .llLL :sks aa;LL <ee~e =ã>? @báe[
àaa] _è_HH_ { 323}7l
'''

# Run the check on the fixture string; nothing is shown when the assert passes.
test_formataString(strTest)

In [93]:
#testando a função "indiceInvertido"

txtTest1 = [
    'palavra', 'Digo', 'digo', 'batAta', 'feijão', 'batata1', 'palavra',
    'BATATA', 'feijao', 'batAta', 'digo', 'batata', 'repete',
]
txtTest2 = [
    'arroz', 'ARROZ', 'batata', 'FEIJÃO', 'repete', 'figo', 'repete',
    'batata1',
]

# Document id -> list of terms of that document.
arqTest = {1: txtTest1, 2: txtTest2}

# Checks one sub-step of "indiceInvertido": for every document, each distinct
# term is recorded once, in order of first appearance, together with the
# document id and the number of times the term occurs in that document.
vocabulario = []
for doc_id, terms in arqTest.items():
    counted_positions = []
    for pos, term in enumerate(terms):
        # Positions already counted as a repetition of an earlier term.
        if pos in counted_positions:
            continue
        repeats = [
            later for later in range(pos + 1, len(terms))
            if term == terms[later]
        ]
        counted_positions.extend(repeats)
        vocabulario.append([term, doc_id, 1 + len(repeats)])


assert vocabulario == [
    ['palavra', 1, 2],
    ['Digo', 1, 1],
    ['digo', 1, 2],
    ['batAta', 1, 2],
    ['feijão', 1, 1],
    ['batata1', 1, 1],
    ['BATATA', 1, 1],
    ['feijao', 1, 1],
    ['batata', 1, 1],
    ['repete', 1, 1],
    ['arroz', 2, 1],
    ['ARROZ', 2, 1],
    ['batata', 2, 1],
    ['FEIJÃO', 2, 1],
    ['repete', 2, 2],
    ['figo', 2, 1],
    ['batata1', 2, 1],
]

#verificando a saída da função
def test_indiceInvertido(arquivo):
    """Assert that ``indiceInvertido(arquivo)`` returns the expected index.

    The expected mapping goes from each term to a list of
    ``[document id, count]`` pairs and is hard-coded for one specific
    two-document fixture.
    """
    expected_index = {
        'palavra': [[1, 2]],
        'Digo': [[1, 1]],
        'digo': [[1, 2]],
        'batAta': [[1, 2]],
        'feijão': [[1, 1]],
        'batata1': [[1, 1], [2, 1]],
        'BATATA': [[1, 1]],
        'feijao': [[1, 1]],
        'batata': [[1, 1], [2, 1]],
        'repete': [[1, 1], [2, 2]],
        'arroz': [[2, 1]],
        'ARROZ': [[2, 1]],
        'FEIJÃO': [[2, 1]],
        'figo': [[2, 1]],
    }
    actual_index = Indice_Invertido.Indice_Invertido.indiceInvertido(arquivo)
    assert actual_index == expected_index
    
    
# Run the check on the two-document fixture; nothing is shown when the assert passes.
test_indiceInvertido(arqTest)

In [87]:
#testando a função "coordenadas"
def test_coordenadas(indInv, numDocs):
    """Check ``coordenadas(indInv, numDocs)`` against a reference matrix.

    The computed matrix and the hard-coded reference are printed one above
    the other, and are then compared element-wise so that a mismatch fails
    the test instead of having to be spotted by eye.

    Parameters:
        indInv: inverted index passed straight to ``coordenadas``.
        numDocs: document count passed straight to ``coordenadas``.

    Raises:
        AssertionError: when the computed matrix differs from the reference
            in shape or by more than the tolerance.
    """
    resultTest = np.array([
        [0., 2.19722458, 0.],
        [0., 1.09861229, 0.],
        [0., 2.19722458, 0.],
        [0., 2.19722458, 0.],
        [0., 1.09861229, 0.],
        [0., 0.40546511, 0.40546511],
        [0., 1.09861229, 0.],
        [0., 1.09861229, 0.],
        [0., 0.40546511, 0.40546511],
        [0., 0.40546511, 0.81093022],
        [0., 0., 1.09861229],
        [0., 0., 1.09861229],
        [0., 0., 1.09861229],
        [0., 0., 1.09861229],
    ])

    resultFunc = Indice_Invertido.Indice_Invertido.coordenadas(indInv, numDocs)

    # Printed before asserting so both matrices stay visible on failure.
    print(f'''{resultFunc}
__________________

{resultTest}''')

    # The reference values are rounded to 8 decimals, hence the absolute
    # tolerance instead of exact equality.
    np.testing.assert_allclose(resultFunc, resultTest, rtol=0, atol=1e-6)
    
    
# Inverted-index fixture: each term maps to a list of [document id, count] pairs.
indTest = {'palavra': [[1, 2]], 'Digo': [[1, 1]], 'digo': [[1, 2]], 'batAta': [[1, 2]],
            'feijão': [[1, 1]], 'batata1': [[1, 1], [2, 1]], 'BATATA': [[1, 1]], 'feijao': [[1, 1]],
            'batata': [[1, 1], [2, 1]], 'repete': [[1, 1], [2, 2]], 'arroz': [[2, 1]],
            'ARROZ': [[2, 1]], 'FEIJÃO': [[2, 1]], 'figo': [[2, 1]]}
# NOTE(review): the postings only mention documents 1 and 2, yet numDocs is 3
# and the printed result has an all-zero first column; confirm this is intended.
numTest = 3

# Prints the computed matrix followed by the reference matrix.
test_coordenadas(indTest, numTest)

[[0.   2.2  0.  ]
 [0.   1.1  0.  ]
 [0.   2.2  0.  ]
 [0.   2.2  0.  ]
 [0.   1.1  0.  ]
 [0.   0.41 0.41]
 [0.   1.1  0.  ]
 [0.   1.1  0.  ]
 [0.   0.41 0.41]
 [0.   0.41 0.81]
 [0.   0.   1.1 ]
 [0.   0.   1.1 ]
 [0.   0.   1.1 ]
 [0.   0.   1.1 ]]
__________________

[[0.   2.2  0.  ]
 [0.   1.1  0.  ]
 [0.   2.2  0.  ]
 [0.   2.2  0.  ]
 [0.   1.1  0.  ]
 [0.   0.41 0.41]
 [0.   1.1  0.  ]
 [0.   1.1  0.  ]
 [0.   0.41 0.41]
 [0.   0.41 0.81]
 [0.   0.   1.1 ]
 [0.   0.   1.1 ]
 [0.   0.   1.1 ]
 [0.   0.   1.1 ]]
