-
Notifications
You must be signed in to change notification settings - Fork 1
/
main.py
30 lines (23 loc) · 881 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#!/usr/bin/env python3
import fnmatch
import logging
import os
import re
from consts import CORPUS_DOCS
from enigma.indexer import Indexer
# Log everything from DEBUG upwards; each record shows the timestamp, the
# emitting module, the level name and the message.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s : %(module)s: %(levelname)s : %(message)s',
)
# Glob patterns for the file names to index. Each glob is translated into a
# regular expression and the alternatives are OR-ed into one pattern string,
# which is later applied to bare file names with re.match.
includes = '|'.join(map(fnmatch.translate, ['*.txt']))
# When False, the corpus under CORPUS_DOCS is walked and every matching file
# is indexed from scratch. The flag is also handed to Indexer; presumably it
# makes Indexer load a previously saved index -- confirm in enigma.indexer.
load_from_disk = True
indexer = Indexer(load_from_disk)
if not load_from_disk:
    include_pattern = re.compile(includes)
    # One counter for the whole walk: numbering files per directory (as
    # enumerate() over each directory's file list would) restarts at 0 in
    # every sub-directory and hands the same doc_id to different documents.
    next_doc_id = 0
    for path, sub_dirs, files in os.walk(CORPUS_DOCS):
        for filename in files:
            if not include_pattern.match(filename):
                continue
            indexer.index_doc(doc_path=os.path.join(path, filename),
                              doc_id=next_doc_id)
            next_doc_id += 1
    # NOTE(review): the original indentation was lost; this call is assumed
    # to belong to the rebuild branch (run once after all documents have
    # been indexed) -- confirm against the upstream file.
    indexer.setup_corpus_index()
# Run one hard-coded query against the index, with the neighbors and
# synonyms options both switched off, and print whatever search() returns.
results = indexer.search(query="arrhythmia", neighbors=False, synonyms=False)
print(results)