-
Notifications
You must be signed in to change notification settings - Fork 0
/
definitions.py
89 lines (88 loc) · 2.36 KB
/
definitions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
'''
---------------------------------------
io/
input/
# contains the file to be matched -> one per job
tender.csv
output/
---------------------------------------
ners/
matcher.py
# imports: []
def matcher()
trainer.py
# imports: []
def trainer()
---------------------------------------
preprocessor/
loader.py
# imports: [ xlrd, csv, os, sys, re ]
# helper functions
def get_path(d)
def get_row_heads()
def import_json(d)
def import_pickle(d)
def import_xls(d)
def import_csv(d)
def import_txt(d)
# controller function
def load_doc(d)
string_cleaner.py
# imports: [ unicodedata2, re ]
# helper functions
# def lemmatizer(d)
def remove_accents(d)
def remove_special_chars(d)
def remove_whitespace(d)
def normalizer(d)
def apply_custom_rules(d)
# controller function
def string_cleaner(d)
---------------------------------------
processor/
distance_encoder.py
# imports: [ jellyfish ]
def levenshtein(s1, s2)
def damerau_levenshtein(s1, s2)
def jaro(s1, s2)
def jaro_winkler(s1, s2)
def hamming(s1, s2)
def match_rating_comparison(s1, s2)
phonetic_encoder.py
# imports: [ sys, jellyfish, doublemetaphone ]
def soundex(s)
def metaphone(s)
def double_metaphone(s)
def nysiis(s)
def match_rating_codex(s)
nlp_object_processor.py
# imports: [ spacy, STOP_WORDS ]
# custom pipes
def colname_tagger(d)
def remove_stop_words(d)
def apply_cleanup_rules(d)
def commonkey_tagger(d)
def sentence_segmenter(d)
# helper functions
def create_nlp_pipeline(nlp)
def modify_stop_words()
def pickle_an_nlpobj(nobj, pname)
# controller function
def process_nlp_object(d)
---------------------------------------
stores/
lookups/
bhSuppliers.json
masters/
erp10.csv
models/
pickles/
taxonomies/
bearings.json
pumps.json
---------------------------------------
test/
modulesMethods.json
testController.py
# imports: [ json, sys, importlib ]
'''