Skip to content

Commit

Permalink
Adding FuzzyClassifier
Browse files Browse the repository at this point in the history
  • Loading branch information
loristns committed Jun 14, 2018
1 parent dbf3fb2 commit db6cff5
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions kadot/classifiers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from kadot.fuzzy import extract, ratio
from kadot.tokenizers import corpus_tokenizer, regex_tokenizer, Tokens
from kadot.utils import SavedObject, unique_words
from kadot.vectorizers import centroid_document_vectorizer, \
Expand Down Expand Up @@ -195,3 +196,31 @@ def predict(self, text: str) -> Dict[str, float]:
class_prediction[class_name] = float(proba)

return class_prediction


class FuzzyClassifier(SavedObject):
    """
    Classify a text by fuzzy-matching it against labelled training samples.

    For every label, the predicted score is the highest score obtained by
    any training sample carrying that label; a label for which no sample
    was scored keeps a score of 0.
    """

    def __init__(self,
                 train: Dict[str, str],
                 ratio_function: Callable[..., float] = ratio,
                 tokenizer: Callable[..., Tokens] = regex_tokenizer
                 ):
        """
        :param train: a dictionary mapping each sample text to its label.
        :param ratio_function: a similarity function, stored on the instance.
        :param tokenizer: the function used to tokenize the text given to
            `predict`.
        :raises ValueError: if `train` is empty.
        """
        if not train:
            # Unzipping an empty mapping fails with an opaque
            # "not enough values to unpack" ValueError; raise the same
            # exception type with a message that names the real problem.
            raise ValueError("`train` must contain at least one sample.")

        # Split the {sample: label} mapping into two parallel tuples.
        self.train_samples, self.train_labels = zip(*train.items())
        # presumably the distinct labels; verify against `unique_words`.
        self.labels = unique_words(self.train_labels)

        self.ratio_function = ratio_function
        self.tokenizer = tokenizer

    def predict(self, text: str) -> Dict[str, float]:
        """
        Score every known label against `text`.

        :param text: the text to classify.
        :return: a dictionary mapping each label to the best score found
            among the training samples of that label.
        """
        tokens = self.tokenizer(text)
        # NOTE(review): `self.ratio_function` is stored by `__init__` but is
        # not forwarded here, so `extract` runs with whatever it does by
        # default -- confirm whether it should be passed along.
        scores = extract(tokens, corpus_tokenizer(self.train_samples))

        # Every label starts at 0 so that it always appears in the result.
        class_prediction = {label: 0. for label in self.labels}

        for (sample, score) in scores:
            # Samples and labels are parallel tuples: the position of the
            # sample gives its label.
            # NOTE(review): `.index` is a linear scan per scored sample; a
            # dict lookup would be cheaper if `extract` is confirmed to
            # return hashable values equal to the original sample strings.
            sample_label = self.train_labels[self.train_samples.index(sample)]
            # Keep only the best score seen for each label.
            if class_prediction[sample_label] < score:
                class_prediction[sample_label] = score

        return class_prediction

0 comments on commit db6cff5

Please sign in to comment.