In [1]:
import pkg_resources
import sys
from pathlib import Path

sys.path.append(str(Path.cwd().parent))

from symspellpy import SymSpell, Verbosity
from symspellpy.editdistance import DistanceAlgorithm


In [3]:
bigram_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_bigramdictionary_en_243_342.txt"
)
dictionary_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_dictionary_en_82_765.txt"
)


def _build_sym_spell(distance_algorithm):
    """Create a SymSpell instance for one distance algorithm, fully loaded.

    The instance gets ``distance_algorithm`` assigned first and then loads the
    bigram dictionary followed by the unigram dictionary, using the same
    column-index arguments for every instance so the benchmarks differ only
    in the distance algorithm.

    Args:
        distance_algorithm: A ``DistanceAlgorithm`` member to benchmark.

    Returns:
        The configured ``SymSpell`` instance.

    Raises:
        FileNotFoundError: If either loader returns a falsy value. Without
            this check a failed load would go unnoticed and the timings
            below would be measured against an empty dictionary.
    """
    sym_spell = SymSpell()
    sym_spell.distance_algorithm = distance_algorithm
    if not sym_spell.load_bigram_dictionary(bigram_path, 0, 2):
        raise FileNotFoundError(f"Could not load bigram dictionary: {bigram_path}")
    if not sym_spell.load_dictionary(dictionary_path, 0, 1):
        raise FileNotFoundError(f"Could not load dictionary: {dictionary_path}")
    return sym_spell


# One independent instance per algorithm; the names are used by the benchmark
# functions defined in the following cells.
sym_spell_damerau_osa = _build_sym_spell(DistanceAlgorithm.DAMERAU_OSA)
sym_spell_damerau_osa_fast = _build_sym_spell(DistanceAlgorithm.DAMERAU_OSA_FAST)
sym_spell_levenshtein = _build_sym_spell(DistanceAlgorithm.LEVENSHTEIN)
sym_spell_levenshtein_fast = _build_sym_spell(DistanceAlgorithm.LEVENSHTEIN_FAST)
sym_spell_ukkonen = _build_sym_spell(DistanceAlgorithm.UKKONEN)

True

In [4]:
def lookup_damerau_osa():
    """Benchmark target: single-word lookup on the DAMERAU_OSA instance.

    Calls ``lookup`` with ``Verbosity.ALL`` and discards the result.
    """
    misspelled_word = "tepmperamet"
    verbosity = Verbosity.ALL
    sym_spell_damerau_osa.lookup(misspelled_word, verbosity)

def lookup_damerau_osa_fast():
    """Benchmark target: single-word lookup on the DAMERAU_OSA_FAST instance.

    Calls ``lookup`` with ``Verbosity.ALL`` and discards the result.
    """
    misspelled_word = "tepmperamet"
    verbosity = Verbosity.ALL
    sym_spell_damerau_osa_fast.lookup(misspelled_word, verbosity)

def lookup_levenshtein():
    """Benchmark target: single-word lookup on the LEVENSHTEIN instance.

    Calls ``lookup`` with ``Verbosity.ALL`` and discards the result.
    """
    misspelled_word = "tepmperamet"
    verbosity = Verbosity.ALL
    sym_spell_levenshtein.lookup(misspelled_word, verbosity)

def lookup_levenshtein_fast():
    """Benchmark target: single-word lookup on the LEVENSHTEIN_FAST instance.

    Calls ``lookup`` with ``Verbosity.ALL`` and discards the result.
    """
    misspelled_word = "tepmperamet"
    verbosity = Verbosity.ALL
    sym_spell_levenshtein_fast.lookup(misspelled_word, verbosity)

def lookup_ukkonen():
    """Benchmark target: single-word lookup on the UKKONEN instance.

    Calls ``lookup`` with ``Verbosity.ALL`` and discards the result.
    """
    misspelled_word = "tepmperamet"
    verbosity = Verbosity.ALL
    sym_spell_ukkonen.lookup(misspelled_word, verbosity)

def lookup_compound_damerau_osa():
    """Benchmark target: ``lookup_compound`` on the DAMERAU_OSA instance.

    Corrects a fixed noisy sentence with 2 as the second argument and
    discards the result.
    """
    noisy_sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_damerau_osa.lookup_compound(noisy_sentence, max_edit_distance)

def lookup_compound_damerau_osa_fast():
    """Benchmark target: ``lookup_compound`` on the DAMERAU_OSA_FAST instance.

    Corrects a fixed noisy sentence with 2 as the second argument and
    discards the result.
    """
    noisy_sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_damerau_osa_fast.lookup_compound(noisy_sentence, max_edit_distance)

def lookup_compound_levenshtein():
    """Benchmark target: ``lookup_compound`` on the LEVENSHTEIN instance.

    Corrects a fixed noisy sentence with 2 as the second argument and
    discards the result.
    """
    noisy_sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_levenshtein.lookup_compound(noisy_sentence, max_edit_distance)

def lookup_compound_levenshtein_fast():
    """Benchmark target: ``lookup_compound`` on the LEVENSHTEIN_FAST instance.

    Corrects a fixed noisy sentence with 2 as the second argument and
    discards the result.
    """
    noisy_sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_levenshtein_fast.lookup_compound(noisy_sentence, max_edit_distance)

def lookup_compound_ukkonen():
    """Benchmark target: ``lookup_compound`` on the UKKONEN instance.

    Corrects a fixed noisy sentence with 2 as the second argument and
    discards the result.
    """
    noisy_sentence = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_ukkonen.lookup_compound(noisy_sentence, max_edit_distance)

def word_segmentation_damerau_osa():
    """Benchmark target: ``word_segmentation`` on the DAMERAU_OSA instance.

    Segments a fixed unspaced pangram with 0 as the second argument and
    discards the result.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_damerau_osa.word_segmentation(unspaced_text, max_edit_distance)

def word_segmentation_damerau_osa_fast():
    """Benchmark target: ``word_segmentation`` on the DAMERAU_OSA_FAST instance.

    Segments a fixed unspaced pangram with 0 as the second argument and
    discards the result.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_damerau_osa_fast.word_segmentation(unspaced_text, max_edit_distance)

def word_segmentation_levenshtein():
    """Benchmark target: ``word_segmentation`` on the LEVENSHTEIN instance.

    Segments a fixed unspaced pangram with 0 as the second argument and
    discards the result.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_levenshtein.word_segmentation(unspaced_text, max_edit_distance)

def word_segmentation_levenshtein_fast():
    """Benchmark target: ``word_segmentation`` on the LEVENSHTEIN_FAST instance.

    Segments a fixed unspaced pangram with 0 as the second argument and
    discards the result.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_levenshtein_fast.word_segmentation(unspaced_text, max_edit_distance)

In [5]:
# Time the single-word `lookup` wrapper for each of the five distance algorithms.
%timeit lookup_damerau_osa()
%timeit lookup_damerau_osa_fast()
%timeit lookup_levenshtein()
%timeit lookup_levenshtein_fast()
%timeit lookup_ukkonen()

220 µs ± 1.35 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
128 µs ± 538 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
193 µs ± 593 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
127 µs ± 305 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
215 µs ± 698 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [6]:
# Time the sentence-level `lookup_compound` wrapper for each of the five distance algorithms.
%timeit lookup_compound_damerau_osa()
%timeit lookup_compound_damerau_osa_fast()
%timeit lookup_compound_levenshtein()
%timeit lookup_compound_levenshtein_fast()
%timeit lookup_compound_ukkonen()

20.1 ms ± 181 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
9.48 ms ± 34 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
17.8 ms ± 48.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
9.39 ms ± 38.2 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
22.6 ms ± 79.2 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [6]:
# Time the `word_segmentation` wrappers; only four are defined (there is no Ukkonen variant).
%timeit word_segmentation_damerau_osa()
%timeit word_segmentation_damerau_osa_fast()
%timeit word_segmentation_levenshtein()
%timeit word_segmentation_levenshtein_fast()

1.64 ms ± 23.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.65 ms ± 19.9 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.63 ms ± 4.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.63 ms ± 3.91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


**Note**: The near-identical timings for `word_segmentation` across the distance algorithms are expected, since we are passing `max_edit_distance=0`.