In [1]:
import pkg_resources
import sys
from pathlib import Path

# Add the parent of the current working directory to the import search path
# (presumably so a local symspellpy checkout is importable — confirm).
# NOTE(review): the entry is appended, not prepended, so any copy of the
# package found earlier on sys.path is imported in preference to this one.
sys.path.append(str(Path.cwd().parent))

from symspellpy import SymSpell, Verbosity
from symspellpy.editdistance import DistanceAlgorithm


In [2]:
# Frequency dictionaries shipped inside the symspellpy package.
bigram_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_bigramdictionary_en_243_342.txt"
)
dictionary_path = pkg_resources.resource_filename(
    "symspellpy", "frequency_dictionary_en_82_765.txt"
)


def _build_sym_spell(distance_algorithm):
    """Create a SymSpell instance configured for one distance algorithm.

    The instance is created with default constructor arguments, switched to
    ``distance_algorithm``, and then loaded with the bigram dictionary
    followed by the unigram dictionary, so every benchmarked instance is set
    up identically apart from the algorithm.

    Args:
        distance_algorithm: The ``DistanceAlgorithm`` member to benchmark.

    Returns:
        The configured and loaded ``SymSpell`` instance.

    Raises:
        FileNotFoundError: If either loader reports failure by returning a
            falsy value. Without this check a failed load would go unnoticed
            and the timings below would be measured on empty dictionaries.
    """
    sym_spell = SymSpell()
    # The algorithm is selected through the private attribute.
    # NOTE(review): check whether the installed symspellpy version offers a
    # public way to select the algorithm and prefer it if so.
    sym_spell._distance_algorithm = distance_algorithm
    # Positional arguments are (path, 0, 2) and (path, 0, 1) — presumably the
    # term/count column indexes of each file; see the symspellpy API docs.
    if not sym_spell.load_bigram_dictionary(bigram_path, 0, 2):
        raise FileNotFoundError(
            f"Could not load bigram dictionary: {bigram_path}"
        )
    if not sym_spell.load_dictionary(dictionary_path, 0, 1):
        raise FileNotFoundError(f"Could not load dictionary: {dictionary_path}")
    return sym_spell


sym_spell_damerau_osa = _build_sym_spell(DistanceAlgorithm.DAMERAU_OSA)
sym_spell_damerau_osa_fast = _build_sym_spell(DistanceAlgorithm.DAMERAU_OSA_FAST)
sym_spell_levenshtein = _build_sym_spell(DistanceAlgorithm.LEVENSHTEIN)
sym_spell_levenshtein_fast = _build_sym_spell(DistanceAlgorithm.LEVENSHTEIN_FAST)

True

In [4]:
# Misspelled word that every single-word lookup benchmark searches for.
_LOOKUP_INPUT = "tepmperamet"


def lookup_damerau_osa():
    """Look up the benchmark word with ``Verbosity.ALL`` on ``sym_spell_damerau_osa``.

    The suggestions are discarded; the function exists only to be timed.
    """
    sym_spell_damerau_osa.lookup(_LOOKUP_INPUT, Verbosity.ALL)


def lookup_damerau_osa_fast():
    """Look up the benchmark word with ``Verbosity.ALL`` on ``sym_spell_damerau_osa_fast``.

    The suggestions are discarded; the function exists only to be timed.
    """
    sym_spell_damerau_osa_fast.lookup(_LOOKUP_INPUT, Verbosity.ALL)


def lookup_levenshtein():
    """Look up the benchmark word with ``Verbosity.ALL`` on ``sym_spell_levenshtein``.

    The suggestions are discarded; the function exists only to be timed.
    """
    sym_spell_levenshtein.lookup(_LOOKUP_INPUT, Verbosity.ALL)


def lookup_levenshtein_fast():
    """Look up the benchmark word with ``Verbosity.ALL`` on ``sym_spell_levenshtein_fast``.

    The suggestions are discarded; the function exists only to be timed.
    """
    sym_spell_levenshtein_fast.lookup(_LOOKUP_INPUT, Verbosity.ALL)

# Noisy multi-word sentence corrected by every lookup_compound benchmark.
_COMPOUND_INPUT = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
# Second positional argument passed to lookup_compound by every benchmark.
_COMPOUND_MAX_EDIT_DISTANCE = 2


def lookup_compound_damerau_osa():
    """Run ``lookup_compound`` on the benchmark sentence with ``sym_spell_damerau_osa``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_damerau_osa.lookup_compound(
        _COMPOUND_INPUT, _COMPOUND_MAX_EDIT_DISTANCE
    )


def lookup_compound_damerau_osa_fast():
    """Run ``lookup_compound`` on the benchmark sentence with ``sym_spell_damerau_osa_fast``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_damerau_osa_fast.lookup_compound(
        _COMPOUND_INPUT, _COMPOUND_MAX_EDIT_DISTANCE
    )


def lookup_compound_levenshtein():
    """Run ``lookup_compound`` on the benchmark sentence with ``sym_spell_levenshtein``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_levenshtein.lookup_compound(
        _COMPOUND_INPUT, _COMPOUND_MAX_EDIT_DISTANCE
    )


def lookup_compound_levenshtein_fast():
    """Run ``lookup_compound`` on the benchmark sentence with ``sym_spell_levenshtein_fast``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_levenshtein_fast.lookup_compound(
        _COMPOUND_INPUT, _COMPOUND_MAX_EDIT_DISTANCE
    )

# Space-free pangram split into words by every word_segmentation benchmark.
_SEGMENTATION_INPUT = "thequickbrownfoxjumpsoverthelazydog"
# Edit distance passed to word_segmentation by every benchmark.
_SEGMENTATION_MAX_EDIT_DISTANCE = 0


def word_segmentation_damerau_osa():
    """Run ``word_segmentation`` on the benchmark string with ``sym_spell_damerau_osa``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_damerau_osa.word_segmentation(
        _SEGMENTATION_INPUT, _SEGMENTATION_MAX_EDIT_DISTANCE
    )


def word_segmentation_damerau_osa_fast():
    """Run ``word_segmentation`` on the benchmark string with ``sym_spell_damerau_osa_fast``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_damerau_osa_fast.word_segmentation(
        _SEGMENTATION_INPUT, _SEGMENTATION_MAX_EDIT_DISTANCE
    )


def word_segmentation_levenshtein():
    """Run ``word_segmentation`` on the benchmark string with ``sym_spell_levenshtein``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_levenshtein.word_segmentation(
        _SEGMENTATION_INPUT, _SEGMENTATION_MAX_EDIT_DISTANCE
    )


def word_segmentation_levenshtein_fast():
    """Run ``word_segmentation`` on the benchmark string with ``sym_spell_levenshtein_fast``.

    The result is discarded; the function exists only to be timed.
    """
    sym_spell_levenshtein_fast.word_segmentation(
        _SEGMENTATION_INPUT, _SEGMENTATION_MAX_EDIT_DISTANCE
    )

In [5]:
# Time the single-word lookup once per distance algorithm; each function
# issues the same lookup call against a differently configured instance.
%timeit lookup_damerau_osa()
%timeit lookup_damerau_osa_fast()
%timeit lookup_levenshtein()
%timeit lookup_levenshtein_fast()

214 µs ± 770 ns per loop (mean ± std. dev. of 7 runs, 1000 loops each)
130 µs ± 538 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
192 µs ± 346 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
130 µs ± 369 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [6]:
# Time the multi-word lookup_compound call once per distance algorithm,
# using the same input sentence for all four instances.
%timeit lookup_compound_damerau_osa()
%timeit lookup_compound_damerau_osa_fast()
%timeit lookup_compound_levenshtein()
%timeit lookup_compound_levenshtein_fast()

20.5 ms ± 175 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
10.9 ms ± 217 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
18 ms ± 67 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
10.5 ms ± 125 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [7]:
# Time word_segmentation once per distance algorithm, using the same
# space-free input string for all four instances.
%timeit word_segmentation_damerau_osa()
%timeit word_segmentation_damerau_osa_fast()
%timeit word_segmentation_levenshtein()
%timeit word_segmentation_levenshtein_fast()

1.62 ms ± 2.94 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.62 ms ± 7.84 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.75 ms ± 90.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
1.65 ms ± 12.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


**Note**: The near-identical timings for `word_segmentation` across all four algorithms are expected, since we are passing `max_edit_distance=0`.