In [1]:
import importlib.resources
import sys
from pathlib import Path

# Add the parent of the current working directory to the import search path.
# NOTE(review): presumably this lets the notebook import symspellpy from a
# source checkout one level up — confirm. Because the entry is appended, any
# symspellpy found earlier on sys.path (e.g. an installed copy) is used first.
sys.path.append(str(Path.cwd().parent))

from symspellpy import SymSpell, Verbosity
from symspellpy.editdistance import DistanceAlgorithm, EditDistance

In [2]:
# Frequency dictionaries are resolved relative to the symspellpy package itself.
bigram_path = importlib.resources.files("symspellpy") / "frequency_bigramdictionary_en_243_342.txt"
dictionary_path = importlib.resources.files("symspellpy") / "frequency_dictionary_en_82_765.txt"

# One SymSpell instance per edit-distance algorithm being compared.
sym_spells = {
    algorithm: SymSpell(distance_comparer=EditDistance(algorithm))
    for algorithm in (
        DistanceAlgorithm.DAMERAU_OSA,
        DistanceAlgorithm.DAMERAU_OSA_FAST,
        DistanceAlgorithm.LEVENSHTEIN,
        DistanceAlgorithm.LEVENSHTEIN_FAST,
    )
}

# Load both dictionaries into every instance and keep each call's return value.
# Previously only the very last load's result was visible (as the cell output),
# so a failed load on any other instance would have gone unnoticed and the
# benchmarks below would have run against an empty dictionary.
load_results = []
for sym_spell in sym_spells.values():
    load_results.append(sym_spell.load_bigram_dictionary(bigram_path, 0, 2))
    load_results.append(sym_spell.load_dictionary(dictionary_path, 0, 1))

# Fail fast so that "Run All" stops here instead of timing unloaded instances.
if not all(load_results):
    raise RuntimeError(
        f"failed to load a frequency dictionary from {bigram_path} or {dictionary_path}"
    )

# Module-level names used by the benchmark functions.
sym_spell_damerau_osa = sym_spells[DistanceAlgorithm.DAMERAU_OSA]
sym_spell_damerau_osa_fast = sym_spells[DistanceAlgorithm.DAMERAU_OSA_FAST]
sym_spell_levenshtein = sym_spells[DistanceAlgorithm.LEVENSHTEIN]
sym_spell_levenshtein_fast = sym_spells[DistanceAlgorithm.LEVENSHTEIN_FAST]

# Cell output: True, now reflecting all eight loads rather than only the last.
all(load_results)

True

In [3]:
def lookup_damerau_osa():
    """Benchmark target: single-term lookup on the Damerau-OSA instance.

    Looks up one misspelled term with ``Verbosity.ALL``. The suggestions are
    discarded; the function exists only so the call can be timed.
    """
    misspelled_term = "tepmperamet"
    sym_spell_damerau_osa.lookup(misspelled_term, Verbosity.ALL)

def lookup_damerau_osa_fast():
    """Benchmark target: single-term lookup on the Damerau-OSA-fast instance.

    Looks up one misspelled term with ``Verbosity.ALL``. The suggestions are
    discarded; the function exists only so the call can be timed.
    """
    misspelled_term = "tepmperamet"
    sym_spell_damerau_osa_fast.lookup(misspelled_term, Verbosity.ALL)

def lookup_levenshtein():
    """Benchmark target: single-term lookup on the Levenshtein instance.

    Looks up one misspelled term with ``Verbosity.ALL``. The suggestions are
    discarded; the function exists only so the call can be timed.
    """
    misspelled_term = "tepmperamet"
    sym_spell_levenshtein.lookup(misspelled_term, Verbosity.ALL)

def lookup_levenshtein_fast():
    """Benchmark target: single-term lookup on the Levenshtein-fast instance.

    Looks up one misspelled term with ``Verbosity.ALL``. The suggestions are
    discarded; the function exists only so the call can be timed.
    """
    misspelled_term = "tepmperamet"
    sym_spell_levenshtein_fast.lookup(misspelled_term, Verbosity.ALL)

def lookup_compound_damerau_osa():
    """Benchmark target: compound lookup on the Damerau-OSA instance.

    Passes a fixed multi-word phrase containing spacing and spelling errors
    to ``lookup_compound`` with a second argument of 2. The result is
    discarded; the function exists only so the call can be timed.
    """
    noisy_phrase = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_damerau_osa.lookup_compound(noisy_phrase, max_edit_distance)

def lookup_compound_damerau_osa_fast():
    """Benchmark target: compound lookup on the Damerau-OSA-fast instance.

    Passes a fixed multi-word phrase containing spacing and spelling errors
    to ``lookup_compound`` with a second argument of 2. The result is
    discarded; the function exists only so the call can be timed.
    """
    noisy_phrase = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_damerau_osa_fast.lookup_compound(noisy_phrase, max_edit_distance)

def lookup_compound_levenshtein():
    """Benchmark target: compound lookup on the Levenshtein instance.

    Passes a fixed multi-word phrase containing spacing and spelling errors
    to ``lookup_compound`` with a second argument of 2. The result is
    discarded; the function exists only so the call can be timed.
    """
    noisy_phrase = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_levenshtein.lookup_compound(noisy_phrase, max_edit_distance)

def lookup_compound_levenshtein_fast():
    """Benchmark target: compound lookup on the Levenshtein-fast instance.

    Passes a fixed multi-word phrase containing spacing and spelling errors
    to ``lookup_compound`` with a second argument of 2. The result is
    discarded; the function exists only so the call can be timed.
    """
    noisy_phrase = "whereis th elove hehad dated forImuch of thepast who couqdn'tread in sixthgrade and ins pired him"
    max_edit_distance = 2
    sym_spell_levenshtein_fast.lookup_compound(noisy_phrase, max_edit_distance)

def word_segmentation_damerau_osa():
    """Benchmark target: word segmentation on the Damerau-OSA instance.

    Segments a fixed string that has no spaces, passing 0 as the maximum
    edit distance. The result is discarded; the function exists only so the
    call can be timed.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_damerau_osa.word_segmentation(unspaced_text, max_edit_distance)

def word_segmentation_damerau_osa_fast():
    """Benchmark target: word segmentation on the Damerau-OSA-fast instance.

    Segments a fixed string that has no spaces, passing 0 as the maximum
    edit distance. The result is discarded; the function exists only so the
    call can be timed.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_damerau_osa_fast.word_segmentation(unspaced_text, max_edit_distance)

def word_segmentation_levenshtein():
    """Benchmark target: word segmentation on the Levenshtein instance.

    Segments a fixed string that has no spaces, passing 0 as the maximum
    edit distance. The result is discarded; the function exists only so the
    call can be timed.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_levenshtein.word_segmentation(unspaced_text, max_edit_distance)

def word_segmentation_levenshtein_fast():
    """Benchmark target: word segmentation on the Levenshtein-fast instance.

    Segments a fixed string that has no spaces, passing 0 as the maximum
    edit distance. The result is discarded; the function exists only so the
    call can be timed.
    """
    unspaced_text = "thequickbrownfoxjumpsoverthelazydog"
    max_edit_distance = 0
    sym_spell_levenshtein_fast.word_segmentation(unspaced_text, max_edit_distance)

In [4]:
# Time the single-term lookup wrapper once per distance algorithm.
%timeit lookup_damerau_osa()
%timeit lookup_damerau_osa_fast()
%timeit lookup_levenshtein()
%timeit lookup_levenshtein_fast()

107 μs ± 356 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
67.6 μs ± 319 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
95.4 μs ± 563 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)
66.7 μs ± 295 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [5]:
# Time the compound-lookup wrapper once per distance algorithm.
%timeit lookup_compound_damerau_osa()
%timeit lookup_compound_damerau_osa_fast()
%timeit lookup_compound_levenshtein()
%timeit lookup_compound_levenshtein_fast()

9.89 ms ± 65.3 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
5.1 ms ± 13.1 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
8.68 ms ± 46.6 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)
4.95 ms ± 13.2 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [6]:
# Time the word-segmentation wrapper once per distance algorithm.
%timeit word_segmentation_damerau_osa()
%timeit word_segmentation_damerau_osa_fast()
%timeit word_segmentation_levenshtein()
%timeit word_segmentation_levenshtein_fast()

1.13 ms ± 1.36 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
1.14 ms ± 2.94 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
1.14 ms ± 3.56 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
1.14 ms ± 1.6 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


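**Note**: The near-identical timings for `word_segmentation` across all four algorithms are expected, since we are passing `max_edit_distance=0`, which leaves essentially no edit-distance work for the algorithms to differ on.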