Skip to content

Commit

Permalink
Merge pull request #1450 from mabel-dev/#1447
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Feb 16, 2024
2 parents 3b1253b + 3c5c667 commit eaea5ef
Show file tree
Hide file tree
Showing 8 changed files with 57 additions and 70 deletions.
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__build__ = 302
__build__ = 304

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
2 changes: 1 addition & 1 deletion opteryx/compiled/functions/hash_table.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.
47 changes: 47 additions & 0 deletions opteryx/compiled/levenshtein/clevenshtein.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# cython: language_level=3

import numpy as np # Required for array allocation

cdef int min3(int x, int y, int z):
    """Utility function to find the minimum of three integers."""
    cdef int m = x
    if y < m:
        m = y
    if z < m:
        m = z
    return m


def levenshtein(str string1, str string2):
    """
    Calculate the Levenshtein distance between two strings.

    The distance is the minimum number of single-character insertions,
    deletions and substitutions needed to turn one string into the other.

    Only two rows of the dynamic-programming table are held at any time,
    sized by the shorter string. This keeps memory linear and avoids
    computing (len1 + 1) * (len2 + 1) in C int arithmetic, which can
    overflow for long inputs.

    Parameters:
        string1 (str): The first string to compare.
        string2 (str): The second string to compare.

    Returns:
        int: The Levenshtein distance between string1 and string2.
    """
    cdef int len1 = len(string1)
    cdef int len2 = len(string2)
    cdef int i, j
    cdef int[:] previous
    cdef int[:] current

    # The distance is symmetric, so make string2 the shorter one: the rows
    # are indexed by string2, which keeps them as small as possible.
    if len2 > len1:
        string1, string2 = string2, string1
        len1, len2 = len2, len1

    # Against an empty string every character of the other is an insertion.
    if len2 == 0:
        return len1

    # np.intc is numpy's C-int dtype, matching the int[:] memoryviews.
    # Row 0 of the table: turning "" into string2[:j] costs j insertions.
    previous = np.arange(len2 + 1, dtype=np.intc)
    current = np.empty(len2 + 1, dtype=np.intc)

    for i in range(1, len1 + 1):
        # Column 0: turning string1[:i] into "" costs i removals.
        current[0] = i
        for j in range(1, len2 + 1):
            if string1[i - 1] == string2[j - 1]:
                # Matching characters carry the diagonal cost forward.
                current[j] = previous[j - 1]
            else:
                current[j] = 1 + min3(
                    previous[j],       # Remove
                    current[j - 1],    # Insert
                    previous[j - 1]    # Replace
                )
        # The row just filled becomes the "previous" row of the next pass;
        # the old row's buffer is reused and fully overwritten.
        previous, current = current, previous

    # After the final swap the last computed row lives in `previous`.
    return previous[len2]
11 changes: 6 additions & 5 deletions opteryx/functions/string_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,11 @@ def rtrim(*args):


def levenshtein(a, b):
from opteryx.third_party.levenshtein import levenshtein as lev
from opteryx.compiled.levenshtein import levenshtein as lev

def _outer():
for index, value in enumerate(a):
yield lev(value, b[index])
# Convert numpy arrays to lists
a_list = a.tolist()
b_list = b.tolist()

return list(_outer())
# Use zip to iterate over pairs of elements from a and b
return [lev(value_a, value_b) for value_a, value_b in zip(a_list, b_list)]
61 changes: 0 additions & 61 deletions opteryx/third_party/levenshtein/clevenshtein.pyx

This file was deleted.

2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def rust_build(setup_kwargs: Dict[str, Any]) -> None:
),
Extension(
name="clevenshtein",
sources=["opteryx/third_party/levenshtein/clevenshtein.pyx"],
sources=["opteryx/compiled/levenshtein/clevenshtein.pyx"],
extra_compile_args=COMPILE_FLAGS,
),
Extension(
Expand Down
2 changes: 1 addition & 1 deletion tests/misc/test_levenshtien.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from opteryx.third_party.levenshtein import levenshtein
from opteryx.compiled.levenshtein import levenshtein

# fmt:off
TESTS = [
Expand Down

0 comments on commit eaea5ef

Please sign in to comment.