Skip to content

Commit

Permalink
Port _astrcmp to newer Python Unicode API
Browse files Browse the repository at this point in the history
Requires Python >= 3.3
  • Loading branch information
phw committed Oct 18, 2018
1 parent 55a6c22 commit ac8d804
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions picard/util/_astrcmp.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
/*
* Picard, the next-generation MusicBrainz tagger
* Copyright (C) 2018 Philipp Wolfer
* Copyright (C) 2006 Lukáš Lalinský
* Copyright (C) 2003 Benbuck Nason
*
Expand Down Expand Up @@ -56,8 +57,8 @@
#define MAX(x, y) (((x) > (y)) ? (x) : (y))
#define MATRIX(a, b) matrix[(b) * (len1 + 1) + (a)]

float LevenshteinDistance(const Py_UNICODE * s1, int len1,
const Py_UNICODE * s2, int len2)
float LevenshteinDistance(int k1, const void * s1, Py_ssize_t len1,
int k2, const void * s2, Py_ssize_t len2)
{
int *matrix, index1, index2;
float result;
Expand Down Expand Up @@ -87,14 +88,14 @@ float LevenshteinDistance(const Py_UNICODE * s1, int len1,

for (index1 = 1; index1 <= len1; index1++)
{
Py_UNICODE s1_current = s1[index1 - 1];
Py_UCS4 s1_current = PyUnicode_READ(k1, s1, index1 - 1);

/* Step 4 */
/* Loop through second string */

for (index2 = 1; index2 <= len2; index2++)
{
Py_UNICODE s2_current = s2[index2 - 1];
Py_UCS4 s2_current = PyUnicode_READ(k2, s2, index2 - 1);

/* Step 5 */
/* Calculate cost of this iteration
Expand All @@ -119,9 +120,9 @@ float LevenshteinDistance(const Py_UNICODE * s1, int len1,
if (index1 > 2 && index2 > 2)
{
int trans = MATRIX(index1 - 2, index2 - 2) + 1;
if (s1[index1 - 2] != s2_current)
if (PyUnicode_READ(k1, s1, index1 - 2) != s2_current)
trans++;
if (s1_current != s2[index2 - 2])
if (s1_current != PyUnicode_READ(k2, s2, index2 - 2))
trans++;
if (cell > trans)
cell = trans;
Expand All @@ -147,20 +148,26 @@ astrcmp(PyObject *self, PyObject *args)
{
PyObject *s1, *s2;
float d;
const Py_UNICODE *us1, *us2;
int len1, len2;
const void *ud1, *ud2;
int k1, k2;
Py_ssize_t len1, len2;
PyThreadState *_save;

if (!PyArg_ParseTuple(args, "UU", &s1, &s2))
return NULL;

us1 = PyUnicode_AS_UNICODE(s1);
us2 = PyUnicode_AS_UNICODE(s2);
if (PyUnicode_READY(s1) == -1 || PyUnicode_READY(s2) == -1)
return NULL;

k1 = PyUnicode_KIND(s1);
k2 = PyUnicode_KIND(s2);
ud1 = PyUnicode_DATA(s1);
ud2 = PyUnicode_DATA(s2);
len1 = PyUnicode_GetLength(s1);
len2 = PyUnicode_GetLength(s2);

Py_UNBLOCK_THREADS
d = LevenshteinDistance(us1, len1, us2, len2);
d = LevenshteinDistance(k1, ud1, len1, k2, ud2, len2);
Py_BLOCK_THREADS
return Py_BuildValue("f", d);
}
Expand Down

0 comments on commit ac8d804

Please sign in to comment.