Browse files

tre-python.c: support searching in Unicode strings.

Please note that the entire Unicode string is first copied into a
temporary wchar_t[] buffer, and matching then runs on that copy.
The extra allocation and copy are unacceptable for large strings.

TODO: use tre_reguexec to iterate directly over the
Unicode string.
  • Loading branch information...
1 parent 8a4c5b6 commit a0f3f7dc407de06d14c291e2f87fe81b3c07d2d9 @avm avm committed with avm Jul 10, 2010
Showing with 30 additions and 5 deletions.
  1. +30 −5 python/tre-python.c
View
35 python/tre-python.c
@@ -337,9 +337,18 @@ PyTrePattern_search(TrePatternObject *self, PyObject *args)
char *targ;
size_t tlen;
- if (!PyArg_ParseTuple(args, "SO!|i:search", &pstring, &TreFuzzynessType,
+ if (PyTuple_Size(args) > 0 && PyUnicode_Check(PyTuple_GetItem(args, 0)))
+ {
+ if (!PyArg_ParseTuple(args, "UO!|i:search", &pstring, &TreFuzzynessType,
&fz, &eflags))
- return NULL;
+ return NULL;
+ }
+ else
+ {
+ if (!PyArg_ParseTuple(args, "SO!|i:search", &pstring, &TreFuzzynessType,
+ &fz, &eflags))
+ return NULL;
+ }
mo = newTreMatchObject();
if (mo == NULL)
@@ -356,10 +365,26 @@ PyTrePattern_search(TrePatternObject *self, PyObject *args)
mo->am.nmatch = nsub;
mo->am.pmatch = pm;
- targ = PyString_AsString(pstring);
- tlen = PyString_Size(pstring);
+ if (PyUnicode_Check(pstring))
+ {
+ Py_ssize_t len = PyUnicode_GetSize(pstring);
+ wchar_t *buf = calloc(sizeof(wchar_t), len);
+ if(!buf)
+ {
+ Py_DECREF(mo);
+ return PyErr_NoMemory();
+ }
+ PyUnicode_AsWideChar(pstring, buf, len);
+ rc = tre_regawnexec(&self->rgx, buf, len, &mo->am, fz->ap, eflags);
+ free(buf);
+ }
+ else
+ {
+ targ = PyString_AsString(pstring);
+ tlen = PyString_Size(pstring);
- rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags);
+ rc = tre_reganexec(&self->rgx, targ, tlen, &mo->am, fz->ap, eflags);
+ }
if (PyErr_Occurred())
{

0 comments on commit a0f3f7d

Please sign in to comment.