Skip to content

Commit d2860c6

Browse files
committed
refactor pyfribidi.c module
pyfribidi.c is now compiled as _pyfribidi. This module handles only unicode internally and no longer uses the fribidi_utf8_to_unicode function (which can't handle 4-byte UTF-8 sequences). This fixes the buffer overflow in issue #2. The code is now also much simpler: pyfribidi.c is down from 280 to 130 lines of code. We now ship a pure-Python pyfribidi module that handles the case where non-unicode strings are passed in. We now also adapt the size of the output string if clean=True is passed.
1 parent c80f50c commit d2860c6

File tree

5 files changed

+127
-310
lines changed

5 files changed

+127
-310
lines changed

Diff for: MANIFEST.in

+1-1
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ include fribidi-src/test/test_UTF-8_reordernsm.input
160160
include fribidi-src/test/test_UTF-8_reordernsm.reference
161161
include fribidi_systray.py
162162
include pyfribidi.c
163-
include pyfribidi.h
163+
include pyfribidi.py
164164
include pyfribidi2.py
165165
include setup.cfg
166166
include setup.py

Diff for: pyfribidi.c

+90-241
Original file line numberDiff line numberDiff line change
@@ -17,267 +17,116 @@
1717

1818
/* Copyright (C) 2005,2006,2010 Yaacov Zamir, Nir Soffer */
1919

20-
/* FriBidi python binding:
21-
22-
Install:
23-
python setup.py install
24-
25-
*/
2620

27-
#include <Python.h> /* must be first */
21+
#include <Python.h>
2822
#include <fribidi.h>
29-
#undef _POSIX_C_SOURCE
30-
31-
#include "pyfribidi.h"
3223

33-
#define MAX_STR_LEN 65000
3424

3525
static PyObject *
36-
_pyfribidi_log2vis (PyObject * self, PyObject * args, PyObject * kw)
26+
unicode_log2vis (PyUnicodeObject* string,
27+
FriBidiParType base_direction, int clean, int reordernsm)
3728
{
38-
PyObject *logical = NULL; /* input unicode or string object */
39-
FriBidiParType base = FRIBIDI_TYPE_RTL; /* optional direction */
40-
const char *encoding = "utf-8"; /* optional input string encoding */
41-
int clean = 0; /* optional flag to clean the string */
42-
int reordernsm = 1; /* optional flag to allow reordering of non spacing marks*/
43-
44-
static char *kwargs[] =
45-
{ "logical", "base_direction", "encoding", "clean", "reordernsm", NULL };
46-
47-
if (!PyArg_ParseTupleAndKeywords (args, kw, "O|isii", kwargs,
48-
&logical, &base, &encoding, &clean, &reordernsm))
49-
return NULL;
50-
51-
/* Validate base */
52-
53-
if (!(base == FRIBIDI_TYPE_RTL ||
54-
base == FRIBIDI_TYPE_LTR || base == FRIBIDI_TYPE_ON))
55-
return PyErr_Format (PyExc_ValueError,
56-
"invalid value %d: use either RTL, LTR or ON",
57-
base);
58-
59-
/* Check object type and delegate to one of the log2vis functions */
60-
61-
if (PyUnicode_Check (logical))
62-
return log2vis_unicode (logical, base, clean, reordernsm);
63-
else if (PyString_Check (logical))
64-
return log2vis_encoded_string (logical, encoding, base, clean, reordernsm);
65-
else
66-
return PyErr_Format (PyExc_TypeError,
67-
"expected unicode or str, not %s",
68-
logical->ob_type->tp_name);
29+
int i;
30+
int length = string->length;
31+
FriBidiChar *logical = NULL; /* input fribidi unicode buffer */
32+
FriBidiChar *visual = NULL; /* output fribidi unicode buffer */
33+
FriBidiStrIndex new_len = 0; /* length of the UTF-8 buffer */
34+
PyUnicodeObject *result = NULL;
35+
36+
/* Allocate fribidi unicode buffers
37+
TODO - Don't copy strings if sizeof(FriBidiChar) == sizeof(Py_UNICODE)
38+
*/
39+
40+
logical = PyMem_New (FriBidiChar, length + 1);
41+
if (logical == NULL) {
42+
PyErr_NoMemory();
43+
goto cleanup;
44+
}
45+
46+
visual = PyMem_New (FriBidiChar, length + 1);
47+
if (visual == NULL) {
48+
PyErr_NoMemory();
49+
goto cleanup;
50+
}
51+
52+
for (i=0; i<length; ++i) {
53+
logical[i] = string->str[i];
54+
}
55+
56+
/* Convert to unicode and order visually */
57+
fribidi_set_reorder_nsm(reordernsm);
58+
59+
if (!fribidi_log2vis (logical, length, &base_direction, visual,
60+
NULL, NULL, NULL)) {
61+
62+
PyErr_SetString (PyExc_RuntimeError,
63+
"fribidi failed to order string");
64+
goto cleanup;
65+
}
66+
67+
/* Cleanup the string if requested */
68+
if (clean) {
69+
length = fribidi_remove_bidi_marks (visual, length, NULL, NULL, NULL);
70+
}
71+
72+
result = (PyUnicodeObject*) PyUnicode_FromUnicode(NULL, length);
73+
if (result == NULL) {
74+
goto cleanup;
75+
}
76+
77+
for (i=0; i<length; ++i) {
78+
result->str[i] = visual[i];
79+
}
80+
81+
cleanup:
82+
/* Delete unicode buffers */
83+
PyMem_Del (logical);
84+
PyMem_Del (visual);
85+
86+
return (PyObject *)result;
6987
}
7088

71-
/*
72-
log2vis_unicode - reorder unicode string visually
73-
74-
Return value: new reference
75-
76-
Return Python unicode object ordered visually or NULL if an exception
77-
was raised.
78-
79-
Since Python and fribidi don't now know each other unicode format,
80-
encode input string as utf-8 and invoke log2vis_utf8.
81-
82-
Arguments:
83-
84-
- unicode: Python unicode object
85-
- base_direction: input string base direction, e.g right to left
86-
*/
87-
8889
static PyObject *
89-
log2vis_unicode (PyObject * unicode, FriBidiParType base_direction, int clean, int reordernsm)
90+
_pyfribidi_log2vis (PyObject * self, PyObject * args, PyObject * kw)
9091
{
91-
PyObject *logical = NULL; /* input string encoded in utf-8 */
92-
PyObject *visual = NULL; /* output string encoded in utf-8 */
93-
PyObject *result = NULL; /* unicode output string */
94-
95-
int length = PyUnicode_GET_SIZE (unicode);
96-
97-
logical = PyUnicode_AsUTF8String (unicode);
98-
if (logical == NULL)
99-
goto cleanup;
100-
101-
visual = log2vis_utf8 (logical, length, base_direction, clean, reordernsm);
102-
if (visual == NULL)
103-
goto cleanup;
104-
105-
result = PyUnicode_DecodeUTF8 (PyString_AS_STRING (visual),
106-
PyString_GET_SIZE (visual), "strict");
107-
108-
cleanup:
109-
Py_XDECREF (logical);
110-
Py_XDECREF (visual);
111-
112-
return result;
92+
PyUnicodeObject *logical = NULL; /* input unicode or string object */
93+
FriBidiParType base = FRIBIDI_TYPE_RTL; /* optional direction */
94+
int clean = 0; /* optional flag to clean the string */
95+
int reordernsm = 1; /* optional flag to allow reordering of non spacing marks*/
96+
97+
static char *kwargs[] =
98+
{ "logical", "base_direction", "clean", "reordernsm", NULL };
99+
100+
if (!PyArg_ParseTupleAndKeywords (args, kw, "U|iii", kwargs,
101+
&logical, &base, &clean, &reordernsm)) {
102+
return NULL;
103+
}
104+
105+
/* Validate base */
106+
107+
if (!(base == FRIBIDI_TYPE_RTL
108+
|| base == FRIBIDI_TYPE_LTR
109+
|| base == FRIBIDI_TYPE_ON)) {
110+
return PyErr_Format (PyExc_ValueError,
111+
"invalid value %d: use either RTL, LTR or ON",
112+
base);
113+
}
114+
115+
return unicode_log2vis (logical, base, clean, reordernsm);
113116
}
114117

115-
/*
116-
log2vis_encoded_string - reorder encoded string visually
117-
118-
Return value: new reference
119-
120-
Return Python string object ordered visually or NULL if an exception
121-
was raised. The returned string use the same encoding.
122-
123-
Invoke either log2vis_utf8 or log2vis_unicode.
124-
125-
- string: Python string object using encoding
126-
- encoding: string encoding, any encoding name known to Python
127-
- base_direction: input string base direction, e.g right to left
128-
*/
129-
130-
static PyObject *
131-
log2vis_encoded_string (PyObject * string, const char *encoding,
132-
FriBidiParType base_direction, int clean, int reordernsm)
133-
{
134-
PyObject *logical = NULL; /* logical unicode object */
135-
PyObject *result = NULL; /* output string object */
136-
137-
/* Always needed for the string length */
138-
logical = PyUnicode_Decode (PyString_AS_STRING (string),
139-
PyString_GET_SIZE (string),
140-
encoding, "strict");
141-
if (logical == NULL)
142-
return NULL;
143-
144-
if (strcmp (encoding, "utf-8") == 0)
145-
/* Shortcut for utf8 strings (little faster) */
146-
result = log2vis_utf8 (string,
147-
PyUnicode_GET_SIZE (logical),
148-
base_direction, clean, reordernsm);
149-
else
150-
{
151-
/* Invoke log2vis_unicode and encode back to encoding */
152-
153-
PyObject *visual = log2vis_unicode (logical, base_direction, clean, reordernsm);
154-
155-
if (visual)
156-
{
157-
result = PyUnicode_Encode (PyUnicode_AS_UNICODE
158-
(visual),
159-
PyUnicode_GET_SIZE (visual),
160-
encoding, "strict");
161-
Py_DECREF (visual);
162-
}
163-
}
164-
165-
Py_DECREF (logical);
166-
167-
return result;
168-
}
169-
170-
/*
171-
log2vis_utf8 - reorder string visually
172-
173-
Return value: new reference
174-
175-
Return Python string object ordered visually or NULL if an exception
176-
was raised.
177-
178-
Arguments:
179-
180-
- string: Python string object using utf-8 encoding
181-
- unicode_length: number of characters in string. This is not the
182-
number of bytes in the string, which may be much bigger than the
183-
number of characters, because utf-8 uses 1-4 bytes per character.
184-
- base_direction: input string base direction, e.g right to left
185-
*/
186-
187-
static PyObject *
188-
log2vis_utf8 (PyObject * string, int unicode_length,
189-
FriBidiParType base_direction, int clean, int reordernsm)
190-
{
191-
FriBidiChar *logical = NULL; /* input fribidi unicode buffer */
192-
FriBidiChar *visual = NULL; /* output fribidi unicode buffer */
193-
char *visual_utf8 = NULL; /* output fribidi UTF-8 buffer */
194-
FriBidiStrIndex new_len = 0; /* length of the UTF-8 buffer */
195-
PyObject *result = NULL; /* failure */
196-
197-
/* Allocate fribidi unicode buffers */
198-
199-
logical = PyMem_New (FriBidiChar, unicode_length + 1);
200-
if (logical == NULL)
201-
{
202-
PyErr_SetString (PyExc_MemoryError,
203-
"failed to allocate unicode buffer");
204-
goto cleanup;
205-
}
206-
207-
visual = PyMem_New (FriBidiChar, unicode_length + 1);
208-
if (visual == NULL)
209-
{
210-
PyErr_SetString (PyExc_MemoryError,
211-
"failed to allocate unicode buffer");
212-
goto cleanup;
213-
}
214-
215-
/* Convert to unicode and order visually */
216-
fribidi_set_reorder_nsm(reordernsm);
217-
fribidi_utf8_to_unicode (PyString_AS_STRING (string),
218-
PyString_GET_SIZE (string), logical);
219-
220-
if (!fribidi_log2vis (logical, unicode_length, &base_direction, visual,
221-
NULL, NULL, NULL))
222-
{
223-
PyErr_SetString (PyExc_RuntimeError,
224-
"fribidi failed to order string");
225-
goto cleanup;
226-
}
227-
228-
/* Cleanup the string if requested */
229-
if (clean)
230-
fribidi_remove_bidi_marks (visual, unicode_length, NULL, NULL, NULL);
231-
232-
/* Allocate fribidi UTF-8 buffer */
233-
234-
visual_utf8 = PyMem_New(char, (unicode_length * 4)+1);
235-
if (visual_utf8 == NULL)
236-
{
237-
PyErr_SetString (PyExc_MemoryError,
238-
"failed to allocate UTF-8 buffer");
239-
goto cleanup;
240-
}
241-
242-
/* Encode the reordered string and create result string */
243-
244-
new_len = fribidi_unicode_to_utf8 (visual, unicode_length, visual_utf8);
245-
246-
result = PyString_FromStringAndSize (visual_utf8, new_len);
247-
if (result == NULL)
248-
/* XXX does it raise any error? */
249-
goto cleanup;
250-
251-
cleanup:
252-
/* Delete unicode buffers */
253-
PyMem_Del (logical);
254-
PyMem_Del (visual);
255-
PyMem_Del (visual_utf8);
256-
257-
return result;
258-
}
259118

260119
static PyMethodDef PyfribidiMethods[] = {
261-
{"log2vis", (PyCFunction) _pyfribidi_log2vis,
262-
METH_VARARGS | METH_KEYWORDS,
263-
_pyfribidi_log2vis__doc__},
120+
{"log2vis", (PyCFunction) _pyfribidi_log2vis, METH_VARARGS | METH_KEYWORDS, NULL},
264121
{NULL, NULL, 0, NULL}
265122
};
266123

124+
267125
PyMODINIT_FUNC
268-
initpyfribidi (void)
126+
init_pyfribidi (void)
269127
{
270-
PyObject *module;
271-
272-
/* XXX What should be done if we fail here? */
273-
274-
module = Py_InitModule3 ("pyfribidi", PyfribidiMethods,
275-
_pyfribidi__doc__);
276-
128+
PyObject *module = Py_InitModule ("_pyfribidi", PyfribidiMethods);
277129
PyModule_AddIntConstant (module, "RTL", (long) FRIBIDI_TYPE_RTL);
278130
PyModule_AddIntConstant (module, "LTR", (long) FRIBIDI_TYPE_LTR);
279131
PyModule_AddIntConstant (module, "ON", (long) FRIBIDI_TYPE_ON);
280-
281-
PyModule_AddStringConstant (module, "__author__",
282-
"Yaacov Zamir and Nir Soffer");
283132
}

0 commit comments

Comments
 (0)