From 95baec24361312ab1c045efe322e1f653e93fa36 Mon Sep 17 00:00:00 2001 From: Draco94 <31309016+Draco94@users.noreply.github.com> Date: Fri, 23 Apr 2021 19:38:45 +0200 Subject: [PATCH] Implemented #385 enhancement and updated documentation (#549) * Implemented #385 enhancement and updated documentation Signed-off-by: Darko Djolovic * Created flag to Cursor.var() Signed-off-by: Darko Djolovic * Removed first commit changes, updated documetnation Signed-off-by: Darko Djolovic * Added testing sample 'QueringRawData.py' and renamed attribute 'bypassstringencoding' to 'bypassencoding' with updated documentation Signed-off-by: Darko Djolovic --- doc/src/api_manual/cursor.rst | 8 ++- doc/src/user_guide/sql_execution.rst | 85 +++++++++++++++++++++++++++- samples/QueringRawData.py | 75 ++++++++++++++++++++++++ src/cxoCursor.c | 15 +++-- 4 files changed, 175 insertions(+), 8 deletions(-) create mode 100644 samples/QueringRawData.py diff --git a/doc/src/api_manual/cursor.rst b/doc/src/api_manual/cursor.rst index a916c0b..dfa9d6a 100644 --- a/doc/src/api_manual/cursor.rst +++ b/doc/src/api_manual/cursor.rst @@ -572,7 +572,7 @@ Cursor Object .. method:: Cursor.var(dataType, [size, arraysize, inconverter, outconverter, \ - typename, encodingErrors]) + typename, encodingErrors, bypassencoding]) Create a variable with the specified characteristics. This method was designed for use with PL/SQL in/out variables where the length or type @@ -632,6 +632,12 @@ Cursor Object `decode `__ function. + The bypassencoding parameter, if specified, should be passed as a + boolean. This feature allows results of database types CHAR, NCHAR, + LONG_STRING, NSTRING, STRING to be returned raw, meaning cx_Oracle + won't do any decoding conversion. See + :ref:`Fetching raw data <fetching-raw-data>` for more information. + .. note:: The DB API definition does not define this method. 
diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst index 3a51cce..cd62271 100644 --- a/doc/src/user_guide/sql_execution.rst +++ b/doc/src/user_guide/sql_execution.rst @@ -287,8 +287,8 @@ going to be fetched. The function is expected to return a or the value ``None``. The value ``None`` indicates that the default type should be used. -Examples of output handlers are shown in :ref:`numberprecision` and -:ref:`directlobs`. Also see samples such as `samples/TypeHandlers.py +Examples of output handlers are shown in :ref:`numberprecision`, +:ref:`directlobs` and :ref:`fetching-raw-data`. Also see samples such as `samples/TypeHandlers.py `__ .. _numberprecision: @@ -344,6 +344,87 @@ See `samples/ReturnNumbersAsDecimals.py `__ +.. _fetching-raw-data: + +Fetching Raw Data +--------------------- + +Sometimes cx_Oracle may have problems converting data to Unicode and you may +want to inspect the problem more closely rather than auto-fix it using the +``encodingErrors`` parameter. This may be useful when a database contains +records or fields that are in the wrong encoding altogether. + +It is not recommended to use mixed encodings in databases. +This functionality is aimed at troubleshooting databases +that have inconsistent encodings for external reasons. + +For these cases, you can pass the additional keyword argument +``bypassencoding = True`` into :meth:`Cursor.var()`. This needs +to be used in combination with :ref:`outputtypehandlers`: + + .. code-block:: python + + #defining output type handlers method + def ConvertStringToBytes(cursor, name, defaultType, size, precision, scale): + if defaultType == cx_Oracle.STRING: + return cursor.var(str, arraysize=cursor.arraysize, bypassencoding = True) + + #set cursor outputtypehandler to the method above + cursor = connection.cursor() + cursor.outputtypehandler = ConvertStringToBytes + + +This will allow you to receive data as raw bytes. + + .. 
code-block:: python + + statement = cursor.execute("select content, charset from SomeTable") + data = statement.fetchall() + + +This will produce output such as: + + .. code-block:: python + + [(b'Fianc\xc3\xa9', b'UTF-8')] + + +Note that the trailing ``\xc3\xa9`` is é encoded in UTF-8. You can then do the following: + + + .. code-block:: python + + import codecs + # data = [(b'Fianc\xc3\xa9', b'UTF-8')] + unicodecontent = data[0][0].decode(data[0][1].decode()) # Assuming your charset encoding is UTF-8 + + +This will decode it back to "Fiancé". + +If you want to save ``b'Fianc\xc3\xa9'`` to the database, you will need to create a +:meth:`Cursor.var()` variable that tells cx_Oracle that the value is indeed +intended as a string: + + + .. code-block:: python + + connection = cx_Oracle.connect("hr", userpwd, "dbhost.example.com/orclpdb1") + cursor = connection.cursor() + cursorvariable = cursor.var(cx_Oracle.STRING) + cursorvariable.setvalue(0, "Fiancé".encode("UTF-8")) # b'Fianc\xc3\xa9' + cursor.execute("update SomeTable set SomeColumn = :param where id = 1", param=cursorvariable) + + +At that point, the bytes will be assumed to be in the correct encoding and should insert as you expect. + +.. warning:: + This functionality is "as-is": when saving strings like this, + the bytes will be assumed to be in the correct encoding and will + insert like that. Proper encoding is the responsibility of the user and + no correctness of any data in the database can be assumed + to exist by itself. + + .. 
_outconverters: Changing Query Results with Outconverters diff --git a/samples/QueringRawData.py b/samples/QueringRawData.py new file mode 100644 index 0000000..c70b23c --- /dev/null +++ b/samples/QueringRawData.py @@ -0,0 +1,75 @@ +# -*- coding: utf-8 -*- +import cx_Oracle +import sample_env + +"The test below verifies that the option to work around saving and reading of inconsistent encodings works" + +def ConvertStringToBytes(cursor, name, defaultType, size, precision, scale): + if defaultType == cx_Oracle.STRING: + return cursor.var(str, arraysize=cursor.arraysize, bypassencoding = True) + +connection = cx_Oracle.connect(sample_env.get_main_connect_string()) +cursor = connection.cursor() + +cursor.outputtypehandler = ConvertStringToBytes + +sql = 'create table EncodingExperiment (content varchar2(100), encoding varchar2(15))' + +print('Creating experiment table') +try: + cursor.execute(sql) + print('Success, will attempt to add records') +except Exception as err: + # table already exists + print('%s\n%s'%(err, 'EncodingExperiment table exists... 
Will attempt to add records')) + +# variable that we will test encodings against +unicode_string = 'I bought a cafetière on the Champs-Élysées' + +# First test +windows_1252_encoded = unicode_string.encode('windows-1252') +# Second test +utf8_encoded = unicode_string.encode('utf-8') + +sqlparameters = [(windows_1252_encoded, 'windows-1252'), (utf8_encoded, 'utf-8')] + +sql = 'insert into EncodingExperiment (content, encoding) values (:content, :encoding)' + +# cx_Oracle string variable in which we will store byte value and insert it as such +content_variable = cursor.var(cx_Oracle.STRING) + +print('Adding records to the table: "EncodingExperiment"') +for sqlparameter in sqlparameters: + content, encoding = sqlparameter + # setting content_variable value to a byte value and instert it as such + content_variable.setvalue(0, content) + cursor.execute(sql, content=content_variable, encoding=encoding) + +sql = 'select * from EncodingExperiment' + +print('Fetching records from table EncodingExperiment') +result = cursor.execute(sql).fetchall() + +for dataset in result: + content, encoding = dataset[0], dataset[1].decode() + decodedcontent = content.decode(encoding) + print('Is "%s" == "%s" ?\nResult: %s, (decoded from: %s)'%(decodedcontent, unicode_string, decodedcontent == unicode_string, encoding)) + +print('Finished testing, will attempt to drop the table "EncodingExperiment"') +# drop table after finished testing +sql = 'drop table EncodingExperiment' +try: + cursor.execute(sql) + print('Successfully droped table "EncodingExperiment" from database.') +except Exception as err: + print('Failed to drop table from the database, info: %s'%err) + + + + + + + + + + diff --git a/src/cxoCursor.c b/src/cxoCursor.c index 19fc3df..6988b73 100644 --- a/src/cxoCursor.c +++ b/src/cxoCursor.c @@ -1792,25 +1792,25 @@ static PyObject *cxoCursor_var(cxoCursor *cursor, PyObject *args, PyObject *keywordArgs) { static char *keywordList[] = { "type", "size", "arraysize", - "inconverter", 
"outconverter", "typename", "encodingErrors", + "inconverter", "outconverter", "typename", "encodingErrors", "bypassencoding", NULL }; PyObject *inConverter, *outConverter, *typeNameObj; Py_ssize_t encodingErrorsLength; cxoTransformNum transformNum; const char *encodingErrors; cxoObjectType *objType; - int size, arraySize; + int size, arraySize, bypassEncoding; PyObject *type; cxoVar *var; // parse arguments - size = 0; + size = bypassEncoding = 0; encodingErrors = NULL; arraySize = cursor->bindArraySize; inConverter = outConverter = typeNameObj = NULL; - if (!PyArg_ParseTupleAndKeywords(args, keywordArgs, "O|iiOOOz#", + if (!PyArg_ParseTupleAndKeywords(args, keywordArgs, "O|iiOOOz#p", keywordList, &type, &size, &arraySize, &inConverter, &outConverter, - &typeNameObj, &encodingErrors, &encodingErrorsLength)) + &typeNameObj, &encodingErrors, &encodingErrorsLength, &bypassEncoding)) return NULL; // determine the type of variable @@ -1843,6 +1843,11 @@ static PyObject *cxoCursor_var(cxoCursor *cursor, PyObject *args, strcpy((char*) var->encodingErrors, encodingErrors); } + // Flag that manually changes transform type to bytes + if (bypassEncoding) { + var->transformNum = CXO_TRANSFORM_BINARY; + } + return (PyObject*) var; }