Implemented #385 enhancement and updated documentation (#549)

* Implemented #385 enhancement and updated documentation Signed-off-by: Darko Djolovic <ddjolovic@outlook.com> * Created flag to Cursor.var() Signed-off-by: Darko Djolovic <ddjolovic@outlook.com> * Removed first commit changes, updated documetnation Signed-off-by: Darko Djolovic <ddjolovic@outlook.com> * Added testing sample 'QueringRawData.py' and renamed attribute 'bypassstringencoding' to 'bypassencoding' with updated documentation Signed-off-by: Darko Djolovic <ddjolovic@outlook.com>
oracle · Apr 23, 2021 · 95baec2 · 95baec2
1 parent ffa2086
commit 95baec2
Show file tree

Hide file tree

Showing 4 changed files with 175 additions and 8 deletions.
diff --git a/doc/src/api_manual/cursor.rst b/doc/src/api_manual/cursor.rst
@@ -572,7 +572,7 @@ Cursor Object
 
 
 .. method:: Cursor.var(dataType, [size, arraysize, inconverter, outconverter, \
-        typename, encodingErrors])
+        typename, encodingErrors, bypassencoding])
 
     Create a variable with the specified characteristics. This method was
     designed for use with PL/SQL in/out variables where the length or type
@@ -632,6 +632,12 @@ Cursor Object
     `decode <https://docs.python.org/3/library/stdtypes.html#bytes.decode>`__
     function.
 
+    The bypassencoding parameter, if specified, should be passed as a
+    boolean. This feature allows results of database types CHAR, NCHAR,
+    LONG_STRING, NSTRING, STRING to be returned raw, meaning cx_Oracle
+    won't do any decoding conversion. See
+    :ref:`Fetching raw data <fetching-raw-data>` for more information.
+
     .. note::
 
         The DB API definition does not define this method.
diff --git a/doc/src/user_guide/sql_execution.rst b/doc/src/user_guide/sql_execution.rst
@@ -287,8 +287,8 @@ going to be fetched. The function is expected to return a
 or the value ``None``. The value ``None`` indicates that the default type
 should be used.
 
-Examples of output handlers are shown in :ref:`numberprecision` and
-:ref:`directlobs`.  Also see samples such as `samples/TypeHandlers.py
+Examples of output handlers are shown in :ref:`numberprecision`,
+:ref:`directlobs` and :ref:`fetching-raw-data`.  Also see samples such as `samples/TypeHandlers.py
 <https://github.com/oracle/python-cx_Oracle/blob/master/samples/TypeHandlers.py>`__
 
 .. _numberprecision:
@@ -344,6 +344,87 @@ See `samples/ReturnNumbersAsDecimals.py
 <https://github.com/oracle/python-cx_Oracle/blob/master/samples/ReturnNumbersAsDecimals.py>`__
 
 
+.. _fetching-raw-data:
+
+Fetching Raw Data
+---------------------
+
+Sometimes cx_Oracle may have problems converting data to unicode and you may
+want to inspect the problem more closely rather than auto-fix it using the
+``encodingErrors`` parameter. This may be useful when a database contains
+records or fields that are in a wrong encoding altogether.
+
+It is not recommended to use mixed encodings in databases. 
+This functionality is aimed at troubleshooting databases 
+that have inconsistent encodings for external reasons.
+
+For these cases, you can pass the additional keyword argument
+``bypassencoding = True`` to :meth:`Cursor.var()`. This needs
+to be used in combination with :ref:`outputtypehandlers`.
+
+    .. code-block:: python
+
+		#defining output type handlers method
+		def ConvertStringToBytes(cursor, name, defaultType, size, precision, scale):
+		    if defaultType == cx_Oracle.STRING:
+		        return cursor.var(str, arraysize=cursor.arraysize, bypassencoding = True) 
+
+		#set cursor outputtypehandler to the method above
+		cursor = connection.cursor()
+		cursor.outputtypehandler = ConvertStringToBytes
+
+
+This will allow you to receive data as raw bytes.
+
+    .. code-block:: python
+
+		statement = cursor.execute("select content, charset from SomeTable")
+		data = statement.fetchall()
+
+
+This will produce output as:
+
+    .. code-block:: python
+
+	    [(b'Fianc\xc3\xa9', b'UTF-8')]
+
+
+Note that the trailing ``\xc3\xa9`` is é encoded in UTF-8. You can then do the following:
+
+
+    .. code-block:: python
+
+		import codecs
+		# data = [(b'Fianc\xc3\xa9', b'UTF-8')]
+		unicodecontent = data[0][0].decode(data[0][1].decode()) # Assuming your charset encoding is UTF-8 
+
+
+This will revert it back to "Fiancé".
+
+If you want to save ``b'Fianc\xc3\xa9'`` to the database you will need to create
+a variable with :meth:`Cursor.var()` that tells cx_Oracle that the value is
+indeed intended as a string:
+
+
+    .. code-block:: python
+		
+	    connection = cx_Oracle.connect("hr", userpwd, "dbhost.example.com/orclpdb1")
+	    cursor = connection.cursor()
+	    cursorvariable = cursor.var(cx_Oracle.STRING)
+	    cursorvariable.setvalue(0, "Fiancé".encode("UTF-8")) # b'Fianc\xc3\xa9'
+	    cursor.execute("update SomeTable set SomeColumn = :param where id = 1", param=cursorvariable)
+
+
+At that point, the bytes will be assumed to be in the correct encoding and should insert as you expect.
+
+.. warning::
+    This functionality is "as-is": when saving strings like this,
+    the bytes will be assumed to be in the correct encoding and will
+    be inserted unchanged. Proper encoding is the responsibility of the
+    user, and the correctness of data stored in the database this way
+    cannot be assumed.
+
+
 .. _outconverters:
 
 Changing Query Results with Outconverters

diff --git a/samples/QueringRawData.py b/samples/QueringRawData.py
@@ -0,0 +1,75 @@
+# -*- coding: utf-8 -*-
+import cx_Oracle
+import sample_env
+
+"The test below verifies that the option to work around saving and reading of inconsistent encodings works"
+
+def ConvertStringToBytes(cursor, name, defaultType, size, precision, scale):
+    if defaultType == cx_Oracle.STRING:
+        return cursor.var(str, arraysize=cursor.arraysize, bypassencoding = True)    
+
+connection = cx_Oracle.connect(sample_env.get_main_connect_string())
+cursor = connection.cursor()
+
+cursor.outputtypehandler = ConvertStringToBytes
+
+sql = 'create table EncodingExperiment (content varchar2(100), encoding varchar2(15))'
+
+print('Creating experiment table')
+try:
+    cursor.execute(sql)
+    print('Success, will attempt to add records')
+except Exception as err:
+    # table already exists
+    print('%s\n%s'%(err, 'EncodingExperiment table exists... Will attempt to add records'))
+
+# variable that we will test encodings against
+unicode_string = 'I bought a cafetière on the Champs-Élysées'
+
+# First test
+windows_1252_encoded = unicode_string.encode('windows-1252')
+# Second test
+utf8_encoded = unicode_string.encode('utf-8')
+
+sqlparameters = [(windows_1252_encoded, 'windows-1252'), (utf8_encoded, 'utf-8')]
+
+sql = 'insert into EncodingExperiment (content, encoding) values (:content, :encoding)'
+
+# cx_Oracle string variable in which we will store byte value and insert it as such
+content_variable = cursor.var(cx_Oracle.STRING)
+
+print('Adding records to the table: "EncodingExperiment"')
+for sqlparameter in sqlparameters:
+    content, encoding = sqlparameter
+    # setting content_variable value to a byte value and insert it as such
+    content_variable.setvalue(0, content)
+    cursor.execute(sql, content=content_variable, encoding=encoding)
+
+sql = 'select * from EncodingExperiment'
+
+print('Fetching records from table EncodingExperiment')
+result = cursor.execute(sql).fetchall()
+
+for dataset in result:
+    content, encoding = dataset[0], dataset[1].decode()
+    decodedcontent = content.decode(encoding)
+    print('Is "%s" == "%s" ?\nResult: %s, (decoded from: %s)'%(decodedcontent, unicode_string, decodedcontent == unicode_string, encoding))
+
+print('Finished testing, will attempt to drop the table "EncodingExperiment"')
+# drop table after finished testing
+sql = 'drop table EncodingExperiment'
+try:
+    cursor.execute(sql)
+    print('Successfully droped table "EncodingExperiment" from database.')
+except Exception as err:
+    print('Failed to drop table from the database, info: %s'%err)
+
+
+
+
+
+
+
+
+
+
diff --git a/src/cxoCursor.c b/src/cxoCursor.c
@@ -1792,25 +1792,25 @@ static PyObject *cxoCursor_var(cxoCursor *cursor, PyObject *args,
         PyObject *keywordArgs)
 {
     static char *keywordList[] = { "type", "size", "arraysize",
-            "inconverter", "outconverter", "typename", "encodingErrors",
+            "inconverter", "outconverter", "typename", "encodingErrors", "bypassencoding",
             NULL };
     PyObject *inConverter, *outConverter, *typeNameObj;
     Py_ssize_t encodingErrorsLength;
     cxoTransformNum transformNum;
     const char *encodingErrors;
     cxoObjectType *objType;
-    int size, arraySize;
+    int size, arraySize, bypassEncoding;
     PyObject *type;
     cxoVar *var;
 
     // parse arguments
-    size = 0;
+    size = bypassEncoding = 0;
     encodingErrors = NULL;
     arraySize = cursor->bindArraySize;
     inConverter = outConverter = typeNameObj = NULL;
-    if (!PyArg_ParseTupleAndKeywords(args, keywordArgs, "O|iiOOOz#",
+    if (!PyArg_ParseTupleAndKeywords(args, keywordArgs, "O|iiOOOz#p",
             keywordList, &type, &size, &arraySize, &inConverter, &outConverter,
-            &typeNameObj, &encodingErrors, &encodingErrorsLength))
+            &typeNameObj, &encodingErrors, &encodingErrorsLength, &bypassEncoding))
         return NULL;
 
     // determine the type of variable
@@ -1843,6 +1843,11 @@ static PyObject *cxoCursor_var(cxoCursor *cursor, PyObject *args,
         strcpy((char*) var->encodingErrors, encodingErrors);
     }
 
+    // Flag that manually changes transform type to bytes
+    if (bypassEncoding) {
+        var->transformNum = CXO_TRANSFORM_BINARY;
+    }
+
     return (PyObject*) var;
 }