Skip to content

Commit

Permalink
Added streaming read method
Browse files Browse the repository at this point in the history
  • Loading branch information
mbleron committed Nov 11, 2016
1 parent 2f339a2 commit 0498570
Show file tree
Hide file tree
Showing 18 changed files with 1,305 additions and 394 deletions.
1,010 changes: 635 additions & 375 deletions ExcelTable.pck → ExcelTable.pkb

Large diffs are not rendered by default.

113 changes: 113 additions & 0 deletions ExcelTable.pks
@@ -0,0 +1,113 @@
create or replace package ExcelTable is
/* ======================================================================================

MIT License

Copyright (c) 2016 Marc Bleron

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

=========================================================================================
Change history :
Marc Bleron 2016-05-01 Creation
Marc Bleron 2016-06-25 Added string_cache.delete on tableClose
Added lob freeing
Marc Bleron 2016-09-11 Added ECDS_MAX_OFFSET in function Zip_openArchive
Marc Bleron 2016-10-30 New streaming read method for large files (requires
Java)
Added setFetchSize() procedure
====================================================================================== */

-- Read methods
-- Values accepted by the p_method parameter of getRows and tableStart.
-- DOM_READ (0) is the default used by getRows.
-- STREAM_READ (1) selects the streaming read method for large files, which
-- requires Java (see change history above).
DOM_READ constant binary_integer := 0;
STREAM_READ constant binary_integer := 1;

-- Sets the fetch size, i.e. the number of rows returned by each invocation of
-- the ODCITableFetch method. Per the README, when the client requests more rows
-- than the fetch size, the fetch size is used instead; the default is 100.
procedure setFetchSize (p_nrows in number);

/*
EBNF grammar for the range_expr and column_list expression

range_expr ::= ( cell_ref [ ":" cell_ref ] | col_ref ":" col_ref | row_ref ":" row_ref )
cell_ref ::= col_ref row_ref
col_ref ::= { "A".."Z" }
row_ref ::= integer

column_list ::= column_expr { "," column_expr }
column_expr ::= ( identifier datatype [ "column" string_literal ] | identifier for_ordinality )
datatype ::= ( number_expr | varchar2_expr | date_expr | clob_expr | for_ordinality )
number_expr ::= "number" [ "(" ( integer | "*" ) [ "," integer ] ")" ]
varchar2_expr ::= "varchar2" "(" integer [ "char" | "byte" ] ")"
date_expr ::= "date" [ "format" string_literal ]
clob_expr ::= "clob"
for_ordinality ::= "for" "ordinality"
identifier ::= "\"" { char } "\""
string_literal ::= "'" { char } "'"

*/

-- Pipelined table function exposing the content of a worksheet as rows.
-- It has no body of its own: the implementation is delegated to the
-- ExcelTableImpl object type (interface approach, "using" clause).
--   p_file   : the Excel file, as a BLOB
--   p_sheet  : worksheet name
--   p_cols   : column list (see column_list in the grammar above)
--   p_range  : range expression delimiting the table in the worksheet
--              (see range_expr in the grammar above); optional
--   p_method : read method, DOM_READ (default) or STREAM_READ
function getRows (
p_file in blob
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default DOM_READ
)
return anydataset pipelined
using ExcelTableImpl;

-- Returns in rtype the ANYTYPE descriptor of the projected rows, given the
-- range and column list expressions.
-- NOTE(review): presumably called from ExcelTableImpl.ODCITableDescribe - the
-- call site is not visible here; confirm.
procedure tableDescribe (
rtype out nocopy anytype
, p_range in varchar2
, p_cols in varchar2
);

-- Called from ExcelTableImpl.ODCITablePrepare; the returned ANYTYPE is passed
-- to the ExcelTableImpl constructor to build the scan context.
function tablePrepare(
tf_info in sys.ODCITabFuncInfo
)
return anytype;

-- Called from ExcelTableImpl.ODCITableStart with the getRows arguments.
-- p_ctx_id receives the context identifier that the caller stores and later
-- passes to tableFetch and tableClose.
procedure tableStart (
p_file in blob
, p_sheet in varchar2
, p_range in varchar2
, p_cols in varchar2
, p_method in binary_integer
, p_ctx_id out binary_integer
);

-- Called from ExcelTableImpl.ODCITableFetch.
--   p_type   : row type descriptor held by the scan context (atype)
--   p_ctx_id : context identifier obtained from tableStart
--   p_done   : completion flag held by the scan context (done)
--   nrows    : number of rows requested by the client
--   rws      : receives the fetched rows as an ANYDATASET
procedure tableFetch(
p_type in out nocopy anytype
, p_ctx_id in out nocopy binary_integer
, p_done in out nocopy integer
, nrows in number
, rws out nocopy anydataset
);

-- Called from ExcelTableImpl.ODCITableClose with the context identifier
-- obtained from tableStart.
procedure tableClose(
p_ctx_id in binary_integer
);

-- Helper returning a file as a BLOB, so that it can be passed as p_file to getRows.
-- NOTE(review): p_directory is presumably the name of an Oracle directory
-- object and p_filename a file inside it - confirm against the package body.
function getFile (
p_directory in varchar2
, p_filename in varchar2
)
return blob;

end ExcelTable;
/
7 changes: 7 additions & 0 deletions ExcelTableCell.tps
@@ -0,0 +1,7 @@
-- Object type describing a single worksheet cell.
-- NOTE(review): the attribute order (row, column, type, data) matches the
-- Object[] built by the Java method Cell.getOraData - presumably instances are
-- created from that array; confirm.
create or replace type ExcelTableCell as object (
-- row part of the cell reference
cellRow integer
-- column part of the cell reference (letters, at most 3 characters)
, cellCol varchar2(3)
-- type tag of the cell
, cellType varchar2(10)
-- cell content wrapped in an ANYDATA
, cellData anydata
)
/
2 changes: 2 additions & 0 deletions ExcelTableCellList.tps
@@ -0,0 +1,2 @@
-- Nested table collection of ExcelTableCell objects.
create or replace type ExcelTableCellList is table of ExcelTableCell
/
22 changes: 8 additions & 14 deletions ExcelTableImpl.tpb
Expand Up @@ -6,6 +6,7 @@ create or replace type body ExcelTableImpl as
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default 0
)
return number
is
Expand All @@ -25,18 +26,15 @@ create or replace type body ExcelTableImpl as
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default 0
)
return number
is
begin

--dbms_output.put_line('ODCITablePrepare');

sctx := ExcelTableImpl(
ExcelTable.tablePrepare(tf_info)
, null
, null
, 0
, 0
) ;

Expand All @@ -50,15 +48,15 @@ create or replace type body ExcelTableImpl as
, p_file in blob
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_range in varchar2 default null
, p_method in binary_integer default 0
)
return number
is
begin

--dbms_output.put_line('ODCITableStart');
ExcelTable.tableStart(p_file, p_sheet, p_range, p_cols, sctx.doc_id, sctx.ctx_id);


ExcelTable.tableStart(p_file, p_sheet, p_range, p_cols, p_method, sctx.ctx_id);

return ODCIConst.SUCCESS;

end ODCITableStart;
Expand All @@ -73,12 +71,9 @@ create or replace type body ExcelTableImpl as
is
begin

--dbms_output.put_line('ODCITableFetch : '||nrows);

ExcelTable.tableFetch(
self.atype
, self.ctx_id
, self.r_num
, self.done
, nrows
, rws
Expand All @@ -94,8 +89,7 @@ create or replace type body ExcelTableImpl as
is
begin

--dbms_output.put_line('ODCITableClose');
ExcelTable.tableClose(self.doc_id, self.ctx_id);
ExcelTable.tableClose(self.ctx_id);

return ODCIConst.SUCCESS;

Expand Down
9 changes: 6 additions & 3 deletions ExcelTableImpl.tps
Expand Up @@ -26,12 +26,12 @@ create or replace type ExcelTableImpl as object (
=========================================================================================
Change history :
Marc Bleron 2016-05-01 Creation
Marc Bleron 2016-10-19 Removed doc_id attribute,
changed ctx_id to integer data type
====================================================================================== */

atype anytype
, doc_id raw(13)
, ctx_id raw(13)
, r_num integer
, ctx_id integer
, done integer

, static function ODCITableDescribe(
Expand All @@ -40,6 +40,7 @@ create or replace type ExcelTableImpl as object (
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default 0
)
return number

Expand All @@ -50,6 +51,7 @@ create or replace type ExcelTableImpl as object (
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default 0
)
return number

Expand All @@ -59,6 +61,7 @@ create or replace type ExcelTableImpl as object (
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default 0
)
return number

Expand Down
68 changes: 66 additions & 2 deletions README.md
Expand Up @@ -3,26 +3,74 @@
ExcelTable is a pipelined table interface to read an Excel file (.xlsx or .xlsm) as if it were an external table.
It is entirely implemented in PL/SQL using an object type (for the ODCI routines) and a package supporting the core functionalities.

> As of version 1.2, a streaming implementation is available for better scalability on large files.
> This feature requires the server-side Java VM.
## Bug tracker

Found bugs? I'm sure there are...
Please create an issue here on GitHub at <https://github.com/mbleron/oracle/issues>.

## Installation

### Database requirement

ExcelTable requires Oracle Database 11\.2\.0\.2 and onwards.
> Note that the interface may work as well on versions 11\.1\.0\.6, 11\.1\.0\.7 and 11\.2\.0\.1, with limited support for CLOB projections, but that scenario has not been tested.
### DBA preliminary tasks

The ExcelTable package needs read access to the V$PARAMETER view internally to retrieve the value of the `max_string_size` parameter.
Therefore, the owner must be granted the necessary privilege in order to compile and run the program :
```sql
grant select on sys.v_$parameter to <user>;
```

Then, in this order :
On versions prior to 11\.2\.0\.4, a temporary XMLType table is used internally.
The owner requires the CREATE TABLE privilege in this case :
```sql
grant create table to <user>;
```


### PL/SQL

Create the following objects, in this order :
```
@ExcelTableCell.tps
@ExcelTableCellList.tps
@ExcelTableImpl.tps
@ExcelTable.pck
@ExcelTable.pks
@ExcelTable.pkb
@ExcelTableImpl.tpb
```

### Java

If you want to use the streaming method, some Java classes - packed in a jar file - have to be deployed in the database.
The jar files to deploy depend on the database version.

* Versions < 11\.2\.0\.4
Except for version 11\.2\.0\.4 which supports JDK 6, Oracle 11g only supports JDK 5 (Java 1.5).
Load the following jar files in order to use the streaming method :
+ stax-api-1.0-2.jar
+ sjsxp-1.0.2.jar
+ exceldbtools-1.5.jar

```
loadjava -u user/passwd@sid -r -v -jarsasdbobjects java/lib/stax-api-1.0-2.jar
loadjava -u user/passwd@sid -r -v -jarsasdbobjects java/lib/sjsxp-1.0.2.jar
loadjava -u user/passwd@sid -r -v -jarsasdbobjects java/lib/exceldbtools-1.5.jar
```


* Versions >= 11\.2\.0\.4
The StAX API is included in JDK 6, as well as the Sun Java implementation (SJSXP), so for those versions one only needs to load the following jar file :
+ exceldbtools-1.6.jar

```
loadjava -u user/passwd@sid -r -v -jarsasdbobjects java/lib/exceldbtools-1.6.jar
```

## Usage

Expand All @@ -32,6 +80,7 @@ function getRows (
, p_sheet in varchar2
, p_cols in varchar2
, p_range in varchar2 default null
, p_method in binary_integer default DOM_READ
)
return anydataset pipelined
using ExcelTableImpl;
Expand All @@ -42,6 +91,17 @@ A helper function `ExcelTable.getFile` is available to directly reference the fi
* `p_sheet` : Worksheet name
* `p_cols` : Column list (see [specs](#columns-syntax-specification) below)
* `p_range` : Excel-like range expression that defines the table boundaries in the worksheet (see [specs](#range-syntax-specification) below)
* `p_method` : Read method - `DOM_READ` (0, the default) or `STREAM_READ` (1)


New in version 1.2
```sql
procedure setFetchSize (p_nrows in number);
```
Use setFetchSize() to control the number of rows returned by each invocation of the ODCITableFetch method.
If the number of rows requested by the client is greater than the fetch size, the fetch size is used instead.
The default fetch size is 100.


#### Columns syntax specification

Expand Down Expand Up @@ -138,6 +198,10 @@ from table(


## CHANGELOG
### 1.2 (2016-10-30)

* Added new streaming read method
* Added setFetchSize() procedure

### 1.1 (2016-06-25)

Expand Down
Binary file added java/lib/exceldbtools-1.5.jar
Binary file not shown.
Binary file added java/lib/exceldbtools-1.6.jar
Binary file not shown.
Binary file added java/lib/sjsxp-1.0.2.jar
Binary file not shown.
Binary file added java/lib/stax-api-1.0-2.jar
Binary file not shown.
42 changes: 42 additions & 0 deletions java/src/db/office/spreadsheet/Cell.java
@@ -0,0 +1,42 @@
package db.office.spreadsheet;

import java.sql.Clob;
import java.sql.Connection;
import java.sql.SQLException;

import oracle.sql.ANYDATA;
import oracle.sql.CHAR;
import oracle.sql.Datum;

/**
 * A single spreadsheet cell: its reference (row and column), a type tag and
 * its content as a string.
 */
public class Cell {

    private CellRef ref;
    private String type;
    private String value;

    /**
     * @param cellRef reference (row and column) of the cell
     * @param value   cell content
     * @param type    type tag of the cell
     */
    public Cell (CellRef cellRef, String value, String type) {
        this.ref = cellRef;
        this.value = value;
        this.type = type;
    }

    /**
     * @return the cell reference followed by ":" and the cell content
     */
    public String toString() {
        return this.ref.column + this.ref.row + ":" + this.value;
    }

    /**
     * Builds the attribute array {row, column, type, data} for this cell,
     * where data is the cell content wrapped in an ANYDATA.
     *
     * Content no longer than ReadContext.VC2_MAXSIZE is wrapped as a CHAR;
     * longer content is written to a temporary Clob created on the given
     * connection.
     *
     * NOTE(review): the threshold is compared against String.length(), i.e. a
     * count of UTF-16 code units, not the encoded byte length - confirm this
     * is what VC2_MAXSIZE expects.
     *
     * @param conn connection used to create the Clob when needed
     * @return the array {row, column, type, data}
     * @throws SQLException if the Oracle datum or the Clob cannot be created
     */
    public Object[] getOraData (Connection conn) throws SQLException {
        final Datum payload;
        if (this.value.length() > ReadContext.VC2_MAXSIZE) {
            // too long for a CHAR datum: spill the content into a Clob
            Clob lob = conn.createClob();
            lob.setString(1, this.value);
            payload = (Datum) lob;
        } else {
            payload = new CHAR(this.value, null);
        }
        ANYDATA data = ANYDATA.convertDatum(payload);
        return new Object[] {this.ref.row, this.ref.column, this.type, data};
    }

}

0 comments on commit 0498570

Please sign in to comment.