Skip to content

Commit

Permalink
WIP: handle namespaced XML (#73)
Browse files Browse the repository at this point in the history
* bump quick-xml to 0.7.0

(cherry picked from commit 9f029bc)

* add failing richtext-namespaced test case

Tests both namespaced XML content and inline vs. shared rich-text strings.

(cherry picked from commit 3cbe855)

* fix richtext_namespaced test case

* introduces LocalName trait
* impl `LocalName` for `Bytes{Start,End}`, which returns the name
  with any prefix removed
* use `local_name()` to handle namespacing

(cherry picked from commit 50176dd)

* explicitly check for various closing tags

(cherry picked from commit 258ad64)

* simplify `read_string` signature again

(cherry picked from commit a294fbf)
  • Loading branch information
pfernie authored and tafia committed Mar 23, 2017
1 parent 104551a commit d149214
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 35 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ readme = "README.md"

[dependencies]
zip = { version = "0.2.2", default-features = false }
quick-xml = "0.6.2"
quick-xml = "0.7.0"
log = "0.3.7"
encoding_rs = "0.5.0"
byteorder = "1.0.0"
Expand Down
61 changes: 28 additions & 33 deletions src/xlsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,13 @@ impl ExcelReader for Xlsx {
let mut buf = Vec::new();
loop {
match xml.read_event(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"si" => {
if let Some(s) = read_string(&mut xml, b"si")? {
Ok(Event::Start(ref e)) if e.local_name() == b"si" => {
if let Some(s) = read_string(&mut xml, e.name())? {
strings.push(s);
}
}
Ok(Event::End(ref e)) if e.name() == b"sst" => break,
Ok(Event::End(ref e)) if e.local_name() == b"sst" => break,
Ok(Event::Eof) => return Err("unexpected end of xml (no </sst>)".into()),
_ => (),
}
buf.clear();
Expand All @@ -84,7 +85,7 @@ impl ExcelReader for Xlsx {
let mut buf = Vec::new();
loop {
match xml.read_event(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"sheet" => {
Ok(Event::Start(ref e)) if e.local_name() == b"sheet" => {
let mut name = String::new();
let mut path = String::new();
for a in e.attributes() {
Expand All @@ -110,7 +111,8 @@ impl ExcelReader for Xlsx {
}
sheets.push((name, path));
}
Ok(Event::Eof) => break,
Ok(Event::End(ref e)) if e.local_name() == b"workbook" => break,
Ok(Event::Eof) => return Err("unexpected end of xml (no </workbook>)".into()),
Err(e) => return Err(e.into()),
_ => (),
}
Expand All @@ -128,7 +130,7 @@ impl ExcelReader for Xlsx {
let mut buf = Vec::new();
loop {
match xml.read_event(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"Relationship" => {
Ok(Event::Start(ref e)) if e.local_name() == b"Relationship" => {
let mut id = Vec::new();
let mut target = String::new();
for a in e.attributes() {
Expand All @@ -142,7 +144,8 @@ impl ExcelReader for Xlsx {
}
relationships.insert(id, target);
}
Ok(Event::Eof) => break,
Ok(Event::End(ref e)) if e.local_name() == b"Relationships" => break,
Ok(Event::Eof) => return Err("unexpected end of xml (no </Relationships>)".into()),
Err(e) => return Err(e.into()),
_ => (),
}
Expand All @@ -162,7 +165,7 @@ impl ExcelReader for Xlsx {
match xml.read_event(&mut buf) {
Err(e) => return Err(e.into()),
Ok(Event::Start(ref e)) => {
match e.name() {
match e.local_name() {
b"dimension" => {
for a in e.attributes() {
if let Attribute { key: b"ref", value: rdim } = a? {
Expand All @@ -178,7 +181,8 @@ impl ExcelReader for Xlsx {
_ => (),
}
}
Ok(Event::Eof) => break,
Ok(Event::End(ref e)) if e.local_name() == b"worksheet" => break,
Ok(Event::Eof) => return Err("unexpected end of xml (no </worksheet>)".into()),
_ => (),
}
buf.clear();
Expand All @@ -193,12 +197,7 @@ fn read_sheet_data(xml: &mut Reader<BufReader<ZipFile>>,
cells: &mut Vec<Cell>)
-> Result<()> {
/// read the contents of a <v> cell
fn read_value<'a>(xml: &mut Reader<BufReader<ZipFile>>,
strings: &[String],
atts: Attributes<'a>,
buf: &mut Vec<u8>)
-> Result<DataType> {
let v = xml.read_text(b"v", buf)?;
fn read_value<'a>(v: String, strings: &[String], atts: Attributes<'a>) -> Result<DataType> {
match get_attribute(atts, b"t")? {
Some(b"s") => {
// shared string
Expand Down Expand Up @@ -273,7 +272,7 @@ fn read_sheet_data(xml: &mut Reader<BufReader<ZipFile>>,
loop {
match xml.read_event(&mut buf) {
Err(e) => return Err(e.into()),
Ok(Event::Start(ref c_element)) if c_element.name() == b"c" => {
Ok(Event::Start(ref c_element)) if c_element.local_name() == b"c" => {
let pos = get_attribute(c_element.attributes(), b"r")
.and_then(|o| o.ok_or_else(|| "Cell missing 'r' attribute tag".into()))
.and_then(get_row_column)?;
Expand All @@ -284,21 +283,21 @@ fn read_sheet_data(xml: &mut Reader<BufReader<ZipFile>>,
Err(e) => return Err(e.into()),
Ok(Event::Start(ref e)) => {
debug!("e: {:?}", e);
match e.name() {
match e.local_name() {
b"is" => {
// inlineStr
if let Some(s) = read_string(xml, b"is")? {
if let Some(s) = read_string(xml, e.name())? {
cells.push(Cell::new(pos, DataType::String(s)));
}
break;
}
b"v" => {
// value
let v = xml.read_text(e.name(), &mut Vec::new())?;
cells.push(Cell::new(pos,
read_value(xml,
read_value(v,
strings,
c_element.attributes(),
&mut Vec::new())?));
c_element.attributes())?));
break;
}
b"f" => {} // ignore f nodes
Expand All @@ -308,22 +307,18 @@ fn read_sheet_data(xml: &mut Reader<BufReader<ZipFile>>,
}
}
}
Ok(Event::End(ref e)) if e.name() == b"c" => {
debug!("</c>");
break;
}
Ok(Event::Eof) => return Err("End of xml".into()),
Ok(Event::End(ref e)) if e.local_name() == b"c" => break,
Ok(Event::Eof) => return Err("unexpected end of xml (no </c>)".into()),
o => debug!("ignored Event: {:?}", o),
}
}
}
Ok(Event::End(ref e)) if e.name() == b"sheetData" => return Ok(()),
Ok(Event::Eof) => break,
Ok(Event::End(ref e)) if e.local_name() == b"sheetData" => return Ok(()),
Ok(Event::Eof) => return Err("unexpected end of xml (no </sheetData>)".into()),
_ => (),
}
buf.clear();
}
Err("Could not find </sheetData>".into())
}

/// converts a text representation (e.g. "A6:G67") of a dimension into integers
Expand Down Expand Up @@ -388,17 +383,17 @@ fn read_string(xml: &mut Reader<BufReader<ZipFile>>, closing: &[u8]) -> Result<O
let mut rich_buffer: Option<String> = None;
loop {
match xml.read_event(&mut buf) {
Ok(Event::Start(ref e)) if e.name() == b"r" => {
Ok(Event::Start(ref e)) if e.local_name() == b"r" => {
if rich_buffer.is_none() {
// use a buffer since richtext has multiples <r> and <t> for the same cell
rich_buffer = Some(String::new());
}
}
Ok(Event::End(ref e)) if e.name() == closing => {
Ok(Event::End(ref e)) if e.local_name() == closing => {
return Ok(rich_buffer);
}
Ok(Event::Start(ref e)) if e.name() == b"t" => {
let value = xml.read_text(b"t", &mut Vec::new())?;
Ok(Event::Start(ref e)) if e.local_name() == b"t" => {
let value = xml.read_text(e.name(), &mut Vec::new())?;
if let Some(ref mut s) = rich_buffer {
s.push_str(&value);
} else {
Expand Down
Binary file added tests/richtext-namespaced.xlsx
Binary file not shown.
20 changes: 19 additions & 1 deletion tests/test.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
extern crate calamine;

use calamine::Excel;
use calamine::DataType::{String, Float, Bool, Error};
use calamine::DataType::{String, Empty, Float, Bool, Error};
use calamine::CellErrorType::*;

macro_rules! range_eq {
Expand Down Expand Up @@ -155,3 +155,21 @@ fn special_chrs_xlsb() {
[String("֍".to_string())],
[String("àâéêèçöïî«»".to_string())]]);
}

// Regression test for workbooks whose XML element names carry a namespace prefix.
// The fixture stores one rich-text string inline and another as a shared string;
// both must be read back as plain `String` cells.
#[test]
fn richtext_namespaced() {
// Resolve the fixture relative to the crate root so the test does not depend
// on the directory it is run from.
let path = format!("{}/tests/richtext-namespaced.xlsx",
env!("CARGO_MANIFEST_DIR"));
let mut excel = Excel::open(&path).expect("cannot open excel file");

let range = excel.worksheet_range("Sheet1").unwrap();
// Expected contents: a single row of eight cells. The first cell holds the
// inline string, the last holds the shared string, and the six cells in
// between are empty. Each string spans three lines separated by "\r\n".
range_eq!(range,
[[String("inline string\r\nLine 2\r\nLine 3".to_string()),
Empty,
Empty,
Empty,
Empty,
Empty,
Empty,
String("shared string\r\nLine 2\r\nLine 3".to_string())]]);
}

0 comments on commit d149214

Please sign in to comment.