From a904e3dcc5f36adc084daf5afaabe2b646928bbb Mon Sep 17 00:00:00 2001 From: Johann Tuffe Date: Sat, 17 Jun 2023 03:55:18 +0800 Subject: [PATCH] read formula values in xls --- src/xls.rs | 104 ++++++++++++++++++++++++++++++++++++-------------- tests/test.rs | 18 ++++++++- 2 files changed, 92 insertions(+), 30 deletions(-) diff --git a/src/xls.rs b/src/xls.rs index 4bfd09af..4e5df87f 100644 --- a/src/xls.rs +++ b/src/xls.rs @@ -377,6 +377,7 @@ impl Xls { let records = RecordIter { stream: sh }; let mut cells = Vec::new(); let mut formulas = Vec::new(); + let mut fmla_pos = (0, 0); for record in records { let r = record?; match r.typ { @@ -390,14 +391,32 @@ impl Xls { //0x0201 => cells.push(parse_blank(r.data)?), // 513: Blank 0x0203 => cells.push(parse_number(r.data, &self.formats, self.is_1904)?), // 515: Number 0x0205 => cells.push(parse_bool_err(r.data)?), // 517: BoolErr - 0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 636: Rk + 0x0207 => { + // 519 String (formula value) + let val = DataType::String(parse_string(r.data, &mut encoding)?); + cells.push(Cell::new(fmla_pos, val)) + } + 0x027E => cells.push(parse_rk(r.data, &self.formats, self.is_1904)?), // 638: Rk 0x00FD => cells.extend(parse_label_sst(r.data, &strings)?), // LabelSst 0x00BD => parse_mul_rk(r.data, &mut cells, &self.formats, self.is_1904)?, // 189: MulRk 0x000A => break, // 10: EOF, 0x0006 => { // 6: Formula + if r.data.len() < 20 { + return Err(XlsError::Len { + expected: 20, + found: r.data.len(), + typ: "Formula", + }); + } let row = read_u16(r.data); let col = read_u16(&r.data[2..]); + fmla_pos = (row as u32, col as u32); + if let Some(val) = parse_formula_value(&r.data[6..14])? 
{ + // If the value is a string + // it will appear in 0x0207 record coming next + cells.push(Cell::new(fmla_pos, val)); + } let fmla = parse_formula( &r.data[20..], &fmla_sheet_names, @@ -413,7 +432,7 @@ impl Xls { row, col, e ) }); - formulas.push(Cell::new((row as u32, col as u32), fmla)); + formulas.push(Cell::new(fmla_pos, fmla)); } _ => (), } @@ -486,32 +505,32 @@ fn parse_bool_err(r: &[u8]) -> Result<Cell<DataType>, XlsError> { } let row = read_u16(r); let col = read_u16(&r[2..]); - let v = match r[7] { - 0x00 => DataType::Bool(r[6] != 0), - 0x01 => match r[6] { - 0x00 => DataType::Error(CellErrorType::Null), - 0x07 => DataType::Error(CellErrorType::Div0), - 0x0F => DataType::Error(CellErrorType::Value), - 0x17 => DataType::Error(CellErrorType::Ref), - 0x1D => DataType::Error(CellErrorType::Name), - 0x24 => DataType::Error(CellErrorType::Num), - 0x2A => DataType::Error(CellErrorType::NA), - 0x2B => DataType::Error(CellErrorType::GettingData), - e => { - return Err(XlsError::Unrecognized { - typ: "error", - val: e, - }); - } - }, - e => { - return Err(XlsError::Unrecognized { - typ: "fError", - val: e, - }); - } - }; - Ok(Cell::new((row as u32, col as u32), v)) + let pos = (row as u32, col as u32); + match r[7] { + 0x00 => Ok(Cell::new(pos, DataType::Bool(r[6] != 0))), + 0x01 => Ok(Cell::new(pos, parse_err(r[6])?)), + e => Err(XlsError::Unrecognized { + typ: "fError", + val: e, + }), + } +} + +fn parse_err(e: u8) -> Result<DataType, XlsError> { + match e { + 0x00 => Ok(DataType::Error(CellErrorType::Null)), + 0x07 => Ok(DataType::Error(CellErrorType::Div0)), + 0x0F => Ok(DataType::Error(CellErrorType::Value)), + 0x17 => Ok(DataType::Error(CellErrorType::Ref)), + 0x1D => Ok(DataType::Error(CellErrorType::Name)), + 0x24 => Ok(DataType::Error(CellErrorType::Num)), + 0x2A => Ok(DataType::Error(CellErrorType::NA)), + 0x2B => Ok(DataType::Error(CellErrorType::GettingData)), + e => Err(XlsError::Unrecognized { + typ: "error", + val: e, + }), + } } fn parse_rk(r: &[u8], formats: &[CellFormat], 
is_1904: bool) -> Result<Cell<DataType>, XlsError> { @@ -604,6 +623,22 @@ fn parse_short_string(r: &mut Record<'_>, encoding: &mut XlsEncoding) -> Result< Ok(s) } +/// XLUnicodeString [MS-XLS 2.5.294] +fn parse_string(r: &[u8], encoding: &mut XlsEncoding) -> Result<String, XlsError> { + if r.len() < 2 { + return Err(XlsError::Len { + typ: "short string", + expected: 2, + found: r.len(), + }); + } + let cch = read_u16(r) as usize; + let high_byte = r[2] & 0x1 != 0; + let mut s = String::with_capacity(cch); + let _ = encoding.decode_to(r, cch, &mut s, Some(high_byte)); + Ok(s) +} + fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<DataType>>, XlsError> { if r.len() < 10 { return Err(XlsError::Len { @@ -1264,6 +1299,19 @@ fn parse_formula( } } +fn parse_formula_value(r: &[u8]) -> Result<Option<DataType>, XlsError> { + match r { + &[0x00, .., 0xFF, 0xFF] => Ok(None), // String, value should be in next record + &[0x01, _, b, .., 0xFF, 0xFF] => Ok(Some(DataType::Bool(b != 0))), + &[0x02, _, e, .., 0xFF, 0xFF] => parse_err(e).map(Some), + &[e, .., 0xFF, 0xFF] => Err(XlsError::Unrecognized { + typ: "error", + val: e, + }), + _ => Ok(Some(DataType::Float(read_f64(r)))), + } +} + /// OfficeArtRecord [MS-ODRAW 1.3.1] #[cfg(feature = "picture")] struct ArtRecord<'a> { diff --git a/tests/test.rs b/tests/test.rs index 18168820..4fee4d5e 100644 --- a/tests/test.rs +++ b/tests/test.rs @@ -1,8 +1,8 @@ -use calamine::CellErrorType::*; use calamine::DataType::{ Bool, DateTime, DateTimeIso, Duration, DurationIso, Empty, Error, Float, String, }; use calamine::{open_workbook, open_workbook_auto, Ods, Reader, Xls, Xlsb, Xlsx}; +use calamine::{CellErrorType::*, DataType}; use std::io::Cursor; use std::sync::Once; @@ -1231,7 +1231,7 @@ fn ods_number_rows_repeated() { } #[test] -fn xls_formula() { +fn issue304_xls_formula() { setup(); let path = format!("{}/tests/xls_formula.xls", env!("CARGO_MANIFEST_DIR")); let mut wb: Xls<_> = open_workbook(&path).unwrap(); @@ -1242,3 +1242,17 @@ fn xls_formula() { assert_eq!(rows.next(), 
Some(&["A1+Sheet2!A1".to_owned()][..])); assert_eq!(rows.next(), None); } + +#[test] +fn issue304_xls_values() { + setup(); + let path = format!("{}/tests/xls_formula.xls", env!("CARGO_MANIFEST_DIR")); + let mut wb: Xls<_> = open_workbook(&path).unwrap(); + let rge = wb.worksheet_range("Sheet1").unwrap().unwrap(); + let mut rows = rge.rows(); + assert_eq!(rows.next(), Some(&[DataType::Float(10.)][..])); + assert_eq!(rows.next(), Some(&[DataType::Float(20.)][..])); + assert_eq!(rows.next(), Some(&[DataType::Float(110.)][..])); + assert_eq!(rows.next(), Some(&[DataType::Float(65.)][..])); + assert_eq!(rows.next(), None); +}