Skip to content

Commit

Permalink
Merge pull request #330 from comsysto/feature/support_missing_r_attri…
Browse files Browse the repository at this point in the history
…butes

fixes #261 by supporting missing r attributes in XLSX files
  • Loading branch information
tafia committed Jun 13, 2023
2 parents 071c7ef + 68ead30 commit 13e6ad3
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ Cargo.lock
*.bk
.vim
fuzz.xlsx
.idea
66 changes: 58 additions & 8 deletions src/xlsx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,10 @@ pub enum XlsxError {
DimensionCount(usize),
/// Cell 't' attribute error
CellTAttribute(String),
/// Cell 'r' attribute error
CellRAttribute,
/// There is no column component in the range string
RangeWithoutColumnComponent,
/// There is no row component in the range string
RangeWithoutRowCompontent,
/// Unexpected error
Unexpected(&'static str),
/// Cell error
Expand Down Expand Up @@ -106,7 +108,12 @@ impl std::fmt::Display for XlsxError {
write!(f, "Range dimension must be lower than 2. Got {}", e)
}
XlsxError::CellTAttribute(e) => write!(f, "Unknown cell 't' attribute: {:?}", e),
XlsxError::CellRAttribute => write!(f, "Cell missing 'r' attribute"),
XlsxError::RangeWithoutColumnComponent => {
write!(f, "Range is missing the expected column component.")
}
XlsxError::RangeWithoutRowCompontent => {
write!(f, "Range is missing the expected row component.")
}
XlsxError::Unexpected(e) => write!(f, "{}", e),
XlsxError::CellError(e) => write!(f, "Unsupported cell error value '{}'", e),
}
Expand Down Expand Up @@ -889,13 +896,35 @@ where
{
let mut buf = Vec::new();
let mut cell_buf = Vec::new();

let mut row_index = 0;
let mut col_index = 0;

loop {
buf.clear();
match xml.read_event_into(&mut buf) {
Ok(Event::Start(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
let attribute = get_attribute(row_element.attributes(), QName(b"r"))?;
if let Some(range) = attribute {
let row = get_row(range)?;
row_index = row;
}
}
Ok(Event::End(ref row_element)) if row_element.local_name().as_ref() == b"row" => {
row_index += 1;
col_index = 0;
}
Ok(Event::Start(ref c_element)) if c_element.local_name().as_ref() == b"c" => {
let pos = get_attribute(c_element.attributes(), QName(b"r"))
.and_then(|o| o.ok_or(XlsxError::CellRAttribute))
.and_then(get_row_column)?;
let attribute = get_attribute(c_element.attributes(), QName(b"r"))?;

let pos = if let Some(range) = attribute {
let (row, col) = get_row_column(range)?;
col_index = col;
(row, col)
} else {
(row_index, col_index)
};

loop {
cell_buf.clear();
match xml.read_event_into(&mut cell_buf) {
Expand All @@ -906,6 +935,7 @@ where
_ => (),
}
}
col_index += 1;
}
Ok(Event::End(ref e)) if e.local_name().as_ref() == b"sheetData" => return Ok(()),
Ok(Event::Eof) => return Err(XlsxError::XmlEof("sheetData")),
Expand Down Expand Up @@ -1087,8 +1117,25 @@ fn get_dimension(dimension: &[u8]) -> Result<Dimensions, XlsxError> {
}
}

/// converts a text range name into its position (row, column) (0 based index)
/// Converts a text range name into its position (row, column) (0 based index).
/// If the row or column component in the range is missing, an Error is returned.
fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> {
let (row, col) = get_row_and_optional_column(range)?;
let col = col.ok_or(XlsxError::RangeWithoutColumnComponent)?;
Ok((row, col))
}

/// Converts a text row name into its position (0 based index).
/// If the row component in the range is missing, an Error is returned.
/// If the text row name also contains a column component, it is ignored.
fn get_row(range: &[u8]) -> Result<u32, XlsxError> {
get_row_and_optional_column(range).map(|(row, _)| row)
}

/// Converts a text range name into its position (row, column) (0 based index).
/// If the row component in the range is missing, an Error is returned.
/// If the column component in the range is missing, an None is returned for the column.
fn get_row_and_optional_column(range: &[u8]) -> Result<(u32, Option<u32>), XlsxError> {
let (mut row, mut col) = (0, 0);
let mut pow = 1;
let mut readrow = true;
Expand Down Expand Up @@ -1121,7 +1168,10 @@ fn get_row_column(range: &[u8]) -> Result<(u32, u32), XlsxError> {
_ => return Err(XlsxError::Alphanumeric(*c)),
}
}
Ok((row.saturating_sub(1), col - 1))
let row = row
.checked_sub(1)
.ok_or(XlsxError::RangeWithoutRowCompontent)?;
Ok((row, col.checked_sub(1)))
}

/// attempts to read either a simple or richtext string
Expand Down
Binary file added tests/issue_261.xlsx
Binary file not shown.
Binary file added tests/issue_261_fixed_by_excel.xlsx
Binary file not shown.
70 changes: 70 additions & 0 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1016,6 +1016,76 @@ fn issue_252() {
assert!(open_workbook::<Xls<_>, _>(&path).is_err());
}

#[test]
fn issue_261() {
setup();

let mut workbook_with_missing_r_attributes = {
let path = format!("{}/tests/issue_261.xlsx", env!("CARGO_MANIFEST_DIR"));
open_workbook::<Xlsx<_>, _>(&path).unwrap()
};

let mut workbook_fixed_by_excel = {
let path = format!(
"{}/tests/issue_261_fixed_by_excel.xlsx",
env!("CARGO_MANIFEST_DIR")
);
open_workbook::<Xlsx<_>, _>(&path).unwrap()
};

let range_a = workbook_fixed_by_excel
.worksheet_range("Some Sheet")
.unwrap()
.unwrap();

let range_b = workbook_with_missing_r_attributes
.worksheet_range("Some Sheet")
.unwrap()
.unwrap();

assert_eq!(range_a.cells().count(), 462);
assert_eq!(range_a.cells().count(), 462);
assert_eq!(range_a.rows().count(), 66);
assert_eq!(range_b.rows().count(), 66);

assert_eq!(
range_b.get_value((0, 0)).unwrap(),
&String("String Value 32".into())
);
range_b
.rows()
.nth(4)
.unwrap()
.iter()
.for_each(|cell| assert!(cell.is_empty()));

assert_eq!(range_b.get_value((60, 6)).unwrap(), &Float(939.));
assert_eq!(
range_b.get_value((65, 0)).unwrap(),
&String("String Value 42".into())
);

assert_eq!(
range_b.get_value((65, 3)).unwrap(),
&String("String Value 8".into())
);

range_a
.rows()
.zip(range_b.rows().filter(|r| !r.is_empty()))
.enumerate()
.for_each(|(i, (lhs, rhs))| {
assert_eq!(
lhs,
rhs,
"Expected row {} to be {:?}, but found {:?}",
i + 1,
lhs,
rhs
)
});
}

#[test]
fn test_values_xls() {
let path = format!(
Expand Down

0 comments on commit 13e6ad3

Please sign in to comment.