Skip to content

Commit

Permalink
Fix off by one error in dbase::File
Browse files Browse the repository at this point in the history
dbase::File calculated the position of the record/field
assuming the record size by using the header's info
which includes the implicit deletion flag size, and dbase::File
added the deletion flag size again resulting of bad readings.
  • Loading branch information
tmontaigu committed Oct 27, 2023
1 parent ddb11cb commit c8fe23b
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 24 deletions.
11 changes: 10 additions & 1 deletion src/datafusion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,9 @@ mod test {
// +-----------------------+
// | Franconia-Springfield |
// | Federal Center SW |
// | "Foggy Bottom GWU" |
// | "Farragut West" |
// | "Federal Triangle" |
// +-----------------------+

// extract first (and only) RecordBatch from dataframe
Expand All @@ -448,7 +451,13 @@ mod test {
// ensure values match
assert_eq!(
result[0].column(0).as_ref(),
&StringArray::from(vec!["Franconia-Springfield", "Federal Center SW"])
&StringArray::from(vec![
"Franconia-Springfield",
"Federal Center SW",
"Foggy Bottom GWU",
"Farragut West",
"Federal Triangle"
])
);
Ok(())
}
Expand Down
16 changes: 9 additions & 7 deletions src/file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl<'a, T> FieldRef<'a, T> {
.map(|i| i.field_length as u64)
.sum::<u64>();

record_position + position_in_record
record_position + position_in_record + DELETION_FLAG_SIZE as u64
}
}

Expand Down Expand Up @@ -206,14 +206,16 @@ where
pub fn seek_to_beginning(&mut self) -> Result<u64, FieldIOError> {
self.file
.inner
.seek(SeekFrom::Start(self.position_in_source()))
.seek(SeekFrom::Start(
self.position_in_source() + DELETION_FLAG_SIZE as u64,
))
.map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None))
}

pub fn seek_before_deletion_flag(&mut self) -> Result<u64, FieldIOError> {
self.file
.inner
.seek(SeekFrom::Start(self.position_in_source() - 1))
.seek(SeekFrom::Start(self.position_in_source()))
.map_err(|e| FieldIOError::new(ErrorKind::IoError(e), None))
}
}
Expand All @@ -227,7 +229,7 @@ where
/// - true -> the record is marked as deleted
/// - false -> the record is **not** marked as deleted
pub fn is_deleted(&mut self) -> Result<bool, Error> {
let deletion_flag_pos = self.position_in_source() - DELETION_FLAG_SIZE as u64;
let deletion_flag_pos = self.position_in_source();
self.file
.inner
.seek(SeekFrom::Start(deletion_flag_pos))
Expand Down Expand Up @@ -329,7 +331,7 @@ impl<'a, T> FileRecordIterator<'a, T> {
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?;
///
/// assert_eq!(file.num_records(), 6);
/// assert_eq!(file.num_records(), 86);
///
/// let name_idx = file.field_index("name").unwrap();
/// let marker_color_idx = file.field_index("marker-col").unwrap();
Expand Down Expand Up @@ -501,8 +503,8 @@ impl<T: Write + Seek> File<T> {
);

let end_of_last_record = self.header.offset_to_first_record as u64
+ self.num_records() as u64
* (DELETION_FLAG_SIZE as u64 + self.header.size_of_record as u64);
+ (self.num_records() as u64 * self.header.size_of_record as u64);

self.inner
.seek(SeekFrom::Start(end_of_last_record))
.map_err(|error| Error::io_error(error, self.num_records()))?;
Expand Down
6 changes: 2 additions & 4 deletions src/header.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ use crate::encoding::DynEncoding;
use std::io::{Read, Write};

use crate::field::types::Date;
use crate::field::DELETION_FLAG_SIZE;
use crate::memo::MemoFileType;

// Used this as source: https://blog.codetitans.pl/post/dbf-and-language-code-page/
Expand Down Expand Up @@ -412,9 +411,8 @@ impl Header {
if index >= self.num_records as usize {
None
} else {
let offset = self.offset_to_first_record as usize
+ (index * (self.size_of_record as usize + DELETION_FLAG_SIZE))
+ DELETION_FLAG_SIZE;
let offset =
self.offset_to_first_record as usize + (index * self.size_of_record as usize);
Some(offset)
}
}
Expand Down
Binary file modified tests/data/stations.dbf
Binary file not shown.
37 changes: 25 additions & 12 deletions tests/test_file.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ fn copy_to_named_tmp_file(origin: &str) -> std::io::Result<tempfile::NamedTempFi
fn test_file_read_only() -> Result<(), Box<dyn std::error::Error>> {
let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?;

assert_eq!(file.num_records(), 6);
assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS);

let name_idx = file.field_index("name").unwrap();
let marker_color_idx = file.field_index("marker-col").unwrap();
Expand Down Expand Up @@ -89,12 +89,14 @@ fn test_file_read_only() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}

const STATIONS_DBG_NUM_RECORDS: usize = 86;

#[test]
fn test_file_read_write() -> Result<(), Box<dyn std::error::Error>> {
let tmp_file = copy_to_tmp_file("tests/data/stations.dbf")?;
let mut file = dbase::File::open(tmp_file)?;

assert_eq!(file.num_records(), 6);
assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS);

let name_idx = file.field_index("name").unwrap();
let marker_color_idx = file.field_index("marker-col").unwrap();
Expand Down Expand Up @@ -222,20 +224,20 @@ fn test_file_append_record() -> Result<(), Box<dyn std::error::Error>> {

{
let mut file = dbase::File::open_read_write(tmp_file.path())?;
assert_eq!(file.num_records(), 6);
assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS);
file.append_record(&new_record)?;

assert_eq!(file.num_records(), 7);
let record = file.record(6).unwrap().read()?;
assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS + 1);
let record = file.record(STATIONS_DBG_NUM_RECORDS).unwrap().read()?;
assert_eq!(record, new_record);
}

{
// Check that after closing the file, if we re-open it,
// our appended record is still here
let mut file = dbase::File::open_read_write(tmp_file.path())?;
assert_eq!(file.num_records(), 7);
let record = file.record(6).unwrap().read()?;
assert_eq!(file.num_records(), STATIONS_DBG_NUM_RECORDS + 1);
let record = file.record(STATIONS_DBG_NUM_RECORDS).unwrap().read()?;
assert_eq!(record, new_record);
}

Expand Down Expand Up @@ -299,11 +301,25 @@ fn test_file_char_trimming() -> Result<(), Box<dyn std::error::Error>> {
}
);

let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?;
let reading = dbase::ReadingOptions::default().character_trim(dbase::TrimOption::End);
file.set_options(reading);

let expected_trim_end = StationRecord {
name: format!("{}", "Franconia-Springfield",),
marker_col: format!("{}", "#0000ff",),
marker_sym: format!("{}", "rail-metro",),
line: format!("{}", "blue",),
};

let record = file.record(1).unwrap().read_as::<StationRecord>()?;
assert_eq!(record, expected_trim_end);

let mut file = dbase::File::open_read_only("tests/data/stations.dbf")?;
let reading = dbase::ReadingOptions::default().character_trim(dbase::TrimOption::Begin);
file.set_options(reading);

let expected = StationRecord {
let expected_trim_begin = StationRecord {
name: format!(
"{:width$}",
"Franconia-Springfield",
Expand All @@ -325,11 +341,8 @@ fn test_file_char_trimming() -> Result<(), Box<dyn std::error::Error>> {
width = file.fields()[0].length() as usize
),
};

let record = file.record(1).unwrap().read_as::<StationRecord>()?;

assert_eq!(record, expected);

assert_eq!(record, expected_trim_begin);
Ok(())
}

Expand Down

0 comments on commit c8fe23b

Please sign in to comment.