Skip to content

Commit

Permalink
Use dictionary lookup for getting indices for >100 column names when r…
Browse files Browse the repository at this point in the history
…eading from IPC; closes #1760
  • Loading branch information
ghuls authored and ritchie46 committed Nov 14, 2021
1 parent a56d1dc commit cdc3ace
Showing 1 changed file with 25 additions and 4 deletions.
29 changes: 25 additions & 4 deletions polars/polars-io/src/ipc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
use super::{finish_reader, ArrowReader, ArrowResult, RecordBatch};
use crate::prelude::*;
use crate::{PhysicalIoExpr, ScanAggregation};
use ahash::AHashMap;
use arrow::io::ipc::write::WriteOptions;
use arrow::io::ipc::{read, write};
use polars_core::prelude::*;
Expand Down Expand Up @@ -160,14 +161,34 @@ where

if let Some(cols) = self.columns {
let mut prj = Vec::with_capacity(cols.len());
for column in cols.iter() {
let i = schema.index_of(column)?;
prj.push(i)
if cols.len() > 100 {
let mut column_names = AHashMap::with_capacity(schema.fields().len());
schema.fields().iter().enumerate().for_each(|(i, c)| {
column_names.insert(c.name(), i);
});

for column in cols.iter() {
if let Some(i) = column_names.get(&column) {
prj.push(*i);
} else {
let valid_fields: Vec<String> =
schema.fields().iter().map(|f| f.name().clone()).collect();
return Err(PolarsError::NotFound(format!(
"Unable to get field named \"{}\". Valid fields: {:?}",
column, valid_fields
)));
}
}
} else {
for column in cols.iter() {
let i = schema.index_of(column)?;
prj.push(i);
}
}

// Ipc reader panics if the projection is not in increasing order, so sorting is the safer way.
prj.sort_unstable();
self.projection = Some(prj)
self.projection = Some(prj);
}

let ipc_reader = read::FileReader::new(&mut self.reader, metadata, self.projection);
Expand Down

0 comments on commit cdc3ace

Please sign in to comment.