Skip to content

Commit

Permalink
fix categorical read_csv
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Jan 2, 2022
1 parent 29acd19 commit 483879d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
9 changes: 7 additions & 2 deletions polars/polars-io/src/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
//! }
//! ```
//!
use crate::csv_core::csv::CoreReader;
use crate::csv_core::csv::{cast_columns, CoreReader};
use crate::csv_core::utils::get_reader_bytes;
use crate::mmap::MmapBytesReader;
use crate::utils::resolve_homedir;
Expand Down Expand Up @@ -446,6 +446,8 @@ where
/// Read the file and create the DataFrame.
fn finish(mut self) -> Result<DataFrame> {
let rechunk = self.rechunk;
// We cannot append categorical columns under a local string cache, so we cast them later.
let mut to_cast_local = vec![];

let mut df = if let Some(schema) = self.schema_overwrite {
// In this branch we check whether there are dtypes we cannot parse.
Expand All @@ -460,7 +462,7 @@ where
match fld.data_type() {
// For categorical we first read as utf8 and later cast to categorical
Categorical => {
to_cast.push(fld);
to_cast_local.push(fld);
Some(Field::new(fld.name(), DataType::Utf8))
}
Date | Datetime(_, _) => {
Expand Down Expand Up @@ -572,6 +574,9 @@ where
};
df = parse_dates(df, &*fixed_schema)
}

// TODO: parallelize this?
cast_columns(&mut df, &to_cast_local)?;
Ok(df)
}
}
Expand Down
6 changes: 3 additions & 3 deletions polars/polars-io/src/csv_core/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use std::fmt;
use std::sync::atomic::Ordering;
use std::sync::{atomic::AtomicUsize, Arc};

pub fn to_cast(df: &mut DataFrame, to_cast: &[&Field]) -> Result<()> {
pub(crate) fn cast_columns(df: &mut DataFrame, to_cast: &[&Field]) -> Result<()> {
// cast to the original dtypes in the schema
for fld in to_cast {
use DataType::*;
Expand Down Expand Up @@ -503,7 +503,7 @@ impl<'a> CoreReader<'a> {
}

df.map(|mut df| {
to_cast(&mut df, self.to_cast)?;
cast_columns(&mut df, self.to_cast)?;
Ok(df)
})
.transpose()
Expand Down Expand Up @@ -581,7 +581,7 @@ impl<'a> CoreReader<'a> {
.collect::<Result<_>>()?,
);

to_cast(&mut df, self.to_cast)?;
cast_columns(&mut df, self.to_cast)?;
Ok(df)
})
.collect::<Result<Vec<_>>>()
Expand Down

0 comments on commit 483879d

Please sign in to comment.