-
-
Notifications
You must be signed in to change notification settings - Fork 1.7k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
baba89c
commit e846a56
Showing
17 changed files
with
322 additions
and
9 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
{"category":"vegetables","calories":45,"fats_g":0.5,"sugars_g":2} | ||
{"category":"seafood","calories":150,"fats_g":5.0,"sugars_g":0} | ||
{"category":"meat","calories":100,"fats_g":5.0,"sugars_g":0} | ||
{"category":"fruit","calories":60,"fats_g":0.0,"sugars_g":11} | ||
{"category":"seafood","calories":140,"fats_g":5.0,"sugars_g":1} | ||
{"category":"meat","calories":120,"fats_g":10.0,"sugars_g":1} | ||
{"category":"vegetables","calories":20,"fats_g":0.0,"sugars_g":2} | ||
{"category":"fruit","calories":30,"fats_g":0.0,"sugars_g":5} | ||
{"category":"seafood","calories":130,"fats_g":5.0,"sugars_g":0} | ||
{"category":"fruit","calories":50,"fats_g":4.5,"sugars_g":0} | ||
{"category":"meat","calories":110,"fats_g":7.0,"sugars_g":0} | ||
{"category":"vegetables","calories":25,"fats_g":0.0,"sugars_g":2} | ||
{"category":"fruit","calories":30,"fats_g":0.0,"sugars_g":3} | ||
{"category":"vegetables","calories":22,"fats_g":0.0,"sugars_g":3} | ||
{"category":"vegetables","calories":25,"fats_g":0.0,"sugars_g":4} | ||
{"category":"seafood","calories":100,"fats_g":5.0,"sugars_g":0} | ||
{"category":"seafood","calories":200,"fats_g":10.0,"sugars_g":0} | ||
{"category":"seafood","calories":200,"fats_g":7.0,"sugars_g":2} | ||
{"category":"fruit","calories":60,"fats_g":0.0,"sugars_g":11} | ||
{"category":"meat","calories":110,"fats_g":7.0,"sugars_g":0} | ||
{"category":"vegetables","calories":25,"fats_g":0.0,"sugars_g":3} | ||
{"category":"seafood","calories":200,"fats_g":7.0,"sugars_g":2} | ||
{"category":"seafood","calories":130,"fats_g":1.5,"sugars_g":0} | ||
{"category":"fruit","calories":130,"fats_g":0.0,"sugars_g":25} | ||
{"category":"meat","calories":100,"fats_g":7.0,"sugars_g":0} | ||
{"category":"vegetables","calories":30,"fats_g":0.0,"sugars_g":5} | ||
{"category":"fruit","calories":50,"fats_g":0.0,"sugars_g":11} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
use polars_core::prelude::*; | ||
use polars_io::RowCount; | ||
|
||
use super::{LazyFrame, ScanArgsAnonymous}; | ||
|
||
pub struct LazyJsonLineReader { | ||
pub(crate) path: String, | ||
pub(crate) batch_size: Option<usize>, | ||
pub(crate) low_memory: bool, | ||
pub(crate) rechunk: bool, | ||
pub(crate) schema: Option<Schema>, | ||
pub(crate) row_count: Option<RowCount>, | ||
pub(crate) infer_schema_length: Option<usize>, | ||
pub(crate) n_rows: Option<usize>, | ||
} | ||
|
||
impl LazyJsonLineReader { | ||
pub fn new(path: String) -> Self { | ||
LazyJsonLineReader { | ||
path, | ||
batch_size: None, | ||
low_memory: false, | ||
rechunk: true, | ||
schema: None, | ||
row_count: None, | ||
infer_schema_length: Some(100), | ||
n_rows: None, | ||
} | ||
} | ||
/// Add a `row_count` column. | ||
#[must_use] | ||
pub fn with_row_count(mut self, row_count: Option<RowCount>) -> Self { | ||
self.row_count = row_count; | ||
self | ||
} | ||
/// Try to stop parsing when `n` rows are parsed. During multithreaded parsing the upper bound `n` cannot | ||
/// be guaranteed. | ||
#[must_use] | ||
pub fn with_n_rows(mut self, num_rows: Option<usize>) -> Self { | ||
self.n_rows = num_rows; | ||
self | ||
} | ||
/// Set the number of rows to use when inferring the json schema. | ||
/// the default is 100 rows. | ||
/// Setting to `None` will do a full table scan, very slow. | ||
#[must_use] | ||
pub fn with_infer_schema_length(mut self, num_rows: Option<usize>) -> Self { | ||
self.infer_schema_length = num_rows; | ||
self | ||
} | ||
/// Set the JSON file's schema | ||
#[must_use] | ||
pub fn with_schema(mut self, schema: Schema) -> Self { | ||
self.schema = Some(schema); | ||
self | ||
} | ||
|
||
/// Reduce memory usage in expensive of performance | ||
#[must_use] | ||
pub fn low_memory(mut self, toggle: bool) -> Self { | ||
self.low_memory = toggle; | ||
self | ||
} | ||
|
||
/// Rechunk the memory to contiguous chunks when parsing is done. | ||
#[must_use] | ||
pub fn with_rechunk(mut self, toggle: bool) -> Self { | ||
self.rechunk = toggle; | ||
self | ||
} | ||
|
||
#[must_use] | ||
pub fn with_batch_size(mut self, batch_size: Option<usize>) -> Self { | ||
self.batch_size = batch_size; | ||
self | ||
} | ||
|
||
pub fn finish(self) -> Result<LazyFrame> { | ||
let options = ScanArgsAnonymous { | ||
name: "JSON SCAN", | ||
infer_schema_length: self.infer_schema_length, | ||
n_rows: self.n_rows, | ||
row_count: self.row_count.clone(), | ||
schema: self.schema.clone(), | ||
..ScanArgsAnonymous::default() | ||
}; | ||
|
||
LazyFrame::anonymous_scan(std::sync::Arc::new(self), options) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
30 changes: 30 additions & 0 deletions
30
polars/polars-lazy/src/physical_plan/executors/scan/ndjson.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
use super::*; | ||
use crate::prelude::{AnonymousScan, AnonymousScanOptions, LazyJsonLineReader}; | ||
|
||
impl AnonymousScan for LazyJsonLineReader { | ||
fn scan(&self, scan_opts: AnonymousScanOptions) -> Result<DataFrame> { | ||
let schema = scan_opts.output_schema.unwrap_or(scan_opts.schema); | ||
JsonLineReader::from_path(&self.path)? | ||
.with_schema(&schema) | ||
.with_rechunk(self.rechunk) | ||
.with_chunk_size(self.batch_size) | ||
.low_memory(self.low_memory) | ||
.with_n_rows(scan_opts.n_rows) | ||
.with_chunk_size(self.batch_size) | ||
.finish() | ||
} | ||
|
||
fn schema(&self, infer_schema_length: Option<usize>) -> Result<Schema> { | ||
let f = std::fs::File::open(&self.path)?; | ||
let mut reader = std::io::BufReader::new(f); | ||
|
||
let data_type = arrow_ndjson::read::infer(&mut reader, infer_schema_length) | ||
.map_err(|err| PolarsError::ComputeError(format!("{:#?}", err).into()))?; | ||
let schema: Schema = StructArray::get_fields(&data_type).into(); | ||
|
||
Ok(schema) | ||
} | ||
fn allows_projection_pushdown(&self) -> bool { | ||
true | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.