Skip to content

Commit

Permalink
Allow infer_schema_length=None in pl.read_csv().
Browse files Browse the repository at this point in the history
  • Loading branch information
ghuls authored and ritchie46 committed Nov 5, 2021
1 parent d0ed0e9 commit ee26601
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 6 deletions.
5 changes: 3 additions & 2 deletions py-polars/polars/eager/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ def _from_pandas(
@staticmethod
def read_csv(
file: Union[str, BinaryIO, bytes],
- infer_schema_length: int = 100,
+ infer_schema_length: Optional[int] = 100,
batch_size: int = 64,
has_headers: bool = True,
ignore_errors: bool = False,
Expand All @@ -379,7 +379,8 @@ def read_csv(
file
Path to a file or a file like object. Any valid filepath can be used. Example: `file.csv`.
infer_schema_length
- Maximum number of lines to read to infer schema.
+ Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8.
+ If set to `None`, a full table scan will be done (slow).
batch_size
Number of lines to read into the buffer at once. Modify this to change performance.
has_headers
Expand Down
5 changes: 3 additions & 2 deletions py-polars/polars/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def update_columns(df: "pl.DataFrame", new_columns: List[str]) -> "pl.DataFrame"

def read_csv(
file: Union[str, TextIO, Path, BinaryIO, bytes],
- infer_schema_length: int = 100,
+ infer_schema_length: Optional[int] = 100,
batch_size: int = 8192,
has_headers: bool = True,
ignore_errors: bool = False,
Expand Down Expand Up @@ -181,7 +181,8 @@ def read_csv(
or ``StringIO`` or ``BytesIO``.
If ``fsspec`` is installed, it will be used to open remote files
infer_schema_length
- Maximum number of lines to read to infer schema.
+ Maximum number of lines to read to infer schema. If set to 0, all columns will be read as pl.Utf8.
+ If set to `None`, a full table scan will be done (slow).
batch_size
Number of lines to read into the buffer at once. Modify this to change performance.
has_headers
Expand Down
4 changes: 2 additions & 2 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ impl PyDataFrame {
#[allow(clippy::too_many_arguments)]
pub fn read_csv(
py_f: &PyAny,
- infer_schema_length: usize,
+ infer_schema_length: Option<usize>,
chunk_size: usize,
has_header: bool,
ignore_errors: bool,
Expand Down Expand Up @@ -147,7 +147,7 @@ impl PyDataFrame {

let mmap_bytes_r = get_mmap_bytes_reader(py_f)?;
let df = CsvReader::new(mmap_bytes_r)
- .infer_schema(Some(infer_schema_length))
+ .infer_schema(infer_schema_length)
.has_header(has_header)
.with_stop_after_n_rows(stop_after_n_rows)
.with_delimiter(sep.as_bytes()[0])
Expand Down

0 comments on commit ee26601

Please sign in to comment.