Skip to content

Commit

Permalink
add to_dicts and make sure that column names are stored
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Sep 11, 2021
1 parent 1d08a63 commit 677d69b
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 24 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Conversion
:toctree: api/

from_dict
from_dicts
from_records
from_arrow
from_pandas
Expand Down
35 changes: 35 additions & 0 deletions py-polars/polars/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

__all__ = [
"from_dict",
"from_dicts",
"from_records",
"from_arrow",
"from_pandas",
Expand Down Expand Up @@ -114,6 +115,40 @@ def from_records(
)


def from_dicts(dicts: Sequence[Dict[str, Any]]) -> "pl.DataFrame":
    """
    Construct a DataFrame from a sequence of dictionaries.

    Each dictionary represents one row. The column names and their order are
    taken from the keys of the first dictionary.

    Parameters
    ----------
    dicts
        Sequence with dictionaries mapping column name to value

    Returns
    -------
    DataFrame

    Examples
    --------
    >>> data = [{"a": 1, "b": 4}, {"a": 2, "b": 5}, {"a": 3, "b": 6}]
    >>> df = pl.from_dicts(data)
    >>> df
    shape: (3, 2)
    ╭─────┬─────╮
    │ a   ┆ b   │
    │ --- ┆ --- │
    │ i64 ┆ i64 │
    ╞═════╪═════╡
    │ 1   ┆ 4   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 2   ┆ 5   │
    ├╌╌╌╌╌┼╌╌╌╌╌┤
    │ 3   ┆ 6   │
    ╰─────┴─────╯
    """
    return pl.DataFrame._from_dicts(dicts)


def from_arrow(
a: Union[pa.Table, pa.Array], rechunk: bool = True
) -> Union["pl.DataFrame", "pl.Series"]:
Expand Down
17 changes: 14 additions & 3 deletions py-polars/polars/eager/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ def _from_pydf(cls, py_df: "PyDataFrame") -> "DataFrame":
df._df = py_df
return df

@classmethod
def _from_dicts(cls, data: Sequence[Dict[str, Any]]) -> "DataFrame":
    """
    Construct a DataFrame from a sequence of dictionaries (one dict per row).

    Parameters
    ----------
    data
        Sequence with dictionaries mapping column name to value.

    Returns
    -------
    DataFrame
    """
    pydf = PyDataFrame.read_dicts(data)
    # Dispatch through `cls` instead of the hard-coded `DataFrame` so that the
    # classmethod honours the class it was actually called on.
    return cls._from_pydf(pydf)

@classmethod
def _from_dict(
cls,
Expand Down Expand Up @@ -804,6 +809,15 @@ def to_ipc(self, file: Union[BinaryIO, str, Path]) -> None:

self._df.to_ipc(file)

def to_dicts(self) -> tp.List[Dict[str, Any]]:
    """
    Convert every row of this DataFrame to a dictionary.

    Returns
    -------
    A list with one dictionary per row, each mapping column name to the
    value of that column in the row.
    """
    fetch_row = self._df.row_tuple
    column_names = self.columns

    # Pair each row tuple positionally with the column names.
    return [
        dict(zip(column_names, fetch_row(row_idx)))
        for row_idx in range(self.height)
    ]

def transpose(self) -> "pl.DataFrame":
"""
Transpose a DataFrame over the diagonal.
Expand Down Expand Up @@ -2955,9 +2969,6 @@ def row(self, index: int) -> Tuple[Any]:
def rows(self) -> tp.List[Tuple[Any]]:
"""
Convert columnar data to rows as python tuples.
"""
return self._df.row_tuples()

Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ def sequence_to_pydf(
data_series.append(s.inner())

elif isinstance(data[0], dict):
pydf = PyDataFrame.read_records(data)
pydf = PyDataFrame.read_dicts(data)
if columns is not None:
pydf.set_column_names(columns)
return pydf
Expand Down
25 changes: 13 additions & 12 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -333,32 +333,33 @@ pub(crate) fn str_to_null_strategy(strategy: &str) -> PyResult<NullStrategy> {
Ok(strategy)
}

pub(crate) fn records_to_rows(records: &PyAny) -> PyResult<Vec<Row>> {
let (records, len) = get_pyseq(records)?;
pub(crate) fn dicts_to_rows(records: &PyAny) -> PyResult<(Vec<Row>, Vec<String>)> {
let (dicts, len) = get_pyseq(records)?;
let mut rows = Vec::with_capacity(len);

let mut iter = records.iter()?;
let record = iter.next().unwrap()?;
let record = record.downcast::<PyDict>()?;
let vals = record.values();
let mut iter = dicts.iter()?;
let d = iter.next().unwrap()?;
let d = d.downcast::<PyDict>()?;
let vals = d.values();
let keys_first = d.keys().extract::<Vec<String>>()?;
let row = vals.extract::<Wrap<Row>>()?.0;
rows.push(row);

let keys = record.keys();
let keys = d.keys();
let width = keys.len();

for record in iter {
let record = record?;
let record = record.downcast::<PyDict>()?;
for d in iter {
let d = d?;
let d = d.downcast::<PyDict>()?;

let mut row = Vec::with_capacity(width);

for k in keys {
let val = record.get_item(k).unwrap();
let val = d.get_item(k).unwrap();
let val = val.extract::<Wrap<AnyValue>>()?.0;
row.push(val)
}
rows.push(Row(row))
}
Ok(rows)
Ok((rows, keys_first))
}
10 changes: 6 additions & 4 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ use crate::conversion::{ObjectValue, Wrap};
use crate::datatypes::PyDataType;
use crate::file::get_mmap_bytes_reader;
use crate::lazy::dataframe::PyLazyFrame;
use crate::prelude::{records_to_rows, str_to_null_strategy};
use crate::prelude::{dicts_to_rows, str_to_null_strategy};
use crate::utils::{downsample_str_to_rule, str_to_polarstype};
use crate::{
arrow_interop,
Expand Down Expand Up @@ -224,9 +224,11 @@ impl PyDataFrame {
}

#[staticmethod]
pub fn read_records(records: &PyAny) -> PyResult<Self> {
let rows = records_to_rows(records)?;
Self::finish_from_rows(rows)
pub fn read_dicts(dicts: &PyAny) -> PyResult<Self> {
let (rows, names) = dicts_to_rows(dicts)?;
let mut pydf = Self::finish_from_rows(rows)?;
pydf.df.set_column_names(&names).map_err(PyPolarsEr::from)?;
Ok(pydf)
}

pub fn to_csv(&self, py_f: PyObject, has_headers: bool, delimiter: u8) -> PyResult<()> {
Expand Down
7 changes: 3 additions & 4 deletions py-polars/tests/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,15 @@ def test_init_errors():


def test_init_records():
    """A DataFrame built from row dicts keeps the column names and round-trips via to_dicts."""
    # The second row lists its keys in a different order than the first one.
    row_dicts = [
        {"a": 1, "b": 2},
        {"b": 1, "a": 2},
        {"a": 1, "b": 2},
    ]
    expected = pl.DataFrame({"a": [1, 2, 1], "b": [2, 1, 2]})

    frame = pl.DataFrame(row_dicts)

    assert frame.frame_equal(expected)
    assert frame.to_dicts() == row_dicts


def test_selection():
Expand Down Expand Up @@ -1042,8 +1043,6 @@ def test_slicing():
{
"d": ["u", "u", "d", "c", "c", "d", "d"] * n,
"v1": [None, "help", None, None, None, None, None] * n,
"v2": [None, "help", None, None, None, None, None] * n,
"v3": [None, "help", None, None, None, None, None] * n,
}
)

Expand Down

0 comments on commit 677d69b

Please sign in to comment.