Skip to content

Commit

Permalink
refactor[python]: Standardize class method order (#4690)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Sep 2, 2022
1 parent 7da9d21 commit e81a904
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 279 deletions.
12 changes: 6 additions & 6 deletions py-polars/polars/internals/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,6 @@ def _from_pyexpr(cls, pyexpr: PyExpr) -> Expr:
expr._pyexpr = pyexpr
return expr

def __str__(self) -> str:
    """Return the string produced by ``self._pyexpr.to_str()``."""
    rendered = self._pyexpr.to_str()
    return rendered

def _repr_html_(self) -> str:
    """
    Return the text of ``self._pyexpr.to_str()``.

    No markup is added; the result is the same text ``__str__`` returns.
    """
    rendered = self._pyexpr.to_str()
    return rendered

def _to_pyexpr(self, other: Any) -> PyExpr:
    """
    Convert ``other`` with ``self._to_expr`` and return its ``_pyexpr``.

    Parameters
    ----------
    other
        Any value accepted by ``_to_expr``.
    """
    as_expr = self._to_expr(other)
    return as_expr._pyexpr

Expand All @@ -151,6 +145,12 @@ def _to_expr(self, other: Any) -> Expr:
return other
return pli.lit(other)

def _repr_html_(self) -> str:
    """
    Return the text of ``self._pyexpr.to_str()``.

    No markup is added; the result is the same text ``__str__`` returns.
    """
    rendered = self._pyexpr.to_str()
    return rendered

def __str__(self) -> str:
    """Return the string produced by ``self._pyexpr.to_str()``."""
    rendered = self._pyexpr.to_str()
    return rendered

def __bool__(self) -> Expr:
raise ValueError(
"Since Expr are lazy, the truthiness of an Expr is ambiguous. "
Expand Down
254 changes: 127 additions & 127 deletions py-polars/polars/internals/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,133 @@ def read_json(

return wrap_ldf(PyLazyFrame.read_json(file))

@classmethod
def _scan_python_function(
    cls, schema: pa.schema | dict[str, type[DataType]], scan_fn: bytes
) -> LazyFrame:
    """
    Build an instance whose ``_ldf`` is a Python-function scan.

    Parameters
    ----------
    schema
        A ``dict`` of column name to dtype is passed on as a list of
        ``(name, dtype)`` pairs; any other schema object is passed on as
        ``list(schema)`` (annotated as a pyarrow schema).
    scan_fn
        Bytes forwarded unchanged to the ``PyLazyFrame`` constructor
        (presumably a serialized callable -- confirm against the caller).

    Returns
    -------
    LazyFrame
        A new instance created with ``cls.__new__``, so ``__init__`` is not run.
    """
    lf = cls.__new__(cls)
    if not isinstance(schema, dict):
        lf._ldf = PyLazyFrame.scan_from_python_function_arrow_schema(
            list(schema), scan_fn
        )
        return lf
    lf._ldf = PyLazyFrame.scan_from_python_function_pl_schema(
        list(schema.items()), scan_fn
    )
    return lf

@property
def columns(self) -> list[str]:
    """
    Get column names.

    This property is read-only: only a getter is defined, and it returns the
    result of ``self._ldf.columns()``.

    Returns
    -------
    list of str
        The column names.

    Examples
    --------
    >>> df = (
    ...     pl.DataFrame(
    ...         {
    ...             "foo": [1, 2, 3],
    ...             "bar": [6, 7, 8],
    ...             "ham": ["a", "b", "c"],
    ...         }
    ...     )
    ...     .lazy()
    ...     .select(["foo", "bar"])
    ... )
    >>> df.columns
    ['foo', 'bar']

    """
    return self._ldf.columns()

@property
def dtypes(self) -> list[type[DataType]]:
    """
    Get dtypes of columns in LazyFrame.

    Returns the result of ``self._ldf.dtypes()``.

    Examples
    --------
    >>> lf = pl.DataFrame(
    ...     {
    ...         "foo": [1, 2, 3],
    ...         "bar": [6.0, 7.0, 8.0],
    ...         "ham": ["a", "b", "c"],
    ...     }
    ... ).lazy()
    >>> lf.dtypes
    [<class 'polars.datatypes.Int64'>, <class 'polars.datatypes.Float64'>, <class 'polars.datatypes.Utf8'>]

    See Also
    --------
    schema : Returns a {colname:dtype} mapping.

    """  # noqa: E501
    column_dtypes = self._ldf.dtypes()
    return column_dtypes

@property
def schema(self) -> Schema:
    """
    Get a dict[column name, DataType].

    Returns the result of ``self._ldf.schema()``.

    Examples
    --------
    >>> lf = pl.DataFrame(
    ...     {
    ...         "foo": [1, 2, 3],
    ...         "bar": [6.0, 7.0, 8.0],
    ...         "ham": ["a", "b", "c"],
    ...     }
    ... ).lazy()
    >>> lf.schema
    {'foo': <class 'polars.datatypes.Int64'>, 'bar': <class 'polars.datatypes.Float64'>, 'ham': <class 'polars.datatypes.Utf8'>}

    """  # noqa: E501
    name_to_dtype = self._ldf.schema()
    return name_to_dtype

def __contains__(self: LDF, key: str) -> bool:
    """Return whether ``key`` is one of the names in ``self.columns``."""
    column_names = self.columns
    return key in column_names

def __copy__(self: LDF) -> LDF:
    """Implement the shallow-copy hook by returning ``self.clone()``."""
    duplicate = self.clone()
    return duplicate

def __deepcopy__(self: LDF, memo: None = None) -> LDF:
    """
    Implement the deep-copy hook by returning ``self.clone()``.

    ``memo`` is accepted but not used.
    """
    duplicate = self.clone()
    return duplicate

def __getitem__(self: LDF, item: int | range | slice) -> LazyFrame:
    """
    Slice the frame; any other kind of subscript is rejected.

    Parameters
    ----------
    item
        Must be a ``slice``; it is applied through ``LazyPolarsSlice``.

    Raises
    ------
    TypeError
        If ``item`` is not a ``slice``.
    """
    if isinstance(item, slice):
        return LazyPolarsSlice(self).apply(item)
    raise TypeError(
        "'LazyFrame' object is not subscriptable (aside from slicing). Use"
        " 'select()' or 'filter()' instead."
    )

def __str__(self) -> str:
    """Return a fixed header line followed by the text of ``describe_plan()``."""
    plan = self.describe_plan()
    return f"""\
naive plan: (run LazyFrame.describe_optimized_plan() to see the optimized plan)
{plan}\
"""

def _repr_html_(self) -> str:
    """
    Build an HTML rendering of the unoptimized query plan.

    The dot source from ``self._ldf.to_dot(optimized=False)`` is piped through
    the ``dot`` executable to obtain an SVG graph. If any step of that fails,
    the text of ``describe_plan()`` is embedded instead, HTML-escaped so that
    characters such as ``<``, ``>`` and ``&`` in the plan are shown literally
    rather than parsed as markup.

    Returns
    -------
    str
        HTML containing either the SVG graph or the textual plan.
    """
    try:
        dot = self._ldf.to_dot(optimized=False)
        svg = subprocess.check_output(
            ["dot", "-Nshape=box", "-Tsvg"], input=f"{dot}".encode()
        )
        return (
            "<h4>NAIVE QUERY PLAN</h4><p>run <b>LazyFrame.show_graph()</b> to see"
            f" the optimized version</p>{svg.decode()}"
        )
    except Exception:
        # Deliberately broad: any failure in the SVG path (e.g. the ``dot``
        # executable cannot be run) falls back to the textual plan.
        import html

        # Escape before inserting the <p></p> separators so only the plan
        # text is escaped, not the markup added here.
        plan_text = html.escape(self.describe_plan(), quote=False)
        insert = plan_text.replace("\n", "<p></p>")

        return f"""\
<i>naive plan: (run <b>LazyFrame.describe_optimized_plan()</b> to see the optimized plan)</i>
<p></p>
<div>{insert}</div>\
"""  # noqa: E501

@overload
def write_json(
self,
Expand Down Expand Up @@ -361,32 +488,6 @@ def write_json(
self._ldf.write_json(file)
return None

@classmethod
def _scan_python_function(
    cls, schema: pa.schema | dict[str, type[DataType]], scan_fn: bytes
) -> LazyFrame:
    """
    Build an instance whose ``_ldf`` is a Python-function scan.

    Parameters
    ----------
    schema
        A ``dict`` of column name to dtype is passed on as a list of
        ``(name, dtype)`` pairs; any other schema object is passed on as
        ``list(schema)`` (annotated as a pyarrow schema).
    scan_fn
        Bytes forwarded unchanged to the ``PyLazyFrame`` constructor
        (presumably a serialized callable -- confirm against the caller).

    Returns
    -------
    LazyFrame
        A new instance created with ``cls.__new__``, so ``__init__`` is not run.
    """
    lf = cls.__new__(cls)
    if not isinstance(schema, dict):
        lf._ldf = PyLazyFrame.scan_from_python_function_arrow_schema(
            list(schema), scan_fn
        )
        return lf
    lf._ldf = PyLazyFrame.scan_from_python_function_pl_schema(
        list(schema.items()), scan_fn
    )
    return lf

def __getitem__(self: LDF, item: int | range | slice) -> LazyFrame:
    """
    Slice the frame; any other kind of subscript is rejected.

    Parameters
    ----------
    item
        Must be a ``slice``; it is applied through ``LazyPolarsSlice``.

    Raises
    ------
    TypeError
        If ``item`` is not a ``slice``.
    """
    if isinstance(item, slice):
        return LazyPolarsSlice(self).apply(item)
    raise TypeError(
        "'LazyFrame' object is not subscriptable (aside from slicing). Use"
        " 'select()' or 'filter()' instead."
    )

def __contains__(self: LDF, key: str) -> bool:
    """Return whether ``key`` is one of the names in ``self.columns``."""
    column_names = self.columns
    return key in column_names

def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
"""
Apply a function on Self.
Expand Down Expand Up @@ -425,32 +526,6 @@ def pipe(self, func: Callable[..., Any], *args: Any, **kwargs: Any) -> Any:
"""
return func(self, *args, **kwargs)

def _repr_html_(self) -> str:
    """
    Build an HTML rendering of the unoptimized query plan.

    The dot source from ``self._ldf.to_dot(optimized=False)`` is piped through
    the ``dot`` executable to obtain an SVG graph. If any step of that fails,
    the text of ``describe_plan()`` is embedded instead, HTML-escaped so that
    characters such as ``<``, ``>`` and ``&`` in the plan are shown literally
    rather than parsed as markup.

    Returns
    -------
    str
        HTML containing either the SVG graph or the textual plan.
    """
    try:
        dot = self._ldf.to_dot(optimized=False)
        svg = subprocess.check_output(
            ["dot", "-Nshape=box", "-Tsvg"], input=f"{dot}".encode()
        )
        return (
            "<h4>NAIVE QUERY PLAN</h4><p>run <b>LazyFrame.show_graph()</b> to see"
            f" the optimized version</p>{svg.decode()}"
        )
    except Exception:
        # Deliberately broad: any failure in the SVG path (e.g. the ``dot``
        # executable cannot be run) falls back to the textual plan.
        import html

        # Escape before inserting the <p></p> separators so only the plan
        # text is escaped, not the markup added here.
        plan_text = html.escape(self.describe_plan(), quote=False)
        insert = plan_text.replace("\n", "<p></p>")

        return f"""\
<i>naive plan: (run <b>LazyFrame.describe_optimized_plan()</b> to see the optimized plan)</i>
<p></p>
<div>{insert}</div>\
"""  # noqa: E501

def __str__(self) -> str:
    """Return a fixed header line followed by the text of ``describe_plan()``."""
    plan = self.describe_plan()
    return f"""\
naive plan: (run LazyFrame.describe_optimized_plan() to see the optimized plan)
{plan}\
"""

def describe_plan(self) -> str:
    """
    Create a string representation of the unoptimized query plan.

    The text is obtained from ``self._ldf.describe_plan()``.
    """
    plan_text = self._ldf.describe_plan()
    return plan_text
Expand Down Expand Up @@ -745,75 +820,6 @@ def lazy(self: LDF) -> LDF:
"""
return self

@property
def columns(self) -> list[str]:
    """
    Get column names.

    This property is read-only: only a getter is defined, and it returns the
    result of ``self._ldf.columns()``.

    Returns
    -------
    list of str
        The column names.

    Examples
    --------
    >>> df = (
    ...     pl.DataFrame(
    ...         {
    ...             "foo": [1, 2, 3],
    ...             "bar": [6, 7, 8],
    ...             "ham": ["a", "b", "c"],
    ...         }
    ...     )
    ...     .lazy()
    ...     .select(["foo", "bar"])
    ... )
    >>> df.columns
    ['foo', 'bar']

    """
    return self._ldf.columns()

@property
def dtypes(self) -> list[type[DataType]]:
    """
    Get dtypes of columns in LazyFrame.

    Returns the result of ``self._ldf.dtypes()``.

    Examples
    --------
    >>> lf = pl.DataFrame(
    ...     {
    ...         "foo": [1, 2, 3],
    ...         "bar": [6.0, 7.0, 8.0],
    ...         "ham": ["a", "b", "c"],
    ...     }
    ... ).lazy()
    >>> lf.dtypes
    [<class 'polars.datatypes.Int64'>, <class 'polars.datatypes.Float64'>, <class 'polars.datatypes.Utf8'>]

    See Also
    --------
    schema : Returns a {colname:dtype} mapping.

    """  # noqa: E501
    column_dtypes = self._ldf.dtypes()
    return column_dtypes

@property
def schema(self) -> Schema:
    """
    Get a dict[column name, DataType].

    Returns the result of ``self._ldf.schema()``.

    Examples
    --------
    >>> lf = pl.DataFrame(
    ...     {
    ...         "foo": [1, 2, 3],
    ...         "bar": [6.0, 7.0, 8.0],
    ...         "ham": ["a", "b", "c"],
    ...     }
    ... ).lazy()
    >>> lf.schema
    {'foo': <class 'polars.datatypes.Int64'>, 'bar': <class 'polars.datatypes.Float64'>, 'ham': <class 'polars.datatypes.Utf8'>}

    """  # noqa: E501
    name_to_dtype = self._ldf.schema()
    return name_to_dtype

def cache(self: LDF) -> LDF:
    """
    Cache the result once the execution of the physical plan hits this node.

    The result of ``self._ldf.cache()`` is wrapped with ``self._from_pyldf``.
    """
    cached_ldf = self._ldf.cache()
    return self._from_pyldf(cached_ldf)
Expand Down Expand Up @@ -861,12 +867,6 @@ def clone(self: LDF) -> LDF:
"""
return self._from_pyldf(self._ldf.clone())

def __copy__(self: LDF) -> LDF:
    """Implement the shallow-copy hook by returning ``self.clone()``."""
    duplicate = self.clone()
    return duplicate

def __deepcopy__(self: LDF, memo: None = None) -> LDF:
    """
    Implement the deep-copy hook by returning ``self.clone()``.

    ``memo`` is accepted but not used.
    """
    duplicate = self.clone()
    return duplicate

def filter(self: LDF, predicate: pli.Expr | str | pli.Series | list[bool]) -> LDF:
"""
Filter the rows in the DataFrame based on a predicate expression.
Expand Down

0 comments on commit e81a904

Please sign in to comment.