Skip to content

Commit

Permalink
ph learns slice
Browse files Browse the repository at this point in the history
  • Loading branch information
Pål Grønås Drange committed Apr 11, 2020
1 parent adb5c9b commit 566e224
Show file tree
Hide file tree
Showing 4 changed files with 143 additions and 1 deletion.
52 changes: 52 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ a pipeline.
* [`columns`, listing, selecting and re-ordering of](#columns-listing-selecting-and-re-ordering-of)
* [`rename`](#rename)
* [`replace`](#replace)
* [`slice`](#slice)
* [`eval`; Mathematipulating and creating new columns](#eval-mathematipulating-and-creating-new-columns)
* [`normalize`](#normalize)
* [`query`](#query)
Expand Down Expand Up @@ -594,6 +595,57 @@ x,y,xp
```



#### `slice`

Slicing is an essential Python feature, and occasionally we want to slice
tabular data as well, e.g. to look at only the first 100 or the last 100
rows, or perhaps at only every 10th row. All of this is achieved using
`ph slice start:end:step` with standard Python slice syntax.

```bash
$ cat a.csv | ph slice 1:9:2
x,y
4,9
6,11
8,13
```

Reversing:

```bash
$ cat a.csv | ph slice ::-1
x,y
8,13
7,12
6,11
5,10
4,9
3,8
```

See also `ph head` and `ph tail`.

```bash
$ cat a.csv | ph slice :3
x,y
3,8
4,9
5,10
```

equivalent to

```bash
$ cat a.csv | ph head 3
x,y
3,8
4,9
5,10
```



#### `eval`; Mathematipulating and creating new columns

You can sum columns and place the result in a new column using
Expand Down
45 changes: 45 additions & 0 deletions ph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1326,6 +1326,51 @@ def columns(*cols, **kwargs):
pipeout(df[cols])


def _parse_slice(slicestr):
pattern = ":<int> | <int>: | <int>:<int> | <int>:<int>:<int>"
error = "Input to slice is {} _not_ {}".format(pattern, slicestr)

assert ":" in slicestr, error
start = None
end = None
step = None
tup = slicestr.split(":")
if len(tup) > 3:
exit(error)
start = tup[0] or None
if start is not None:
start = int(start)
end = tup[1] or None
if end is not None:
end = int(end)
if len(tup) == 3:
step = tup[2] or None
if step is not None:
step = int(step)
return start, end, step


@registerx("slice")
def slice_(slicestr):
    """Slice the rows of a dataframe using a Python slice pattern.

    Usage: cat a.csv | ph slice :10    # head
           cat a.csv | ph slice -10:   # tail
           cat a.csv | ph slice ::2    # every even row
           cat a.csv | ph slice 1::2   # every odd row
           cat a.csv | ph slice ::-1   # reverse file
    """
    usage = ":<int> | <int>: | <int>:<int> | <int>:<int>:<int>"
    df = pipein()
    # An integer argument, or one without any colon, is not a slice pattern.
    looks_like_slice = not isinstance(slicestr, int) and ":" in slicestr
    if not looks_like_slice:
        exit("Input to slice is {} _not_ {}".format(usage, slicestr))
    start, end, step = _parse_slice(slicestr)
    pipeout(df[slice(start, end, step)])


@register
def drop(*columns, **kwargs):
"""Drop specified labels from rows or columns.
Expand Down
2 changes: 1 addition & 1 deletion ph/_version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.3.0"
__version__ = "0.3.1"
45 changes: 45 additions & 0 deletions tests/test_ph.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,6 +633,51 @@ def test_replace_col_and_inf(phmgr):
assert list(captured.df.y) == list(range(8, 14))


def test_slice(phmgr):
    """`slice 0:3` with explicit start and end yields three rows."""
    with phmgr("a") as captured:
        _call("slice 0:3")
    assert not captured.err
    captured.assert_shape(3, 2)
    expected_x = [3, 4, 5]
    expected_y = [8, 9, 10]
    assert list(captured.df.x) == expected_x
    assert list(captured.df.y) == expected_y


def test_slice_end(phmgr):
    """`slice :3` with only an end yields three rows."""
    with phmgr("a") as captured:
        _call("slice :3")
    assert not captured.err
    captured.assert_shape(3, 2)
    expected_x = [3, 4, 5]
    expected_y = [8, 9, 10]
    assert list(captured.df.x) == expected_x
    assert list(captured.df.y) == expected_y


def test_slice_start(phmgr):
    """`slice 3:` with only a start yields three rows."""
    with phmgr("a") as captured:
        _call("slice 3:")
    assert not captured.err
    captured.assert_shape(3, 2)
    expected_x = [6, 7, 8]
    expected_y = [11, 12, 13]
    assert list(captured.df.x) == expected_x
    assert list(captured.df.y) == expected_y


def test_slice_start_step(phmgr):
    """`slice 1::2` with start and step yields three rows."""
    with phmgr("a") as captured:
        _call("slice 1::2")
    assert not captured.err
    captured.assert_shape(3, 2)
    expected_x = [4, 6, 8]
    expected_y = [9, 11, 13]
    assert list(captured.df.x) == expected_x
    assert list(captured.df.y) == expected_y


def test_slice_start_end_step(phmgr):
    """`slice 1:5:2` with start, end and step yields two rows."""
    with phmgr("a") as captured:
        _call("slice 1:5:2")
    assert not captured.err
    captured.assert_shape(2, 2)
    expected_x = [4, 6]
    expected_y = [9, 11]
    assert list(captured.df.x) == expected_x
    assert list(captured.df.y) == expected_y


def test_slugify_df(phmgr):
with phmgr("slugit") as captured:
_call("slugify")
Expand Down

0 comments on commit 566e224

Please sign in to comment.