In [1]:
import redframes as rf

In [2]:
def markdown(df: rf.DataFrame, index: bool = False):
    """Print ``df`` as a Markdown table, followed by a blank line.

    Args:
        df: Frame to render. The table text comes from ``df._data.to_markdown``
            (``_data`` is presumably the wrapped pandas frame -- TODO confirm).
        index: Forwarded unchanged as ``to_markdown(index=...)``.
    """
    # print() separates its arguments with a space, so the output is the
    # table, one space, and then two newlines.
    print(df._data.to_markdown(index=index), "\n")

In [3]:
# accumulate: add a running total of "foo" as a new "cumsum" column.
numbers = rf.DataFrame({"foo": [1, 2, 3, 4]})
markdown(numbers)
numbers_cumsum = numbers.accumulate("foo", into="cumsum")
markdown(numbers_cumsum)

|   foo |
|------:|
|     1 |
|     2 |
|     3 |
|     4 | 

|   foo |   cumsum |
|------:|---------:|
|     1 |        1 |
|     2 |        3 |
|     3 |        6 |
|     4 |       10 | 



In [4]:
# append: stack the rows of two frames. Columns line up by name (note the
# different column order below); "baz" is missing for the first frame's rows.
top = rf.DataFrame({"foo": [1, 2], "bar": ["A", "B"]})
markdown(top)
bottom = rf.DataFrame({"bar": ["C", "D"], "foo": [3, 4], "baz": ["$", "@"]})
markdown(bottom)
stacked = top.append(bottom)
markdown(stacked)

|   foo | bar   |
|------:|:------|
|     1 | A     |
|     2 | B     | 

| bar   |   foo | baz   |
|:------|------:|:------|
| C     |     3 | $     |
| D     |     4 | @     | 

|   foo | bar   | baz   |
|------:|:------|:------|
|     1 | A     | nan   |
|     2 | B     | nan   |
|     3 | C     | $     |
|     4 | D     | @     | 



In [5]:
# combine: join "bar" and "foo" (in that order) with "::" into "baz";
# drop=True removes the two source columns from the result.
parts = rf.DataFrame({"foo": [1, 2], "bar": ["A", "B"]})
markdown(parts)
combined = parts.combine(["bar", "foo"], into="baz", sep="::", drop=True)
markdown(combined)

|   foo | bar   |
|------:|:------|
|     1 | A     |
|     2 | B     | 

| baz   |
|:------|
| A::1  |
| B::2  | 



In [6]:
# dedupe: remove duplicate rows, judged on all columns or on a chosen subset.
with_dupes = rf.DataFrame({"foo": [1, 1, 2, 2], "bar": ["A", "A", "B", "A"]})
markdown(with_dupes)

# No argument: only fully identical rows collapse.
unique_rows = with_dupes.dedupe()
markdown(unique_rows)

# Single column: one row is kept per distinct "foo" value.
unique_foo = with_dupes.dedupe("foo")
markdown(unique_foo)

# List of columns: here it names every column, so it matches the no-argument call.
unique_foo_bar = with_dupes.dedupe(["foo", "bar"])
markdown(unique_foo_bar)

|   foo | bar   |
|------:|:------|
|     1 | A     |
|     1 | A     |
|     2 | B     |
|     2 | A     | 

|   foo | bar   |
|------:|:------|
|     1 | A     |
|     2 | B     |
|     2 | A     | 

|   foo | bar   |
|------:|:------|
|     1 | A     |
|     2 | B     | 

|   foo | bar   |
|------:|:------|
|     1 | A     |
|     2 | B     |
|     2 | A     | 



In [7]:
# denix: drop rows that contain missing values.
with_gaps = rf.DataFrame(
    {"foo": [1, None, 3, None, 5, 6], "bar": [1, None, 3, 4, None, None]}
)
markdown(with_gaps)

# No argument: a gap in any column removes the row.
complete_rows = with_gaps.denix()
markdown(complete_rows)

# Single column: only gaps in "bar" matter, so a missing "foo" can survive.
bar_present = with_gaps.denix("bar")
markdown(bar_present)

# List of columns: a gap in either listed column removes the row.
foo_bar_present = with_gaps.denix(["foo", "bar"])
markdown(foo_bar_present)

|   foo |   bar |
|------:|------:|
|     1 |     1 |
|   nan |   nan |
|     3 |     3 |
|   nan |     4 |
|     5 |   nan |
|     6 |   nan | 

|   foo |   bar |
|------:|------:|
|     1 |     1 |
|     3 |     3 | 

|   foo |   bar |
|------:|------:|
|     1 |     1 |
|     3 |     3 |
|   nan |     4 | 

|   foo |   bar |
|------:|------:|
|     1 |     1 |
|     3 |     3 | 



In [8]:
# drop: remove one column (string) or several (list of strings).
three_cols = rf.DataFrame({"foo": [1, 2], "bar": [3, 4], "baz": [5, 6]})
markdown(three_cols)
without_baz = three_cols.drop("baz")
markdown(without_baz)
only_bar = three_cols.drop(["foo", "baz"])
markdown(only_bar)

|   foo |   bar |   baz |
|------:|------:|------:|
|     1 |     3 |     5 |
|     2 |     4 |     6 | 

|   foo |   bar |
|------:|------:|
|     1 |     3 |
|     2 |     4 | 

|   bar |
|------:|
|     3 |
|     4 | 



In [9]:
# fill: replace missing values with a constant or by carrying neighbours
# down/up, across every column or only the ones named.
sparse = rf.DataFrame(
    {"foo": [1, None, None, 2, None], "bar": [None, "A", None, "B", None]}
)
markdown(sparse)

# Constant, all columns (the string column "bar" receives 0 as well).
all_zero_filled = sparse.fill(constant=0)
markdown(all_zero_filled)

# Carry values downward, all columns; a leading gap has nothing above it
# and stays empty.
all_down_filled = sparse.fill(direction="down")
markdown(all_down_filled)

# Carry downward in "foo" only.
foo_down_filled = sparse.fill("foo", direction="down")
markdown(foo_down_filled)

# Carry upward in "foo" only; the trailing gap stays missing.
foo_up_filled = sparse.fill(["foo"], direction="up")
markdown(foo_up_filled)

# Constant in "foo" only.
foo_zero_filled = sparse.fill(["foo"], constant=0)
markdown(foo_zero_filled)

|   foo | bar   |
|------:|:------|
|     1 |       |
|   nan | A     |
|   nan |       |
|     2 | B     |
|   nan |       | 

|   foo | bar   |
|------:|:------|
|     1 | 0     |
|     0 | A     |
|     0 | 0     |
|     2 | B     |
|     0 | 0     | 

|   foo | bar   |
|------:|:------|
|     1 |       |
|     1 | A     |
|     1 | A     |
|     2 | B     |
|     2 | B     | 

|   foo | bar   |
|------:|:------|
|     1 |       |
|     1 | A     |
|     1 |       |
|     2 | B     |
|     2 |       | 

|   foo | bar   |
|------:|:------|
|     1 |       |
|     2 | A     |
|     2 |       |
|     2 | B     |
|   nan |       | 

|   foo | bar   |
|------:|:------|
|     1 |       |
|     0 | A     |
|     0 |       |
|     2 | B     |
|     0 |       | 



In [10]:
# filter: keep the rows for which the predicate holds.
letters = rf.DataFrame({"foo": ["A", "A", "A", "B"], "bar": [1, 2, 3, 4]})
markdown(letters)

# Membership test.
only_a = letters.filter(lambda row: row["foo"].isin(["A"]))
markdown(only_a)

# Conditions combined with & (and).
a_and_small = letters.filter(lambda row: (row["foo"] == "A") & (row["bar"] <= 2))
markdown(a_and_small)

# Conditions combined with | (or).
b_or_first = letters.filter(lambda row: (row["foo"] == "B") | (row["bar"] == 1))
markdown(b_or_first)

| foo   |   bar |
|:------|------:|
| A     |     1 |
| A     |     2 |
| A     |     3 |
| B     |     4 | 

| foo   |   bar |
|:------|------:|
| A     |     1 |
| A     |     2 |
| A     |     3 | 

| foo   |   bar |
|:------|------:|
| A     |     1 |
| A     |     2 | 

| foo   |   bar |
|:------|------:|
| A     |     1 |
| B     |     4 | 



In [11]:
# gather: reshape wide columns into (name, value) pairs.
wide = rf.DataFrame({"foo": [1, 2], "bar": [3, 4], "baz": [4, 5]})
markdown(wide)

# No argument: every column is gathered into "variable"/"value".
long_all = wide.gather()
markdown(long_all)

# Only "foo" is gathered; the remaining columns are kept alongside.
long_foo = wide.gather(["foo"])
markdown(long_foo)

# `into` names the two output columns.
long_named = wide.gather(["foo", "bar"], into=("var", "val"))
markdown(long_named)

|   foo |   bar |   baz |
|------:|------:|------:|
|     1 |     3 |     4 |
|     2 |     4 |     5 | 

| variable   |   value |
|:-----------|--------:|
| foo        |       1 |
| foo        |       2 |
| bar        |       3 |
| bar        |       4 |
| baz        |       4 |
| baz        |       5 | 

|   bar |   baz | variable   |   value |
|------:|------:|:-----------|--------:|
|     3 |     4 | foo        |       1 |
|     4 |     5 | foo        |       2 | 

|   baz | var   |   val |
|------:|:------|------:|
|     4 | foo   |     1 |
|     5 | foo   |     2 |
|     4 | bar   |     3 |
|     5 | bar   |     4 | 



In [12]:
# group: run a verb separately within each "foo" group.
records = rf.DataFrame(
    {
        "foo": ["A", "A", "A", "B", "B"],
        "bar": [1, 2, 3, 4, 5],
        "baz": [9, 7, 7, 5, 6],
    }
)
markdown(records)

# First row of each group.
first_per_group = records.group("foo").take(1)
markdown(first_per_group)

# Running total of "bar" that restarts for each group.
cumsum_per_group = records.group("foo").accumulate("bar", into="bar_cumsum")
markdown(cumsum_per_group)

# Rank of "baz" inside each group, largest first; tied values share a rank.
rank_per_group = records.group("foo").rank("baz", into="baz_rank", descending=True)
markdown(rank_per_group)

# One summary row per group.
group_stats = {
    "bar_mean": ("bar", rf.stat.mean),
    "baz_min": ("baz", rf.stat.min),
}
summary_per_group = records.group("foo").summarize(group_stats)
markdown(summary_per_group)


| foo   |   bar |   baz |
|:------|------:|------:|
| A     |     1 |     9 |
| A     |     2 |     7 |
| A     |     3 |     7 |
| B     |     4 |     5 |
| B     |     5 |     6 | 

| foo   |   bar |   baz |
|:------|------:|------:|
| A     |     1 |     9 |
| B     |     4 |     5 | 

| foo   |   bar |   baz |   bar_cumsum |
|:------|------:|------:|-------------:|
| A     |     1 |     9 |            1 |
| A     |     2 |     7 |            3 |
| A     |     3 |     7 |            6 |
| B     |     4 |     5 |            4 |
| B     |     5 |     6 |            9 | 

| foo   |   bar |   baz |   baz_rank |
|:------|------:|------:|-----------:|
| A     |     1 |     9 |          1 |
| A     |     2 |     7 |          2 |
| A     |     3 |     7 |          2 |
| B     |     4 |     5 |          2 |
| B     |     5 |     6 |          1 | 

| foo   |   bar_mean |   baz_min |
|:------|-----------:|----------:|
| A     |        2   |         7 |
| B     |        4.5 |         5 | 



In [13]:
# join: left-join on "foo". Both sides carry a non-key "bar" column, which
# comes back as "bar_lhs" / "bar_rhs" in the result.
left = rf.DataFrame({"foo": ["A", "A", "B", "B"], "bar": [1, 2, 3, 4]})
markdown(left)
right = rf.DataFrame({"foo": ["A", "B"], "bar": [5, 6], "baz": ["!", None]})
markdown(right)
joined = left.join(right, on="foo", how="left")
markdown(joined)

| foo   |   bar |
|:------|------:|
| A     |     1 |
| A     |     2 |
| B     |     3 |
| B     |     4 | 

| foo   |   bar | baz   |
|:------|------:|:------|
| A     |     5 | !     |
| B     |     6 |       | 

| foo   |   bar_lhs |   bar_rhs | baz   |
|:------|----------:|----------:|:------|
| A     |         1 |         5 | !     |
| A     |         2 |         5 | !     |
| B     |         3 |         6 |       |
| B     |         4 |         6 |       | 



In [14]:
# mutate: add columns computed per row. "baz" reads "bar", which is created
# earlier in the same call, so the entries are applied in order.
base = rf.DataFrame({"foo": [1, 2, 3]})
markdown(base)
new_columns = {
    "bar": lambda row: float(row["foo"]),
    "baz": lambda row: "X" + str(row["bar"] * 2),
    "jaz": lambda _: "Jazz",
}
mutated = base.mutate(new_columns)
markdown(mutated)

|   foo |
|------:|
|     1 |
|     2 |
|     3 | 

|   foo |   bar | baz   | jaz   |
|------:|------:|:------|:------|
|     1 |     1 | X2.0  | Jazz  |
|     2 |     2 | X4.0  | Jazz  |
|     3 |     3 | X6.0  | Jazz  | 



In [15]:
# rank: number the values of "foo" from largest (1) downward. Tied values
# share a rank and the following rank is not skipped.
unranked = rf.DataFrame({"foo": [2, 3, 3, 99, 1000, 1, -6, 4]})
markdown(unranked)
ranked = unranked.rank("foo", into="rank", descending=True)
markdown(ranked)

|   foo |
|------:|
|     2 |
|     3 |
|     3 |
|    99 |
|  1000 |
|     1 |
|    -6 |
|     4 | 

|   foo |   rank |
|------:|-------:|
|     2 |      5 |
|     3 |      4 |
|     3 |      4 |
|    99 |      2 |
|  1000 |      1 |
|     1 |      6 |
|    -6 |      7 |
|     4 |      3 | 



In [16]:
# rename: map old column names to new ones.
original_names = rf.DataFrame({"foo": [1, 2], "bar": [3, 4]})
markdown(original_names)
renamed = original_names.rename({"foo": "oof", "bar": "rab"})
markdown(renamed)

|   foo |   bar |
|------:|------:|
|     1 |     3 |
|     2 |     4 | 

|   oof |   rab |
|------:|------:|
|     1 |     3 |
|     2 |     4 | 



In [17]:
# replace: swap values column by column using {column: {old: new}} mappings.
mixed = rf.DataFrame({"foo": [1, 2, 2, 2, 1], "bar": [1, "A", "B", True, False]})
markdown(mixed)
value_map = {
    "foo": {2: 222},
    "bar": {False: 0, True: 1, "A": 2, "B": 3},
}
replaced = mixed.replace(value_map)
markdown(replaced)

|   foo | bar   |
|------:|:------|
|     1 | 1     |
|     2 | A     |
|     2 | B     |
|     2 | True  |
|     1 | False | 

|   foo |   bar |
|------:|------:|
|     1 |     1 |
|   222 |     2 |
|   222 |     3 |
|   222 |     1 |
|     1 |     0 | 



In [18]:
# sample: pick a subset of rows.
# NOTE(review): the selection looks random (the saved outputs show different
# rows for each call) and no seed is set, so a re-run will print other rows.
# Consider fixing a seed if the installed redframes version supports one.
population = rf.DataFrame({"foo": range(10), "bar": range(10, 20)})
markdown(population)

# Default: a single row.
one_row_default = population.sample()
markdown(one_row_default)

# Integer: that many rows.
one_row = population.sample(1)
markdown(one_row)

# Float: a fraction of the rows (0.3 of 10 rows gives 3).
three_rows_by_fraction = population.sample(0.3)
markdown(three_rows_by_fraction)

three_rows_by_count = population.sample(3)
markdown(three_rows_by_count)

|   foo |   bar |
|------:|------:|
|     0 |    10 |
|     1 |    11 |
|     2 |    12 |
|     3 |    13 |
|     4 |    14 |
|     5 |    15 |
|     6 |    16 |
|     7 |    17 |
|     8 |    18 |
|     9 |    19 | 

|   foo |   bar |
|------:|------:|
|     7 |    17 | 

|   foo |   bar |
|------:|------:|
|     8 |    18 | 

|   foo |   bar |
|------:|------:|
|     6 |    16 |
|     8 |    18 |
|     1 |    11 | 

|   foo |   bar |
|------:|------:|
|     1 |    11 |
|     8 |    18 |
|     2 |    12 | 



In [19]:
# select: keep one column (string) or several (list of strings).
three_cols = rf.DataFrame({"foo": [1, 2], "bar": [3, 4], "baz": [5, 6]})
markdown(three_cols)
only_foo = three_cols.select("foo")
markdown(only_foo)
foo_and_baz = three_cols.select(["foo", "baz"])
markdown(foo_and_baz)

|   foo |   bar |   baz |
|------:|------:|------:|
|     1 |     3 |     5 |
|     2 |     4 |     6 | 

|   foo |
|------:|
|     1 |
|     2 | 

|   foo |   baz |
|------:|------:|
|     1 |     5 |
|     2 |     6 | 



In [20]:
# shuffle: reorder the rows.
# NOTE(review): the order is presumably random and no seed is set, so the
# saved output will not be reproduced on re-run. Consider fixing a seed if
# the installed redframes version supports one.
ordered = rf.DataFrame({"foo": range(5), "bar": range(5, 10)})
markdown(ordered)
shuffled = ordered.shuffle()
markdown(shuffled)

|   foo |   bar |
|------:|------:|
|     0 |     5 |
|     1 |     6 |
|     2 |     7 |
|     3 |     8 |
|     4 |     9 | 

|   foo |   bar |
|------:|------:|
|     2 |     7 |
|     1 |     6 |
|     0 |     5 |
|     3 |     8 |
|     4 |     9 | 



In [21]:
# sort: order rows by one or more columns.
unsorted = rf.DataFrame({"foo": ["Z", "X", "A", "A"], "bar": [2, -2, 4, -4]})
markdown(unsorted)

# Ascending by "bar" (the default direction).
by_bar = unsorted.sort("bar")
markdown(by_bar)

# Descending by "bar".
by_bar_desc = unsorted.sort("bar", descending=True)
markdown(by_bar_desc)

# By "foo", with "bar" breaking ties inside equal "foo" values.
by_foo_then_bar = unsorted.sort(["foo", "bar"], descending=False)
markdown(by_foo_then_bar)

| foo   |   bar |
|:------|------:|
| Z     |     2 |
| X     |    -2 |
| A     |     4 |
| A     |    -4 | 

| foo   |   bar |
|:------|------:|
| A     |    -4 |
| X     |    -2 |
| Z     |     2 |
| A     |     4 | 

| foo   |   bar |
|:------|------:|
| A     |     4 |
| Z     |     2 |
| X     |    -2 |
| A     |    -4 | 

| foo   |   bar |
|:------|------:|
| A     |    -4 |
| A     |     4 |
| X     |    -2 |
| Z     |     2 | 



In [22]:
# split: break "foo" on "::" into the columns "foo" and "bar". The value
# "C:3" has no "::" separator, so it stays whole and its "bar" is empty.
packed = rf.DataFrame({"foo": ["A::1", "B::2", "C:3"]})
markdown(packed)
unpacked = packed.split("foo", into=["foo", "bar"], sep="::", drop=True)
markdown(unpacked)

| foo   |
|:------|
| A::1  |
| B::2  |
| C:3   | 

| foo   |   bar |
|:------|------:|
| A     |     1 |
| B     |     2 |
| C:3   |       | 



In [23]:
# spread: turn each distinct "foo" value into its own column filled from
# "bar". "A" has fewer values than "B", so its column ends with a missing value.
long = rf.DataFrame(
    {"foo": ["A", "A", "A", "B", "B", "B", "B"], "bar": [1, 2, 3, 4, 5, 6, 7]}
)
markdown(long)
wide = long.spread("foo", using="bar")
markdown(wide)

| foo   |   bar |
|:------|------:|
| A     |     1 |
| A     |     2 |
| A     |     3 |
| B     |     4 |
| B     |     5 |
| B     |     6 |
| B     |     7 | 

|   A |   B |
|----:|----:|
|   1 |   4 |
|   2 |   5 |
|   3 |   6 |
| nan |   7 | 



In [24]:
# summarize: reduce the whole frame to a single row of statistics, given as
# {new column: (source column, statistic)}.
measurements = rf.DataFrame({"foo": [1, 2, 3, 4, 5], "bar": [99, 100, 1, -5, 2]})
markdown(measurements)
stats = {
    "fcount": ("foo", rf.stat.count),
    "fmean": ("foo", rf.stat.mean),
    "fsum": ("foo", rf.stat.sum),
    "fmax": ("foo", rf.stat.max),
    "bmedian": ("bar", rf.stat.median),
    "bmin": ("bar", rf.stat.min),
    "bstd": ("bar", rf.stat.std),
}
summary = measurements.summarize(stats)
markdown(summary)

|   foo |   bar |
|------:|------:|
|     1 |    99 |
|     2 |   100 |
|     3 |     1 |
|     4 |    -5 |
|     5 |     2 | 

|   fcount |   fmean |   fsum |   fmax |   bmedian |   bmin |   bstd |
|---------:|--------:|-------:|-------:|----------:|-------:|-------:|
|        5 |       3 |     15 |      5 |         2 |     -5 |  54.93 | 



In [25]:
# take: a positive count keeps rows from the top, a negative count keeps
# rows from the bottom.
ten_rows = rf.DataFrame({"foo": range(10)})
markdown(ten_rows)
first_row = ten_rows.take(1)
markdown(first_row)
last_two_rows = ten_rows.take(-2)
markdown(last_two_rows)

|   foo |
|------:|
|     0 |
|     1 |
|     2 |
|     3 |
|     4 |
|     5 |
|     6 |
|     7 |
|     8 |
|     9 | 

|   foo |
|------:|
|     0 | 

|   foo |
|------:|
|     8 |
|     9 | 

