In [1]:
import numpy as np
import pandas as pd


# Seed NumPy's global RNG so the "random" column is the same on every run.
np.random.seed(42)
data = {"nrs": [1, 2, 3, 4, 5], "random": np.random.rand(5)}
# Base frame reused by the cells below.
df = pd.DataFrame(data)
print(df)

   nrs    random
0    1  0.374540
1    2  0.950714
2    3  0.731994
3    4  0.598658
4    5  0.156019


## assign

### 新增a欄

In [2]:
# Add column "a" from a plain list; assign returns a new frame.
print(df.assign(a=[6, 7, 8, 9, 10]))

   nrs    random   a
0    1  0.374540   6
1    2  0.950714   7
2    3  0.731994   8
3    4  0.598658   9
4    5  0.156019  10


### 新增\*a\*欄

In [3]:
# "*a*" is not a valid Python identifier, so the column name is passed
# through dict unpacking instead of as a keyword argument.
print(df.assign(**{"*a*": [6, 7, 8, 9, 10]}))

   nrs    random  *a*
0    1  0.374540    6
1    2  0.950714    7
2    3  0.731994    8
3    4  0.598658    9
4    5  0.156019   10


### 新增a欄(lambda)

In [4]:
# A callable passed to assign receives the frame being assigned to (df_)
# and returns the new column: here nrs + 1.
print(df.assign(a=lambda df_: df_.nrs.add(1)))

   nrs    random  a
0    1  0.374540  2
1    2  0.950714  3
2    3  0.731994  4
3    4  0.598658  5
4    5  0.156019  6


### 同時新增a, b欄

In [5]:
# Keyword arguments are applied in order, so the lambda for "b" can read
# the column "a" created earlier in the same assign call.
print(df.assign(a=lambda df_: df_.nrs.add(1), b=lambda df_: df_.a.add(1)))

   nrs    random  a  b
0    1  0.374540  2  3
1    2  0.950714  3  4
2    3  0.731994  4  5
3    4  0.598658  5  6
4    5  0.156019  6  7


### 新增a欄(df)

In [6]:
# Same result as the lambda form, but the new column is computed from the
# outer `df` directly instead of from the frame passed to a callable.
print(df.assign(a=df.nrs.add(1)))

   nrs    random  a
0    1  0.374540  2
1    2  0.950714  3
2    3  0.731994  4
3    4  0.598658  5
4    5  0.156019  6


### 修改nrs欄位

In [7]:
# Using an existing column name replaces that column in the returned frame.
print(df.assign(nrs=lambda df_: df_.nrs.sub(1)))

   nrs    random
0    0  0.374540
1    1  0.950714
2    2  0.731994
3    3  0.598658
4    4  0.156019


### 完全替換nrs欄位至另一型態

In [8]:
# Replace the integer column "nrs" with single-character strings.
print(df.assign(nrs=list("abcde")))

  nrs    random
0   a  0.374540
1   b  0.950714
2   c  0.731994
3   d  0.598658
4   e  0.156019


### \"."與"[]"

In [9]:
# Attribute access (df_.nrs) and bracket access (df_["nrs"]) select the same
# column here; build each variant once and check that they agree.
via_attribute = df.assign(a=lambda df_: df_.nrs.add(1))
via_brackets = df.assign(a=lambda df_: df_["nrs"].add(1))
assert via_attribute.equals(via_brackets)
print(via_brackets)

   nrs    random  a
0    1  0.374540  2
1    2  0.950714  3
2    3  0.731994  4
3    4  0.598658  5
4    5  0.156019  6


### Attribute access resolves DataFrame attributes before column names

In [10]:
# Columns deliberately named after existing DataFrame attributes.
data = {
    "size": [1, 2, 3, 4, 5],
    "T": [4, 5, 6, 7, 8],
}
df2 = pd.DataFrame(data)
# Dot access returns the DataFrame attribute, not the column of the same name.
print(
    df2.T,  # the transposed frame, not the "T" column
    df2.size,  # the element count (10), not the "size" column
    sep="\n",
)

      0  1  2  3  4
size  1  2  3  4  5
T     4  5  6  7  8
10


## query

In [11]:
# Keep the rows where nrs is greater than ten times random.
print(df.query("nrs > random*10"))

   nrs    random
4    5  0.156019


In [12]:
# Threshold shared with the following filter cells.
target = 3
# Inside query, the "@" prefix refers to a Python variable instead of a column.
print(df.query("nrs > @target"))

   nrs    random
3    4  0.598658
4    5  0.156019


In [13]:
# Two conditions combined with "|" (or) inside a single query string.
print(df.query("nrs > @target | nrs==1"))

   nrs    random
0    1  0.374540
3    4  0.598658
4    5  0.156019


In [14]:
# Boolean-mask form of the query "nrs > random*10".
print(df.loc[df["nrs"] > df["random"] * 10])

   nrs    random
4    5  0.156019


In [15]:
# Boolean-mask form of the query "nrs > @target".
print(df.loc[df["nrs"] > target])

   nrs    random
3    4  0.598658
4    5  0.156019


In [16]:
# Boolean-mask form of the query "nrs > @target | nrs==1"; each comparison
# is parenthesised before the masks are combined with "|".
print(df.loc[(df["nrs"] > target) | (df["nrs"] == 1)])

   nrs    random
0    1  0.374540
3    4  0.598658
4    5  0.156019


In [17]:
# Counter-example, intentionally left commented out because it raises:
# ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
# Without parentheses, "|" binds tighter than ">" and "==", so Python parses
# the mask as the chained comparison  df["nrs"] > (target | df["nrs"]) == 1,
# and the implicit "and" between the two comparisons asks a Series for a
# single truth value.
# (
#     df.loc[df["nrs"] > target | df["nrs"]== 1]
# )

## pipe

In [18]:
def change_value(df_, col_name, value, rows=(0, 2)):
    """Return a copy of ``df_`` with ``value`` written into ``col_name`` at ``rows``.

    Intended for use as a ``DataFrame.pipe`` step. The input frame is left
    untouched so the step has no side effects on the caller's data.

    Parameters
    ----------
    df_ : pandas.DataFrame
        Frame to take the copy from.
    col_name : str
        Label of the column to write into.
    value : scalar
        Value assigned to every selected row of ``col_name``.
    rows : iterable of index labels, default ``(0, 2)``
        Index labels of the rows to overwrite.

    Returns
    -------
    pandas.DataFrame
        A new frame with the requested cells replaced.
    """
    # Work on a copy: assigning through .loc on the argument itself would
    # also modify the frame owned by the caller.
    result = df_.copy()
    result.loc[list(rows), col_name] = value
    return result


# pipe passes the intermediate frame as the first argument to change_value,
# followed by "a" and 100; the next assign then derives "b" from the
# modified "a" column. The chain is evaluated once and printed.
print(
    df.assign(a=lambda df_: df_.nrs.add(1))
    .pipe(change_value, "a", 100)
    .assign(b=lambda df_: df_.a.add(1))
)

   nrs    random    a    b
0    1  0.374540  100  101
1    2  0.950714    3    4
2    3  0.731994  100  101
3    4  0.598658    5    6
4    5  0.156019    6    7
