In [1]:
import pandas as pd
import numpy as np

# Idioms

In [2]:
# Starting frame for the if-then examples: three integer columns, four rows.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


## If-then...

In [3]:
# If-then on a single column: where AAA is 5 or more, overwrite BBB with -1.
# Rows that fail the condition are left untouched.
df.loc[df["AAA"].ge(5), "BBB"] = -1
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [4]:
# If-then on two columns at once: the same scalar is written to BBB and CCC
# for every row whose AAA is 5 or more.
df.loc[df["AAA"].ge(5), ["BBB", "CCC"]] = 555
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,555,555
2,6,555,555
3,7,555,555


In [5]:
# The "else" branch: rows whose AAA is below 5 get 2000 in BBB and CCC.
df.loc[df["AAA"].lt(5), ["BBB", "CCC"]] = 2000
df

Unnamed: 0,AAA,BBB,CCC
0,4,2000,2000
1,5,555,555
2,6,555,555
3,7,555,555


In [6]:
# Boolean frame with columns AAA/BBB/CCC and four rows:
# AAA is all True, BBB is all False, CCC alternates True/False.
df_mask = pd.DataFrame(
    {
        "AAA": [True, True, True, True],
        "BBB": [False, False, False, False],
        "CCC": [True, False, True, False],
    }
)
df_mask

Unnamed: 0,AAA,BBB,CCC
0,True,False,True
1,True,False,False
2,True,False,True
3,True,False,False


In [7]:
# Keep each value of df where df_mask is True and substitute -1000 elsewhere.
# The result is a new frame; df is not assigned to here.
df.where(df_mask, other=-1000)

Unnamed: 0,AAA,BBB,CCC
0,4,-1000,2000
1,5,-1000,-1000
2,6,-1000,555
3,7,-1000,-1000


In [8]:
# Rebuild the original frame so the next example starts from unmodified values.
df = pd.DataFrame(
    dict(
        AAA=[4, 5, 6, 7],
        BBB=[10, 20, 30, 40],
        CCC=[100, 50, -30, -50],
    )
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [9]:
# Add a label column in one vectorised step: rows with AAA greater than 5 are
# tagged "high", all other rows "low".
df["logic"] = np.where(df["AAA"] > 5, "high", "low")
df

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,hight
3,7,40,-50,hight


In [10]:
# Two consecutive integer runs: "one" holds 1..5 and "two" holds 6..10.
df = pd.DataFrame({"one": list(range(1, 6)), "two": list(range(6, 11))})
df

Unnamed: 0,one,two
0,1,6
1,2,7
2,3,8
3,4,9
4,5,10


In [11]:
# Piecewise transform of column "one"; the first matching rule wins:
#   x <= 2     -> x * 2
#   2 < x <= 4 -> x ** 2
#   otherwise  -> x + 10
# Nested np.where evaluates the rules on the whole column at once instead of
# calling a Python lambda per element.
one = df["one"]
df_change = pd.Series(
    np.where(one <= 2, one * 2, np.where(one <= 4, one ** 2, one + 10)),
    index=one.index,  # keep the original row labels
    name=one.name,    # keep the Series name ("one")
)
df_change

0     2
1     4
2     9
3    16
4    15
Name: one, dtype: int64

## Splitting

In [12]:
# Frame for the splitting examples (note BBB's third value is 39 here).
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 39, 40],
        "CCC": [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,39,-30
3,7,40,-50


In [13]:
# First half of the split: rows whose AAA is at most 5.
df.loc[df["AAA"].le(5)]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50


In [14]:
# Second half of the split: rows whose AAA is strictly greater than 5.
df.loc[df["AAA"].gt(5)]

Unnamed: 0,AAA,BBB,CCC
2,6,39,-30
3,7,40,-50


## Building criteria

In [15]:
# Fresh frame for the multi-column criteria examples.
df = pd.DataFrame(
    dict(
        AAA=[4, 5, 6, 7],
        BBB=[10, 20, 30, 40],
        CCC=[100, 50, -30, -50],
    )
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [16]:
# AND criteria: CCC values from rows where AAA > 5 and BBB > 20 both hold.
# Selecting a single column label makes the result a Series.
df_t = df.loc[df["AAA"].gt(5) & df["BBB"].gt(20), "CCC"]
df_t

2   -30
3   -50
Name: CCC, dtype: int64

In [17]:
# OR criteria: CCC values from rows where AAA > 5 or BBB > 30 (either is enough).
# Selecting a single column label makes the result a Series.
df_s = df.loc[df["AAA"].gt(5) | df["BBB"].gt(30), "CCC"]
df_s

2   -30
3   -50
Name: CCC, dtype: int64

In [18]:
# Show df again: the .loc selections above only read from it, so the frame is unchanged.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [19]:
# AAA is created as an integer column. Cast it to float before writing 0.1 so
# the assignment does not rely on a silent int -> float upcast, which pandas
# has deprecated for incompatible-dtype setitem.
df["AAA"] = df["AAA"].astype("float64")

# OR criteria with assignment: rows where BBB > 25 or CCC >= 75 get AAA = 0.1.
df.loc[(df["BBB"] > 25) | (df["CCC"] >= 75), "AAA"] = 0.1
df

Unnamed: 0,AAA,BBB,CCC
0,0.1,10,100
1,5.0,20,50
2,0.1,30,-30
3,0.1,40,-50


In [20]:
# Reset to the original integer frame before the closest-value example.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [21]:
aValue = 43.0
# Order the rows by how close CCC is to aValue (closest first).
# argsort() yields integer *positions*, so the rows are picked with the
# position-based .iloc; label-based .loc would only give the same answer
# while the index happens to be the default 0..n-1 range.
df.iloc[(df["CCC"] - aValue).abs().to_numpy().argsort()]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


In [22]:
# Show df again: the sorted selection above was not assigned back, so df keeps its original row order.
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [32]:
# Row-wise overwrite: no column selector is given, so every column of each
# row whose BBB exceeds 20 is set to 0.
df.loc[df["BBB"].gt(20)] = 0
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,0,0,0
3,0,0,0


In [39]:
# CCC values from rows whose BBB is not missing. Row filter and column pick
# are done in a single .loc call rather than chained [] lookups, which would
# build an intermediate frame first.
df.loc[df["BBB"].notna(), "CCC"]

0    100
1     50
2      0
3      0
Name: CCC, dtype: int64