In [1]:
import functools

import numpy as np
import pandas as pd

In [2]:
# Part 1: if-then-else
# First approach
# Build a 4x3 demo frame: the 12 values are laid out as 3 rows of 4 and then
# transposed, so each of those rows becomes one column (AAA, BBB, CCC).
raw_values = np.array([4, 5, 6, 7, 10, 20, 30, 40, 100, 50, -30, -50])
df1 = pd.DataFrame(raw_values.reshape(3, 4).T, columns=['AAA', 'BBB', 'CCC'])
print(df1)

   AAA  BBB  CCC
0    4   10  100
1    5   20   50
2    6   30  -30
3    7   40  -50


In [3]:
# Run it in two separate steps. Step one is the "if-then" part:
# wherever AAA >= 5, overwrite both BBB and CCC with 555.
at_least_five = df1['AAA'] >= 5
df1.loc[at_least_five, ['BBB', 'CCC']] = 555
print(df1)

   AAA  BBB  CCC
0    4   10  100
1    5  555  555
2    6  555  555
3    7  555  555


In [4]:
# Step two is the "else" part.
# With both steps applied, rows where AAA < 5 hold 2000 in the other two columns,
# and every other row holds 555.
below_five = df1['AAA'] < 5
df1.loc[below_five, ['BBB', 'CCC']] = 2000
print(df1)

   AAA   BBB   CCC
0    4  2000  2000
1    5   555   555
2    6   555   555
3    7   555   555


In [5]:
# Second approach: use numpy's where function directly to express if-then-else.
grid = np.array([4, 5, 6, 7, 10, 20, 30, 40, 100, 50, -30, -50]).reshape(3, 4)
df2 = pd.DataFrame(grid.T, columns=['AAA', 'BBB', 'CCC'])
# Label each row 'high' when AAA > 5 and 'low' otherwise.
above_five = df2['AAA'] > 5
df2['tj'] = np.where(above_five, 'high', 'low')
print(df2)

   AAA  BBB  CCC    tj
0    4   10  100   low
1    5   20   50   low
2    6   30  -30  high
3    7   40  -50  high


In [6]:
# Part 2: splitting
# Split a DataFrame using boolean criteria.
# Start again from a fresh copy of the demo frame.
flat_values = np.array([4, 5, 6, 7, 10, 20, 30, 40, 100, 50, -30, -50])
column_major = flat_values.reshape(3, 4).T
df1 = pd.DataFrame(column_major, columns=['AAA', 'BBB', 'CCC'])
print(df1)

   AAA  BBB  CCC
0    4   10  100
1    5   20   50
2    6   30  -30
3    7   40  -50


In [7]:
# Split df1 on column AAA: keep the rows that satisfy the condition AAA <= 5.
is_small = df1['AAA'] <= 5
a = df1[is_small]
print(is_small)       # the boolean mask itself
print('-' * 30)
print(a)              # the rows selected by the mask

0     True
1     True
2    False
3    False
Name: AAA, dtype: bool
------------------------------
   AAA  BBB  CCC
0    4   10  100
1    5   20   50


In [8]:
# The remaining part of df1: the rows where AAA > 5.
is_large = df1['AAA'] > 5
b = df1[is_large]
print(is_large)       # the boolean mask itself
print('-' * 30)
print(b)              # the rows selected by the mask

0    False
1    False
2     True
3     True
Name: AAA, dtype: bool
------------------------------
   AAA  BBB  CCC
2    6   30  -30
3    7   40  -50


In [9]:
# Part 3: building criteria
# Part 3-1: selecting on several columns
# AND (&): take column AAA from the rows where BBB < 25 and CCC >= -40.
bbb_below_25 = df1['BBB'].lt(25)
ccc_from_minus_40 = df1['CCC'].ge(-40)
c = df1.loc[bbb_below_25 & ccc_from_minus_40, 'AAA']
print(c)

0    4
1    5
Name: AAA, dtype: int64


In [10]:
# OR (|): take column AAA from the rows where BBB < 25 or CCC >= -40.
bbb_below_25 = df1['BBB'].lt(25)
ccc_from_minus_40 = df1['CCC'].ge(-40)
d = df1.loc[bbb_below_25 | ccc_from_minus_40, 'AAA']
print(d)

0    4
1    5
2    6
Name: AAA, dtype: int64


In [11]:
# OR (|): set column AAA to 0.1 in the rows where BBB < 25 or CCC >= -40.
# AAA is an integer column at this point and 0.1 cannot be stored in it, so cast the
# column to float explicitly before assigning. Relying on the assignment itself to
# upcast the column silently is deprecated in recent pandas (FutureWarning since 2.1).
either_match = (df1['BBB'] < 25) | (df1['CCC'] >= -40)
df1['AAA'] = df1['AAA'].astype('float64')
df1.loc[either_match, 'AAA'] = 0.1
print(df1)

   AAA  BBB  CCC
0  0.1   10  100
1  0.1   20   50
2  0.1   30  -30
3  7.0   40  -50


In [12]:
# Part 3-2: use argsort to select the rows closest to a given value
# Fresh demo frame, this time built column by column from a dict.
sample_columns = {
    'AAA': [4, 5, 6, 7],
    'BBB': [10, 20, 30, 40],
    'CCC': [100, 50, -30, -50],
}
df1 = pd.DataFrame(sample_columns)
print(df1)

   AAA  BBB  CCC
0    4   10  100
1    5   20   50
2    6   30  -30
3    7   40  -50


In [13]:
# Order the rows of df1 by how close CCC is to the target value, nearest first.
aval = 43
# argsort yields integer positions, not index labels, so the rows must be picked with
# .iloc. Passing the positions to .loc only gives the right rows while the index is
# the default 0..n-1 range.
nearest_first = (df1['CCC'] - aval).abs().to_numpy().argsort()
e = df1.iloc[nearest_first]
print(e)

   AAA  BBB  CCC
1    5   20   50
0    4   10  100
2    6   30  -30
3    7   40  -50


In [14]:
# Part 3-3: dynamically reduce a list of criteria using binary operators
# Fresh demo frame, built column by column from a dict.
demo_columns = dict(
    AAA=[4, 5, 6, 7],
    BBB=[10, 20, 30, 40],
    CCC=[100, 50, -30, -50],
)
df1 = pd.DataFrame(demo_columns)
print(df1)

   AAA  BBB  CCC
0    4   10  100
1    5   20   50
2    6   30  -30
3    7   40  -50


In [16]:
# Three independent row criteria on df1, each a boolean Series.
crit1 = df1['AAA'].le(5.5)
crit2 = df1['BBB'].eq(10)
crit3 = df1['CCC'].gt(-40)
# The hard-coded way: chain the criteria by hand with &.
allcrit = crit1 & crit2 & crit3
matching_rows = df1[allcrit]
print(matching_rows)

   AAA  BBB  CCC
0    4   10  100


In [17]:
# The dynamic way: keep the criteria in a list and fold them into a single mask.
# functools.reduce applies & pairwise from left to right, so for this list the result
# equals crit1 & crit2 & crit3 while still working for a list of any non-zero length.
# (functools is imported in the notebook's import cell rather than mid-notebook.)
critlist = [crit1, crit2, crit3]
allcrit2 = functools.reduce(lambda x, y: x & y, critlist)
print(df1[allcrit2])

   AAA  BBB  CCC
0    4   10  100
