Idioms

In [1]:
import numpy as np
import pandas as pd

In [6]:
# Small integer frame used by the conditional-assignment examples below.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [3]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [4]:
# Overwrite BBB with -1 on every row whose AAA is at least 5.
df.loc[df["AAA"].ge(5), "BBB"] = -1

In [5]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [7]:
# Rebuild the frame before the next example: BBB was overwritten by the
# assignment above, so without this reset a top-to-bottom run would not show
# the original values here.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [9]:
# Same row filter as before, but write -1 into two columns at once.
df.loc[df["AAA"].ge(5), ["BBB", "CCC"]] = -1

In [16]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,-1
2,6,-1,-1
3,7,-1,-1


In [17]:
# Start again from the untouched sample values.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [18]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [19]:
# Vectorised if/else: tag each row "high" when AAA exceeds 5, otherwise "low".
# numpy is imported with the other dependencies in the first cell, so this
# cell no longer carries its own import.
df["logic"] = np.where(df["AAA"] > 5, "high", "low")

In [20]:
df

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,high
3,7,40,-50,high


Splitting

In [21]:
# Fresh copy of the sample frame for the row-splitting examples.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [22]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [24]:
# Rows whose AAA is below 6.
df.loc[df["AAA"].lt(6)]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50


In [26]:
# Rows whose BBB is above 10.
df.loc[df["BBB"].gt(10)]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
2,6,30,-30
3,7,40,-50


In [27]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [28]:
# AAA values on rows that satisfy both conditions (element-wise AND).
df.loc[df["BBB"].lt(25) & df["CCC"].ge(-40), "AAA"]

0    4
1    5
Name: AAA, dtype: int64

In [29]:
# AAA holds integers; writing 0.1 into it relies on pandas silently upcasting
# the column, which newer releases deprecate. Convert to float explicitly so
# the assignment is well-defined on every version.
df["AAA"] = df["AAA"].astype("float64")
# Set AAA to 0.1 on rows matching either condition (element-wise OR).
df.loc[(df["BBB"] > 25) | (df["CCC"] >= 75), "AAA"] = 0.1

In [30]:
df

Unnamed: 0,AAA,BBB,CCC
0,0.1,10,100
1,5.0,20,50
2,0.1,30,-30
3,0.1,40,-50


In [32]:
# Restore the integer sample frame after the float assignment above.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [33]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [39]:
# Reference value; the next cell orders rows by how far AAA is from it.
aValue = 5.0

In [40]:
# Order rows by the absolute distance of AAA from aValue, closest first.
# argsort() yields integer positions, not index labels, so the positional
# indexer is the correct one; label-based .loc only happens to give the same
# answer while the index is the default 0..n-1 range.
df.iloc[(df["AAA"] - aValue).abs().argsort().to_numpy()]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


Dynamically reduce a list of criteria using binary operators

In [41]:
# Sample frame for the combined-criteria example.
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [42]:
# Three independent boolean row filters, one per column ...
Crit1 = df["AAA"].le(5.5)
Crit2 = df["BBB"].ge(20)
Crit3 = df["CCC"].ge(-30)
# ... and their element-wise conjunction.
Allcrit = (Crit1 & Crit2) & Crit3

In [43]:
# Keep only the rows that pass every criterion.
df.loc[Allcrit]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50


In [45]:
df = pd.DataFrame(
    {
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)
# Combine a value condition on AAA with a membership test on the row labels.
df.loc[df["AAA"].gt(4) & df.index.isin([0, 1, 2])]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
2,6,30,-30


In [47]:
# Same values as before, but with string row labels for the slicing examples.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    },
    index=["foo", "bar", "boo", "kar"],
)


In [48]:
df

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [49]:
# Label-based slice: both endpoints ("bar" and "kar") are included in the result.
df.loc["bar":"kar"]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [50]:
# First three rows, selected by position.
df.iloc[:3]

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [61]:
# Up to five rows and the first two columns, by position; the frame has only
# four rows, so the out-of-range stop simply returns what exists.
df.iloc[:5, :2]

Unnamed: 0,AAA,BBB
foo,4,10
bar,5,20
boo,6,30
kar,7,40


In [79]:
# Integer codes that the next cells translate into category names.
df = pd.DataFrame(
    {
        "AAA": [1, 2] * 2,
        "BBB": [2] * 4,
        "CCC": [3] * 4,
    }
)

In [80]:
# Remember the current column labels; the mapping cell below reads only these columns.
source_cols = df.columns

In [81]:
# One "<name>_cat" output column per source column.
new_cols = [f"{col}_cat" for col in source_cols]
categories = {1: "Alpha", 2: "Beta", 3: "Charlie"}
# Look every cell up in `categories`. DataFrame.applymap is deprecated since
# pandas 2.1, so map each column with Series.map instead; this performs the
# same element-wise lookup and works on older and newer pandas alike.
df[new_cols] = df[source_cols].apply(lambda col: col.map(categories.get))
df

Unnamed: 0,AAA,BBB,CCC,AAA_cat,BBB_cat,CCC_cat
0,1,2,3,Alpha,Beta,Charlie
1,2,2,3,Beta,Beta,Charlie
2,1,2,3,Alpha,Beta,Charlie
3,2,2,3,Beta,Beta,Charlie


In [82]:
# Small animal table used by the groupby examples.
df = pd.DataFrame(
    {
        "animal": ["cat", "dog", "cat", "fish", "dog", "cat", "cat"],
        "size": ["S", "S", "M", "M", "M", "L", "L"],
        "weight": [8, 10, 11, 1, 20, 12, 12],
        "adult": [False, False, False, False, False, True, True],
    }
)

In [83]:
df

Unnamed: 0,animal,size,weight,adult
0,cat,S,8,False
1,dog,S,10,False
2,cat,M,11,False
3,fish,M,1,False
4,dog,M,20,False
5,cat,L,12,True
6,cat,L,12,True


In [84]:
# Group by the column name itself rather than a one-element list. With a list
# key, newer pandas releases treat each group key as a 1-tuple, so a plain
# get_group("cat") lookup warns (and is slated to stop working); a scalar key
# keeps the group names as plain strings.
gb = df.groupby("animal")

In [86]:
# Retrieve the rows that fall in the "cat" group.
gb.get_group("cat")

Unnamed: 0,animal,size,weight,adult
0,cat,S,8,False
2,cat,M,11,False
5,cat,L,12,True
6,cat,L,12,True
