<img src="https://github.com/danielscarvalho/data/blob/master/img/FIAP-logo.png?raw=True" style="float:right;" width="200px">

# DATA SCIENCE & STATISTICAL COMPUTING [》](https://www.fiap.com.br/)

## Dataframe & Python

### LAP Parte 2: "Cookbook"

https://pandas.pydata.org/docs/user_guide/cookbook.html#cookbook

Sugestão: Colocar a página do Cookbook lado a lado com este notebook para realizar as operações.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample frame for the first examples: three integer columns on the default 0..3 index.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [3]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [4]:
# Conditional assignment: wherever AAA is at least 5, overwrite BBB with -1.
df.loc[df["AAA"] >= 5, "BBB"] = -1

In [5]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [7]:
# Same row filter as before, but assign to two columns at once.
df.loc[df["AAA"].ge(5), ["BBB", "CCC"]] = 555

In [8]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,555,555
2,6,555,555
3,7,555,555


In [9]:
# Complementary filter: rows with AAA below 5 get 2000 in both columns.
df.loc[df["AAA"].lt(5), ["BBB", "CCC"]] = 2000


In [10]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,2000,2000
1,5,555,555
2,6,555,555
3,7,555,555


In [11]:
# Boolean frame with the same columns and index as df; it is passed to
# df.where() in the following cell to decide which cells are kept.
df_mask = pd.DataFrame(
    data={
        "AAA": [True, True, True, True],
        "BBB": [False, False, False, False],
        "CCC": [True, False, True, False],
    }
)

In [12]:
# Keep each value where the mask is True; every other cell becomes -1000.
df.where(df_mask, other=-1000)

Unnamed: 0,AAA,BBB,CCC
0,4,-1000,2000
1,5,-1000,-1000
2,6,-1000,555
3,7,-1000,-1000


In [13]:
# Rebuild the sample frame so this example starts from the original values.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)


In [14]:
df


Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [15]:
# If-then-else on a column: "high" when AAA exceeds 5, otherwise "low".
df["logic"] = np.where(df["AAA"].gt(5), "high", "low")

In [16]:
df

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,high
3,7,40,-50,high


In [17]:
# Rebuild the sample frame (this drops the "logic" column added above).
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)


In [18]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [19]:
# First half of the split: rows whose AAA is at most 5.
df.loc[df["AAA"] <= 5]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50


In [20]:
# Second half of the split: rows whose AAA is above 5.
df.loc[df["AAA"] > 5]

Unnamed: 0,AAA,BBB,CCC
2,6,30,-30
3,7,40,-50


In [21]:
# Fresh sample frame for the multi-criteria selection examples.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [22]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [23]:
# AND of two criteria; the AAA values of the matching rows come back as a Series.
df.loc[df["BBB"].lt(25) & df["CCC"].ge(-40), "AAA"]

0    4
1    5
Name: AAA, dtype: int64

In [24]:
# OR of two criteria; the AAA values of the matching rows come back as a Series.
df.loc[df["BBB"].gt(25) | df["CCC"].ge(-40), "AAA"]


0    4
1    5
2    6
3    7
Name: AAA, dtype: int64

In [25]:
# The same OR-style filter can be the target of an assignment: matching rows get AAA = 999.
df.loc[df["BBB"].gt(25) | df["CCC"].ge(75), "AAA"] = 999


In [26]:
df

Unnamed: 0,AAA,BBB,CCC
0,999,10,100
1,5,20,50
2,999,30,-30
3,999,40,-50


In [27]:
# Fresh sample frame for the "closest to a value" ordering example.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [28]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [29]:
# Reference value: the next cell orders the rows by how close CCC is to it.
aValue = 43.0


In [30]:
# Order the rows by |CCC - aValue|, closest first.
# argsort() yields integer positions, not index labels, so the selection must be
# positional (.iloc). With .loc this only worked because the default index labels
# happen to equal the row positions; any other index would pick the wrong rows
# or raise a KeyError.
df.iloc[(df.CCC - aValue).abs().argsort()]


Unnamed: 0,AAA,BBB,CCC
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


In [31]:
# Fresh sample frame for the dynamically combined criteria example.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [32]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [33]:
# First criterion: boolean Series, True where AAA is at most 5.5.
Crit1 = df["AAA"].le(5.5)


In [34]:
# Second criterion: boolean Series, True where BBB equals 10.
Crit2 = df["BBB"].eq(10.0)


In [35]:
# Third criterion: boolean Series, True where CCC is above -40.
Crit3 = df["CCC"].gt(-40.0)


In [36]:
# Hard-coded way to combine the criteria: element-wise AND of the three masks.
# A later cell rebuilds AllCrit from a list with functools.reduce.
AllCrit = Crit1 & Crit2 & Crit3


In [37]:
import functools


In [38]:
# The criteria gathered in a list, so they can be combined without naming each one.
CritList = [Crit1, Crit2, Crit3]


In [39]:
# Fold the list with element-wise AND; works for any number of criteria.
AllCrit = functools.reduce(lambda combined, crit: combined & crit, CritList)


In [40]:
# Keep only the rows that satisfy every criterion.
df.loc[AllCrit]


Unnamed: 0,AAA,BBB,CCC
0,4,10,100


In [41]:
# Fresh sample frame for the combined value / index-label filter.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [42]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [43]:
# Combine a column criterion with an index-label criterion.
# The label 4 does not exist in the index and is simply ignored by isin().
df.loc[df["AAA"].le(6) & df.index.isin([0, 2, 4])]


Unnamed: 0,AAA,BBB,CCC
0,4,10,100
2,6,30,-30


In [45]:
# Same sample data, this time with string row labels, to contrast
# label-based and position-based slicing.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    },
    index=["foo", "bar", "boo", "kar"],
)


In [46]:
df.loc["bar":"kar"]  # Label-oriented slice: both end labels are included


Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [47]:
df[0:3]  # Generic [] slice with integer bounds: taken as positions, end excluded


Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [48]:
df["bar":"kar"]  # Generic [] slice with label bounds: same rows as df.loc["bar":"kar"]


Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [49]:
# Column data shared by the next frame, which uses a 1-based integer index.
data = {
    "AAA": [4, 5, 6, 7],
    "BBB": [10, 20, 30, 40],
    "CCC": [100, 50, -30, -50],
}


In [50]:
# Integer labels deliberately start at 1, so a row's label differs from its position.
df2 = pd.DataFrame(data, index=[1, 2, 3, 4])


In [51]:
df2.iloc[1:3]  # Position-oriented: rows at positions 1 and 2 (end excluded)


Unnamed: 0,AAA,BBB,CCC
2,5,20,50
3,6,30,-30


In [52]:
df2.loc[1:3]  # Label-oriented: rows labelled 1 through 3 (end included)


Unnamed: 0,AAA,BBB,CCC
1,4,10,100
2,5,20,50
3,6,30,-30


In [53]:
# Fresh sample frame for the inverse-mask (~) example.
df = pd.DataFrame(
    data={
        "AAA": [4, 5, 6, 7],
        "BBB": [10, 20, 30, 40],
        "CCC": [100, 50, -30, -50],
    }
)

In [54]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [55]:
# ~ negates the combined mask, returning exactly the rows the earlier
# value-and-index filter left out.
df.loc[~(df["AAA"].le(6) & df.index.isin([0, 2, 4]))]


Unnamed: 0,AAA,BBB,CCC
1,5,20,50
3,7,40,-50
