## Index Alignment

In [4]:
import pandas as pd
import numpy as np

In [9]:
# 2x2 frame of random integers drawn from [0, 20), with columns 'A' and 'B'.
A = pd.DataFrame(np.random.randint(0, 20, (2, 2)),
                 columns=list('AB'))
A

Unnamed: 0,A,B
0,0,8
1,6,8


In [10]:
# 3x3 frame of random integers drawn from [0, 10). Its columns are listed in
# the order B, A, C, and it has a row (2) and a column (C) that A lacks.
B = pd.DataFrame(np.random.randint(0, 10, (3, 3)),
                 columns=list('BAC'))
B

Unnamed: 0,B,A,C
0,3,6,3
1,6,3,8
2,6,5,9


In [11]:
# Addition aligns the operands on row labels and column names, whatever order
# they appear in. Labels present in only one frame (row 2, column C) give NaN.
A + B

Unnamed: 0,A,B,C
0,6.0,11.0,
1,9.0,14.0,
2,,,


## Operations Between DataFrame and Series

In [12]:
# NOTE(review): this is just an alias for the `np.random` module; no seed is
# set here, so the values generated below will differ on each fresh run.
rng = np.random

In [13]:
# 3x4 NumPy array of random integers in [0, 10).
# Note: this rebinds the name `A`, which held a DataFrame in the previous section.
A = rng.randint(10, size=(3, 4))
A

array([[9, 9, 6, 8],
       [2, 5, 0, 4],
       [3, 7, 9, 5]])

In [14]:
# NumPy broadcasting: the first row is subtracted from every row of the array.
A - A[0]

array([[ 0,  0,  0,  0],
       [-7, -4, -6, -4],
       [-6, -2,  3, -3]])

In [15]:
# Same operation on a DataFrame: the Series `df.iloc[0]` is matched against
# the column labels and subtracted from each row.
df = pd.DataFrame(A, columns=list('QRST'))
df - df.iloc[0]

Unnamed: 0,Q,R,S,T
0,0,0,0,0
1,-7,-4,-6,-4
2,-6,-2,3,-3


In [16]:
# To operate column-wise instead, pass axis=0: column 'R' is matched against
# the row index and subtracted from every column.
df.subtract(df['R'], axis=0)

Unnamed: 0,Q,R,S,T
0,0,0,-3,-1
1,-3,0,-5,-1
2,-4,0,2,-2


In [20]:
df

Unnamed: 0,Q,R,S,T
0,9,9,6,8
1,2,5,0,4
2,3,7,9,5


In [17]:
# Every second element of the first row, i.e. only columns Q and S.
halfrow = df.iloc[0, ::2]
halfrow

Q    9
S    6
Name: 0, dtype: int32

In [18]:
# Labels are aligned before subtracting: columns missing from `halfrow`
# (R and T) have nothing to subtract and come out as NaN.
df - halfrow

Unnamed: 0,Q,R,S,T
0,0.0,,0.0,
1,-7.0,,-6.0,
2,-6.0,,3.0,


## DF Ufuncs (Universal functions)

DataFrame.add 
- *Add DataFrames.*

DataFrame.sub
- *Subtract DataFrames.*

DataFrame.mul
- *Multiply DataFrames.*

DataFrame.div
- *Divide DataFrames (float division).*

DataFrame.truediv
- *Divide DataFrames (float division).*

DataFrame.floordiv
- *Divide DataFrames (integer division).*

DataFrame.mod
- *Calculate modulo (remainder after division).*

DataFrame.pow
- *Calculate exponential power.*



## Pandas aggregating functions

DataFrame.sum
- *Return the sum over the requested axis.*

DataFrame.min
- *Return the minimum over the requested axis.*

DataFrame.max
- *Return the maximum over the requested axis.*

DataFrame.idxmin
- *Return the index of the minimum over the requested axis.*

DataFrame.idxmax
- *Return the index of the maximum over the requested axis.*

### Exercícios

1. Escreva uma função para normalizar um DataFrame do pandas df subtraindo os valores da primeira linha de todas as outras linhas.

In [3]:
def normalize_df(df):
    """Return a new frame equal to ``df`` with its first row subtracted from every row.

    The first row (``df.iloc[0]``) is matched against the column labels, so
    each column is shifted by its own first value. ``df`` itself is left
    unchanged.
    """
    baseline = df.iloc[0]
    return df - baseline

2. Escreva uma função para normalizar um DataFrame do pandas df subtraindo os valores da primeira linha de todas as outras linhas e dividindo todos os valores pelo maior elemento do dataframe.

In [2]:
def normalize_df(df):
    """Subtract the first row from every row, then scale by the largest result.

    Steps:
      1. Subtract ``df.iloc[0]`` (matched on column labels) from each row.
      2. Take the largest element of the *shifted* frame.
      3. Divide every shifted value by that element.

    Returns a new frame; the caller's ``df`` is not modified.

    No guard is applied when the largest shifted value is 0 (for example a
    frame with a single row): the division is then a division by zero.

    NOTE(review): an earlier cell defines a function with this same name;
    when the cells run in order this definition replaces it.
    """
    shifted = df - df.iloc[0]
    # max() per column, then max() over those per-column maxima.
    peak = shifted.max().max()
    return shifted / peak

# Usage example.
# pandas and numpy are re-imported here so the cell can also run on its own.
import pandas as pd
import numpy as np

# Build a small 3x3 example frame
data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
df = pd.DataFrame(data, columns=['A', 'B', 'C'])

# Normalize it
df_normalized = normalize_df(df)

# Leave the frame as the cell's last expression so the notebook renders it
# with its rich table display instead of plain printed text.
df_normalized

     A    B    C
0  0.0  0.0  0.0
1  0.5  0.5  0.5
2  1.0  1.0  1.0


## Missing data

In [5]:
# Small frame with gaps. Both np.nan and None end up as NaN, which is why
# columns 0 and 1 are displayed as floats while column 2 stays integer.
df = pd.DataFrame([[1,      np.nan, 2],
                   [2,      3,      5],
                   [None, 4,      6]])
df

Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [6]:
# With default arguments, dropna() removes every row that contains at least
# one missing value; a new frame is returned and `df` is unchanged.
df.dropna()

Unnamed: 0,0,1,2
1,2.0,3.0,5


In [7]:
# axis='columns' drops every column that contains a missing value instead.
df.dropna(axis='columns')

Unnamed: 0,2
0,2
1,5
2,6


In [8]:
df[3] = np.nan
df


Unnamed: 0,0,1,2,3
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,6,


In [20]:
df.isna()

Unnamed: 0,0,1,2,3
0,False,True,False,True
1,False,False,False,True
2,True,False,False,True


In [21]:
df.isna().sum()

0    1
1    1
2    0
3    3
dtype: int64

In [9]:
# how='all' drops only the columns whose values are *all* missing.
df.dropna(axis='columns', how='all')

Unnamed: 0,0,1,2
0,1.0,,2
1,2.0,3.0,5
2,,4.0,6


In [10]:
# thresh=3 keeps only the rows that have at least 3 non-missing values.
df.dropna(axis='rows', thresh=3)

Unnamed: 0,0,1,2,3
1,2.0,3.0,5,


In [11]:
df.fillna(0)

Unnamed: 0,0,1,2,3
0,1.0,0.0,2,0.0
1,2.0,3.0,5,0.0
2,0.0,4.0,6,0.0


In [15]:
df

Unnamed: 0,0,1,2,3
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,6,


In [14]:
# Forward-fill: carry the last valid value down each column.
# `fillna(method=...)` is deprecated since pandas 2.1, so the dedicated
# `ffill` method is used instead.
df.ffill()

Unnamed: 0,0,1,2,3
0,1.0,,2,
1,2.0,3.0,5,
2,2.0,4.0,6,


In [16]:
# Back-fill: pull the next valid value up each column.
# `fillna(method=...)` is deprecated since pandas 2.1, so the dedicated
# `bfill` method is used instead.
df.bfill()

Unnamed: 0,0,1,2,3
0,1.0,3.0,2,
1,2.0,3.0,5,
2,,4.0,6,


In [18]:
df

Unnamed: 0,0,1,2,3
0,1.0,,2,
1,2.0,3.0,5,
2,,4.0,6,


In [19]:
# Forward-fill along each row (left to right across the columns).
# `fillna(method=...)` is deprecated since pandas 2.1, so the dedicated
# `ffill` method is used instead.
df.ffill(axis='columns')

Unnamed: 0,0,1,2,3
0,1.0,1.0,2.0,2.0
1,2.0,3.0,5.0,5.0
2,,4.0,6.0,6.0


## Exercícios

Considere o seguinte dataframe

In [28]:
df = pd.DataFrame({'A': [1, 2, np.nan, 4, 5],
                   'B': [6, np.nan, 8, 9, 10],
                   'C': [11, 12, 13, np.nan, 15],
                   'D': [16, 17, 18, 19, np.nan]})

1. Identifique quantos valores faltantes há em cada coluna do DataFrame.
2. Substitua os valores faltantes por 0.
3. Substitua os valores faltantes pela média da coluna.

In [29]:
# Exercise 1: number of missing values per column. isna() marks the gaps and
# sum() counts them column by column. Left as the cell's last expression so
# the notebook displays it directly, without print().
df.isna().sum()

A    1
B    1
C    1
D    1
dtype: int64


In [30]:
# Exercise 2: replace the missing values with 0. fillna returns a new frame
# by default, so `df` keeps its gaps for the next step.
df.fillna(value=0)

Unnamed: 0,A,B,C,D
0,1.0,6.0,11.0,16.0
1,2.0,0.0,12.0,17.0
2,0.0,8.0,13.0,18.0
3,4.0,9.0,0.0,19.0
4,5.0,10.0,15.0,0.0


In [33]:
# Per-column means; missing values are left out of each average.
df.mean()

A     3.00
B     8.25
C    12.75
D    17.50
dtype: float64

In [31]:
# Exercise 3: replace each missing value with the mean of its column.
# The result is rebound to `df` rather than using inplace=True, and the
# filled frame is shown as the cell's output.
df = df.fillna(df.mean())
df