# BIBLIOTECA PANDAS

## Importar Bibliotecas

In [104]:
import numpy as np
import pandas as pd

## Series (observe o índice do lado esquerdo)

In [105]:
s1 = pd.Series([1, 2, -5, 0])
s1

0    1
1    2
2   -5
3    0
dtype: int64

In [106]:
s1.values

array([ 1,  2, -5,  0])

In [107]:
s1.index

RangeIndex(start=0, stop=4, step=1)

In [108]:
s2 = pd.Series([1, 2, -5, 0], index=["a", "b", "c", "d"])
s2
s2.values
s2.index
s2
s2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [109]:
s2["a"] = 1000
s2

a    1000
b       2
c      -5
d       0
dtype: int64

## Comparação

In [110]:
s2[s2 > 0]

a    1000
b       2
dtype: int64

## Álgebra

In [111]:
sr1 = pd.Series([1, 2, 3, 4, 5])
sr2 = pd.Series([5, 4, 3, 2, 1])
sr1 + sr2
sr1 - sr2
sr1 * sr2
sr1 / sr2

0    0.2
1    0.5
2    1.0
3    2.0
4    5.0
dtype: float64

In [112]:
s2.isnull()

a    False
b    False
c    False
d    False
dtype: bool

## DataFrame

### Criar DataFrame

In [113]:
dados = {"estado":["SP","MG","PR","SP","MG","PR"], "ano":[2019,2019,2019,2020,2020,2020], "pop":[45.9, 21.2, 16.9, 46.6, 21.4, 17.3]}
df1 = pd.DataFrame(dados)
df1

Unnamed: 0,estado,ano,pop
0,SP,2019,45.9
1,MG,2019,21.2
2,PR,2019,16.9
3,SP,2020,46.6
4,MG,2020,21.4
5,PR,2020,17.3


In [114]:
df1.head()

Unnamed: 0,estado,ano,pop
0,SP,2019,45.9
1,MG,2019,21.2
2,PR,2019,16.9
3,SP,2020,46.6
4,MG,2020,21.4


In [115]:
df1.tail()

Unnamed: 0,estado,ano,pop
1,MG,2019,21.2
2,PR,2019,16.9
3,SP,2020,46.6
4,MG,2020,21.4
5,PR,2020,17.3


In [116]:
df1.sample(2)

Unnamed: 0,estado,ano,pop
0,SP,2019,45.9
4,MG,2020,21.4


## Novo DataFrame Baseado no Anterior (veja as colunas)

In [117]:
df1
df2 = pd.DataFrame(dados, columns=["ano", "estado", "pop"])
df2

Unnamed: 0,ano,estado,pop
0,2019,SP,45.9
1,2019,MG,21.2
2,2019,PR,16.9
3,2020,SP,46.6
4,2020,MG,21.4
5,2020,PR,17.3


In [118]:
df2["estado"]
df2.estado
df2.dtypes

ano         int64
estado     object
pop       float64
dtype: object

## Atribuir Valores

In [119]:
df2["estimativa"] = 50
df2
df2["estimativa"] = np.arange(6)
df2

Unnamed: 0,ano,estado,pop,estimativa
0,2019,SP,45.9,0
1,2019,MG,21.2,1
2,2019,PR,16.9,2
3,2020,SP,46.6,3
4,2020,MG,21.4,4
5,2020,PR,17.3,5


In [120]:
df2["estimativa"][0] = 100
df2

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2["estimativa"][0] = 100


Unnamed: 0,ano,estado,pop,estimativa
0,2019,SP,45.9,100
1,2019,MG,21.2,1
2,2019,PR,16.9,2
3,2020,SP,46.6,3
4,2020,MG,21.4,4
5,2020,PR,17.3,5


In [121]:
df2["nao parana"] = (df2["estado"] != "PR")
df2

Unnamed: 0,ano,estado,pop,estimativa,nao parana
0,2019,SP,45.9,100,True
1,2019,MG,21.2,1,True
2,2019,PR,16.9,2,False
3,2020,SP,46.6,3,True
4,2020,MG,21.4,4,True
5,2020,PR,17.3,5,False


## Excluir Coluna

In [122]:
del df2["nao parana"]

In [123]:
df2

Unnamed: 0,ano,estado,pop,estimativa
0,2019,SP,45.9,100
1,2019,MG,21.2,1
2,2019,PR,16.9,2
3,2020,SP,46.6,3
4,2020,MG,21.4,4
5,2020,PR,17.3,5


## Entender o DataFrame

In [124]:
df2.shape

(6, 4)

In [125]:
df2.shape[0]

6

In [126]:
df2.shape[1]

4

In [127]:
df2.index

RangeIndex(start=0, stop=6, step=1)

In [128]:
df2.columns

Index(['ano', 'estado', 'pop', 'estimativa'], dtype='object')

In [129]:
df2.count()

ano           6
estado        6
pop           6
estimativa    6
dtype: int64

## Alterar Informações das Colunas

In [130]:
df2
df2.columns = ["ANO", "ESTADO", "POPULAÇÃO", "ESTIMATIVA"]
df2

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
0,2019,SP,45.9,100
1,2019,MG,21.2,1
2,2019,PR,16.9,2
3,2020,SP,46.6,3
4,2020,MG,21.4,4
5,2020,PR,17.3,5


## Análise do DataFrame

In [131]:
df2.describe()
df2.describe(include="all")

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
count,6.0,6,6.0,6.0
unique,,3,,
top,,SP,,
freq,,2,,
mean,2019.5,,28.216667,19.166667
std,0.547723,,14.096725,39.625329
min,2019.0,,16.9,1.0
25%,2019.0,,18.275,2.25
50%,2019.5,,21.3,3.5
75%,2020.0,,39.775,4.75


## Alterar Valores e Consultar Dados

In [132]:
df2["ANO"]
df2["ANO"] = df2["ANO"] + 2
df2

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
0,2021,SP,45.9,100
1,2021,MG,21.2,1
2,2021,PR,16.9,2
3,2022,SP,46.6,3
4,2022,MG,21.4,4
5,2022,PR,17.3,5


In [133]:
df2[df2["ANO"] > 2023]
df3 = df2[df2["ANO"] > 2023]
df3

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
3,2024,SP,46.6,3
4,2024,MG,21.4,4
5,2024,PR,17.3,5


Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA


## Ver Dados de Linhas

In [134]:
df2

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
0,2021,SP,45.9,100
1,2021,MG,21.2,1
2,2021,PR,16.9,2
3,2022,SP,46.6,3
4,2022,MG,21.4,4
5,2022,PR,17.3,5


In [135]:
df2.iloc[0]

ANO           2021
ESTADO          SP
POPULAÇÃO     45.9
ESTIMATIVA     100
Name: 0, dtype: object

In [136]:
df2.iloc[:]

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
0,2021,SP,45.9,100
1,2021,MG,21.2,1
2,2021,PR,16.9,2
3,2022,SP,46.6,3
4,2022,MG,21.4,4
5,2022,PR,17.3,5


In [137]:
df2.iloc[1:3]

Unnamed: 0,ANO,ESTADO,POPULAÇÃO,ESTIMATIVA
1,2021,MG,21.2,1
2,2021,PR,16.9,2


In [138]:
# Seleção de Linhas e Colunas
df2.iloc[1:3, [1,2]]

Unnamed: 0,ESTADO,POPULAÇÃO
1,MG,21.2
2,PR,16.9
