## Series

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Sample inputs used by the Series examples that follow.
labels = list('abc')                 # index labels: ['a', 'b', 'c']
minha_lista = [10, 20, 30]           # values as a plain Python list
arr = np.array(minha_lista)          # the same values as a NumPy array
d = dict(zip(labels, minha_lista))   # label -> value mapping: {'a': 10, 'b': 20, 'c': 30}

In [4]:
# With only data given, the Series gets a default integer index (0, 1, 2).
pd.Series(labels)

0    a
1    b
2    c
dtype: object

In [5]:
# Keyword form: `labels` supplies the values and `minha_lista` supplies the index labels.
pd.Series(data=labels, index=minha_lista)

10    a
20    b
30    c
dtype: object

In [6]:
# Positional form of the same call: the first argument is the data, the second is the index.
pd.Series(labels, minha_lista)

10    a
20    b
30    c
dtype: object

In [7]:
# From a dict: the keys become the index and the values become the data.
pd.Series(d)

a    10
b    20
c    30
dtype: int64

In [8]:
# Series of integer values indexed by country name.
ser1 = pd.Series(
    data=[100, 525, 312, 414],
    index=['EUA', 'Alemanha', 'Russia', 'Japão'],
)

In [9]:
# Second Series with the same values; its index has 'Itália' where ser1 has 'Russia'.
ser2 = pd.Series(
    data=[100, 525, 312, 414],
    index=['EUA', 'Alemanha', 'Itália', 'Japão'],
)

In [10]:
# Indexing with a list of labels returns a Series containing only those entries.
ser1[['EUA', 'Alemanha']]

EUA         100
Alemanha    525
dtype: int64

In [11]:
# Arithmetic aligns on index labels: a label present in only one Series
# ('Russia', 'Itália') yields NaN, and the result is float64.
ser1 + ser2

Alemanha    1050.0
EUA          200.0
Itália         NaN
Japão        828.0
Russia         NaN
dtype: float64

## DataFrames

In [12]:
from numpy.random import randn

In [None]:
# Build a 5x4 DataFrame of standard-normal samples with row labels A-E and
# column labels W-Z.
# Seeding NumPy's global generator first makes the values identical on every
# Restart & Run All. Outputs stored from an earlier unseeded run will be
# replaced the next time the notebook is executed.
np.random.seed(101)

df = pd.DataFrame(randn(5, 4), index=['A', 'B', 'C', 'D', 'E'], columns="W X Y Z".split())

In [14]:
# Display the DataFrame that was just created.
df

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
B,1.316429,1.19815,-1.176892,-0.881794
C,-0.333891,0.273467,-1.755003,0.448847
D,-0.76415,-0.716136,0.677443,-0.544981
E,-0.54823,-1.445269,-1.366971,0.598133


In [None]:
# Selecting a single column by its label returns a Series.
df['W']

A   -2.184823
B    1.316429
C   -0.333891
D   -0.764150
E   -0.548230
Name: W, dtype: float64

In [17]:
# Confirm that a single-column selection is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [19]:
# Passing a list of column labels returns a DataFrame (even for a single label),
# which lets us select exactly the columns we want.

df[['W', 'X']]


Unnamed: 0,W,X
A,-2.184823,0.241423
B,1.316429,1.19815
C,-0.333891,0.273467
D,-0.76415,-0.716136
E,-0.54823,-1.445269


In [20]:
# Confirm that a list-based column selection is a DataFrame.
type(df[['W', 'X']])

pandas.core.frame.DataFrame

In [22]:
# Create a new column named 'new' holding the values of column W.
df['new'] = df['W']

In [23]:
# Display df again: it now includes the 'new' column.
df

Unnamed: 0,W,X,Y,Z,new
A,-2.184823,0.241423,0.882687,2.371212,-2.184823
B,1.316429,1.19815,-1.176892,-0.881794,1.316429
C,-0.333891,0.273467,-1.755003,0.448847,-0.333891
D,-0.76415,-0.716136,0.677443,-0.544981,-0.76415
E,-0.54823,-1.445269,-1.366971,0.598133,-0.54823


In [25]:
# Remove a column: axis=1 refers to columns, axis=0 to rows.
# Without inplace=True, drop() returns a new DataFrame and df itself is left unchanged.

df.drop('new', axis=1)

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
B,1.316429,1.19815,-1.176892,-0.881794
C,-0.333891,0.273467,-1.755003,0.448847
D,-0.76415,-0.716136,0.677443,-0.544981
E,-0.54823,-1.445269,-1.366971,0.598133


In [None]:
# Note that the original df was not altered: the 'new' column is still present.
df

Unnamed: 0,W,X,Y,Z,new
A,-2.184823,0.241423,0.882687,2.371212,-2.184823
B,1.316429,1.19815,-1.176892,-0.881794,1.316429
C,-0.333891,0.273467,-1.755003,0.448847,-0.333891
D,-0.76415,-0.716136,0.677443,-0.544981,-0.76415
E,-0.54823,-1.445269,-1.366971,0.598133,-0.54823


In [27]:
# To modify the original DataFrame itself, pass inplace=True.
# NOTE(review): this cell is not idempotent — presumably re-running it raises
# KeyError once 'new' has been removed; confirm.
df.drop('new', axis=1, inplace=True)

In [28]:
# Display df: the 'new' column has been removed.
df

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
B,1.316429,1.19815,-1.176892,-0.881794
C,-0.333891,0.273467,-1.755003,0.448847
D,-0.76415,-0.716136,0.677443,-0.544981
E,-0.54823,-1.445269,-1.366971,0.598133


In [34]:
# Row selection by label with .loc.
# A single row comes back as a Series whose index is the column labels (W, X, Y, Z),
# which is why it prints vertically rather than as a table row.

df.loc['A']

W   -2.184823
X    0.241423
Y    0.882687
Z    2.371212
Name: A, dtype: float64

In [35]:
# To select more than one row, pass a list of row labels; the result is a DataFrame.

df.loc[['A', 'B']]

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
B,1.316429,1.19815,-1.176892,-0.881794


In [37]:
# Pass a list of row labels plus a single column label to get just that column for those rows.

df.loc[['A', 'B'], 'W']

A   -2.184823
B    1.316429
Name: W, dtype: float64

In [36]:
# With .iloc, selection uses the integer positions of the row and column instead of labels.
# Here: row position 0 ('A') and column position 2 ('Y').

df.iloc[0, 2]

0.88268679144922

In [42]:
# Slice by position to keep only part of the frame.
# Rows 1:-1 drops the first and last rows; columns 1:3 keeps positions 1 and 2 (X and Y),
# which for this four-column frame drops the first and last columns.
df.iloc[1:-1, 1:3]

Unnamed: 0,X,Y
B,1.19815,-1.176892
C,0.273467,-1.755003
D,-0.716136,0.677443


In [43]:
# Display df: the slice in the previous cell did not modify it.
df

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
B,1.316429,1.19815,-1.176892,-0.881794
C,-0.333891,0.273467,-1.755003,0.448847
D,-0.76415,-0.716136,0.677443,-0.544981
E,-0.54823,-1.445269,-1.366971,0.598133


In [44]:
# Conditional filter that keeps only the values greater than 0;
# every other entry is shown as missing (NaN).

df[df > 0]

Unnamed: 0,W,X,Y,Z
A,,0.241423,0.882687,2.371212
B,1.316429,1.19815,,
C,,0.273467,,0.448847
D,,,0.677443,
E,,,,0.598133


In [46]:
# Compare column Y with zero: this yields a boolean Series (True where Y > 0),
# not the filtered values themselves.
df['Y'] > 0

A     True
B    False
C    False
D     True
E    False
Name: Y, dtype: bool

In [48]:
# Use the boolean Series as a mask to keep only the rows where column Y is greater than zero.

df[df['Y'] > 0]

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
D,-0.76415,-0.716136,0.677443,-0.544981


In [49]:
# When the filter needs two conditions, wrap each comparison in parentheses
# and combine them element-wise with & (logical AND).

df[(df['Y'] > 0) & (df['X'] > 0)]

Unnamed: 0,W,X,Y,Z
A,-2.184823,0.241423,0.882687,2.371212
