# **DataFrames**
> ## **Seleção condicional, set_index**
> *Seleção de informações do DataFrame a partir de condições lógicas e definição de índices*
> 

In [1]:
import pandas as pd
import numpy as np

In [2]:
from numpy.random import randn
np.random.seed(101) # initialize np.random with a fixed seed so the randn() draws below are repeatable

> Criando um DataFrame a partir de um array 5x4 de números aleatórios, com índice de A a E e colunas de W a Z

In [5]:
# Build a 5x4 frame of random normal draws: rows labelled A-E, columns W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [6]:
# Display the DataFrame that was just created.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


> Definindo a máscara booleana bo1 como df > 0, ou seja, ela será verdadeira nas posições em que o valor de df é maior que zero

In [7]:
# Element-wise comparison: boolean frame with the same shape as df, True where the value is > 0.
bo1 = df.gt(0)

In [8]:
# Indexing with the boolean frame keeps df's shape; positions where the mask
# is False show up as missing values in the result.
df[bo1]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,,,0.605965
C,,0.740122,0.528813,
D,0.188695,,,0.955057
E,0.190794,1.978757,2.605967,0.683509


> Exibindo apenas as linhas cujo valor na coluna W é maior que zero

In [9]:
# Keep only the rows whose W value is positive (all columns are returned).
df.loc[df['W'].gt(0)]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


> Aplicando a mesma condição, porém exibindo apenas a coluna Y

In [10]:
# Rows where W > 0, restricted to column Y (returns a Series named 'Y').
# A single .loc[rows, column] lookup replaces the chained df[...][...] form,
# which first materialises an intermediate DataFrame and then indexes it again.
df.loc[df['W'] > 0, 'Y']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Y, dtype: float64

In [12]:
# Same selection written step by step.
# 1) row mask: True where W is positive (bo1 is now a boolean Series)
bo1 = df['W'].gt(0)
# 2) keep only the rows flagged by the mask
df2 = df.loc[bo1]
# 3) show column Y of the filtered frame
df2.loc[:, 'Y']

A    0.907969
B   -0.848077
D   -0.933237
E    2.605967
Name: Y, dtype: float64

> Exibindo apenas as linhas em que o valor da coluna W é maior que zero e o da coluna Y é maior que 1

In [14]:
# Rows that satisfy both conditions at once: W > 0 AND Y > 1.
# The two boolean Series are combined element-wise with &.
df.loc[df['W'].gt(0) & df['Y'].gt(1)]

Unnamed: 0,W,X,Y,Z
E,0.190794,1.978757,2.605967,0.683509


> Resetando o índice de df para o padrão (0, 1, 2, 3, 4)

In [20]:
# Turn the current row labels into a regular column and replace the index with
# the default integer range. The frame is modified in place.
# NOTE: this cell is not idempotent - executing it a second time pushes the
# integer index into an additional 'level_0' column, which is what the output
# recorded below shows.
df.reset_index(drop=False, inplace=True)

In [21]:
# Inspect the frame after reset_index: the old A-E labels now live in the 'index' column.
df

Unnamed: 0,level_0,index,W,X,Y,Z
0,0,A,2.70685,0.628133,0.907969,0.503826
1,1,B,0.651118,-0.319318,-0.848077,0.605965
2,2,C,-2.018168,0.740122,0.528813,-0.589001
3,3,D,0.188695,-0.758872,-0.933237,0.955057
4,4,E,0.190794,1.978757,2.605967,0.683509


> Criando uma nova coluna para df, Estado, com 5 elementos.

In [22]:
# State abbreviations, one per row of df.
col = ['RS', 'RJ', 'SP', 'AM', 'SC']

In [23]:
# Attach the list as a new column named 'Estado'; values are assigned to the rows in order.
df['Estado'] = col

In [24]:
# Confirm that the 'Estado' column was appended as the last column.
df

Unnamed: 0,level_0,index,W,X,Y,Z,Estado
0,0,A,2.70685,0.628133,0.907969,0.503826,RS
1,1,B,0.651118,-0.319318,-0.848077,0.605965,RJ
2,2,C,-2.018168,0.740122,0.528813,-0.589001,SP
3,3,D,0.188695,-0.758872,-0.933237,0.955057,AM
4,4,E,0.190794,1.978757,2.605967,0.683509,SC


> Transformando a nova coluna em índice

In [25]:
# Promote the 'Estado' column to row index, in place.
# drop=True (the default) removes it from the regular columns, so running this
# cell again without recreating the column fails because 'Estado' is gone.
df.set_index(keys='Estado', drop=True, inplace=True)

In [27]:
# 'Estado' is now the row index instead of a regular column.
df

Unnamed: 0_level_0,level_0,index,W,X,Y,Z
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RS,0,A,2.70685,0.628133,0.907969,0.503826
RJ,1,B,0.651118,-0.319318,-0.848077,0.605965
SP,2,C,-2.018168,0.740122,0.528813,-0.589001
AM,3,D,0.188695,-0.758872,-0.933237,0.955057
SC,4,E,0.190794,1.978757,2.605967,0.683509


> Deletando a coluna level_0 (o índice anterior)

In [30]:
# Show a copy of df without the 'level_0' column. Without inplace=True the
# original df is left untouched, so the column is still there afterwards.
# errors='ignore' prevents a KeyError when 'level_0' is absent: that column is
# only created when reset_index has been executed more than once, so on a
# fresh kernel (Restart & Run All) it does not exist.
df.drop('level_0', axis=1, errors='ignore')

Unnamed: 0_level_0,index,W,X,Y,Z
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RS,A,2.70685,0.628133,0.907969,0.503826
RJ,B,0.651118,-0.319318,-0.848077,0.605965
SP,C,-2.018168,0.740122,0.528813,-0.589001
AM,D,0.188695,-0.758872,-0.933237,0.955057
SC,E,0.190794,1.978757,2.605967,0.683509


In [31]:
# df still contains 'level_0': the previous drop() call returned a new frame
# and did not modify df itself.
df

Unnamed: 0_level_0,level_0,index,W,X,Y,Z
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
RS,0,A,2.70685,0.628133,0.907969,0.503826
RJ,1,B,0.651118,-0.319318,-0.848077,0.605965
SP,2,C,-2.018168,0.740122,0.528813,-0.589001
AM,3,D,0.188695,-0.758872,-0.933237,0.955057
SC,4,E,0.190794,1.978757,2.605967,0.683509


In [32]:
# Remove the 'level_0' column from df itself (inplace=True mutates the frame
# and returns None).
# errors='ignore' makes the cell safe to re-run and safe on a fresh kernel,
# where 'level_0' may not exist; without it drop() raises KeyError.
df.drop('level_0', axis=1, inplace=True, errors='ignore')

In [33]:
# 'level_0' is gone now that drop() was applied with inplace=True.
df

Unnamed: 0_level_0,index,W,X,Y,Z
Estado,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
RS,A,2.70685,0.628133,0.907969,0.503826
RJ,B,0.651118,-0.319318,-0.848077,0.605965
SP,C,-2.018168,0.740122,0.528813,-0.589001
AM,D,0.188695,-0.758872,-0.933237,0.955057
SC,E,0.190794,1.978757,2.605967,0.683509
