In [1]:
import pandas as pd

In [18]:
bond = pd.read_csv("jamesbond.csv")
bond["Year"] = bond["Year"].astype("int")
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [20]:
bond.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Film               26 non-null     object 
 1   Year               26 non-null     int64  
 2   Actor              26 non-null     object 
 3   Director           26 non-null     object 
 4   Box Office         26 non-null     float64
 5   Budget             26 non-null     float64
 6   Bond Actor Salary  18 non-null     float64
dtypes: float64(3), int64(1), object(3)
memory usage: 1.5+ KB


<div class="alert alert-block alert-info">
<b>Dica:</b> É sempre bom ter uma visão geral dos tipos de dados que você está lidando para realizar um "casting" de tipos de dados de acordo com as necessidades de manipulação que você desejar realizar com o dataset.
</div>

## **The <mark>.set_index()</mark> and <mark>.reset_index()</mark> Methods**

In [25]:
bond = pd.read_csv("jamesbond.csv") # podemos definir rótulos personalizados para o nosso indice diretamente na importação dos dados. Ex: index_col = "Film"
bond["Year"] = bond["Year"].astype("int")
bond.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


<div class="alert alert-block alert-info">
<b>Info:</b> Lembre-se que o indice de um dataframe pode conter valores duplicados.
</div>

In [28]:
bond.set_index(keys = "Film", inplace = True)

In [29]:
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [31]:
bond.reset_index().head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


<div class="alert alert-block alert-info">
    <b>Info:</b> O método '.reset_index()' possui o parâmetro 'drop' que é responsável por descartar a coluna que estava setada anteriormente como indíce do dataset.
</div>

## **Retrieve rows by index label with .loc() Accessor**

In [34]:
bond = pd.read_csv("jamesbond.csv", index_col = "Film") # ordernar o indice torna o processamento dos dados mais eficiente
bond["Year"] = bond["Year"].astype("int")
bond.sort_index(inplace = True)
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [35]:
bond.loc["Goldfinger"]

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

<div class="alert alert-block alert-info">
    <b>Info:</b> Acredito que o método loc conter colchetes, é pra tornar mais intuitivo que estamos querendo recuperar uma informação por seu indice (da maneiro pela qual acessamos informações em listas e dicionários em python).
</div>

### Uma das vantagens que 'Series' pandas possuem sobre os dicionários python, é que os rótulos suportam valores duplicados.

In [41]:
bond.loc['Casino Royale']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [49]:
bond.loc["Diamonds Are Forever":"From Russia With Love":2] # o último valor não é incluso

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [51]:
bond.loc["GoldenEye":]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [52]:
bond.loc[["Die Another Day", "Octopussy"]] # A ordem mantida não é o do Dataframe, mas a ordem dos indices passados

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [56]:
# Se precisar verificar se um determinado rótulo está presente em um dataframe, utilize o operador 'in'
"Luquinhas" in bond.index

False