In [1]:
import pandas as pd

In [2]:
# Load the James Bond dataset with "Year" cast to an integer dtype, then preview three rows.
bond = pd.read_csv("jamesbond.csv").astype({"Year": "int"})
bond.head(n=3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [3]:
# Summarize each column's dtype and non-null count.
bond.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 26 entries, 0 to 25
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Film               26 non-null     object 
 1   Year               26 non-null     int64  
 2   Actor              26 non-null     object 
 3   Director           26 non-null     object 
 4   Box Office         26 non-null     float64
 5   Budget             26 non-null     float64
 6   Bond Actor Salary  18 non-null     float64
dtypes: float64(3), int64(1), object(3)
memory usage: 1.5+ KB


<div class="alert alert-block alert-info">
<b>Dica:</b> É sempre bom ter uma visão geral dos tipos de dados com os quais você está lidando, para realizar o "casting" de tipos de acordo com as manipulações que você deseja fazer no dataset.
</div>

## **The <mark>.set_index()</mark> and <mark>.reset_index()</mark> Methods**

In [4]:
# Custom index labels can also be set directly at import time, e.g. index_col="Film".
bond = pd.read_csv("jamesbond.csv").astype({"Year": "int"})
bond.head(n=5)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


<div class="alert alert-block alert-info">
<b>Info:</b> Lembre-se de que o índice de um DataFrame pode conter valores duplicados.
</div>

In [5]:
# Promote the "Film" column to the index. inplace=True mutates bond itself,
# so nothing is returned or displayed by this cell.
bond.set_index(keys = "Film", inplace = True)

In [6]:
# Preview the frame: the film titles are now the row labels.
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [7]:
# reset_index() returns a new frame with "Film" moved back to a regular column and a
# numeric index restored; the result is not assigned, so bond keeps its "Film" index.
bond.reset_index().head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


<div class="alert alert-block alert-info">
    <b>Info:</b> O método '.reset_index()' possui o parâmetro 'drop', que, quando definido como True, descarta a coluna que estava definida como índice do dataset em vez de devolvê-la como uma coluna comum.
</div>

## **Retrieve rows by index label with .loc[] Accessor**

In [8]:
# Load with the film titles as the index, cast "Year" to an integer dtype and sort
# the index; sorting the index makes label-based processing more efficient.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .astype({"Year": "int"})
    .sort_index()
)
bond.head(n=5)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [9]:
# A label that occurs once in the index returns that row as a Series.
bond.loc["Goldfinger"]

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

<div class="alert alert-block alert-info">
    <b>Info:</b> Acredito que o acessador loc utiliza colchetes para tornar mais intuitivo que estamos recuperando uma informação pelo seu índice (da mesma maneira pela qual acessamos informações em listas e dicionários em Python).
</div>

### Uma das vantagens que 'Series' pandas possuem sobre os dicionários python, é que os rótulos suportam valores duplicados.

In [10]:
# A label that occurs more than once returns every matching row as a DataFrame.
bond.loc['Casino Royale']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [11]:
# NOTE: .loc label slices include the stop label when that label exists in the index.
# "From Russia With Love" (capital "W") is not an index label here; the film is stored
# as "From Russia with Love". On this sorted index the slice therefore ends where the
# missing label would sort, which is why that film is absent from the result.
bond.loc["Diamonds Are Forever":"From Russia With Love":2]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [12]:
# Open-ended label slice: every row from "GoldenEye" to the end of the frame.
bond.loc["GoldenEye":]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [13]:
# Rows come back in the order of the labels passed in, not in the DataFrame's own order.
bond.loc[["Die Another Day", "Octopussy"]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [14]:
# To check whether a given label is present in a DataFrame's index, use the 'in' operator.
"Luquinhas" in bond.index

False

## **Retrieve Rows by Index Position with .iloc[] Accessor**

In [15]:
# Reload with the default numeric index so rows can be addressed by position.
bond = pd.read_csv("jamesbond.csv").astype({"Year": "int"})
bond.head(n=5)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [16]:
# First row by integer position, returned as a Series.
bond.iloc[0]

Film                        Dr. No
Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                         7.0
Bond Actor Salary              0.6
Name: 0, dtype: object

In [17]:
# Rows at positions 0, 1 and 2: with .iloc the stop position is excluded.
bond.iloc[0:3]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


#### Um detalhe importante a se observar é que, mesmo que personalizemos os índices com rótulos, o pandas, por baixo dos panos, preserva a posição numérica de cada linha; por isso ainda podemos acessar os dados por posição com .iloc.

In [18]:
# Replace the numeric index with the film titles (mutates bond in place).
bond.set_index("Film", inplace = True)

In [19]:
# Order the rows alphabetically by film title and preview the first three.
bond = bond.sort_index()
bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [20]:
# Position-based slicing still works on a labelled index: every row from position 3 onward.
bond.iloc[3:]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


## **Second Arguments to iloc and loc Accessors**

In [21]:
# Fresh copy of the dataset: film titles as a sorted index, "Year" as an integer dtype.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .sort_index()
    .astype({"Year": "int"})
)
bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [22]:
# The second argument names the column whose value we want for the given row label.
bond.loc["Moonraker", "Actor"]

'Roger Moore'

In [23]:
# One row label plus a list of columns returns a Series holding just those fields.
bond.loc["Moonraker", ["Actor", "Director"]]

Actor         Roger Moore
Director    Lewis Gilbert
Name: Moonraker, dtype: object

In [24]:
# Lists on both sides of the comma return a DataFrame, with rows in the order requested.
bond.loc[["Moonraker", "A View to a Kill"], ["Actor", "Director"]]

Unnamed: 0_level_0,Actor,Director
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Moonraker,Roger Moore,Lewis Gilbert
A View to a Kill,Roger Moore,John Glen


In [25]:
# A label slice for the rows combined with a list of columns.
bond.loc["Casino Royale":"Moonraker", ["Actor", "Bond Actor Salary"]]

Unnamed: 0_level_0,Actor,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Casino Royale,Daniel Craig,3.3
Casino Royale,David Niven,
Diamonds Are Forever,Sean Connery,5.8
Die Another Day,Pierce Brosnan,17.9
Dr. No,Sean Connery,0.6
For Your Eyes Only,Roger Moore,
From Russia with Love,Sean Connery,1.6
GoldenEye,Pierce Brosnan,5.1
Goldfinger,Sean Connery,3.2
Licence to Kill,Timothy Dalton,7.9


#### O primeiro argumento (antes da vírgula) nos permite localizar registros realizando operações com índices rotulados. Já o segundo argumento (após a vírgula) nos permite filtrar as colunas que queremos obter.

In [26]:
# Single cell by position: row 14, column 2.
bond.iloc[14, 2]

'John Glen'

In [27]:
# Rows at positions 14 and 15 of the column at position 2, returned as a Series.
bond.iloc[14:16, 2]

Film
Octopussy                              John Glen
On Her Majesty's Secret Service    Peter R. Hunt
Name: Director, dtype: object

In [28]:
# Rows at positions 14-15 and columns at positions 2-3, returned as a DataFrame.
bond.iloc[14:16, 2:4]

Unnamed: 0_level_0,Director,Box Office
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Octopussy,John Glen,373.8
On Her Majesty's Secret Service,Peter R. Hunt,291.5


<div class="alert alert-block alert-info">
    <b>Info:</b> O primeiro argumento representa as linhas que queremos recuperar e o segundo argumento representa as colunas que queremos filtrar da linha selecionada.
</div>

## **Set New Values for a Specific Cell**

In [29]:
# Fresh copy of the dataset: film titles as a sorted index, "Year" as an integer dtype.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .sort_index()
    .astype({"Year": "int"})
)
bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [30]:
# .loc[row label, column label] on the left of an assignment overwrites that single cell.
bond.loc["Dr. No", "Actor"] = "Lucas Mariano"

In [31]:
# Display the row to confirm the new value.
bond.loc["Dr. No"]

Year                          1962
Actor                Lucas Mariano
Director             Terence Young
Box Office                   448.8
Budget                         7.0
Bond Actor Salary              0.6
Name: Dr. No, dtype: object

#### Veja abaixo como podemos sobrescrever múltiplos valores em uma única operação:

In [32]:
# A list of column labels lets one assignment overwrite several cells of the same row;
# the values on the right are matched to the listed columns in order.
bond.loc["Dr. No", ["Year", "Director", "Bond Actor Salary"]] = [2023, "Tami Silva", 40_000_000_000]

In [33]:
# Display the row to confirm all three values changed.
bond.loc["Dr. No"]

Year                          2023
Actor                Lucas Mariano
Director                Tami Silva
Box Office                   448.8
Budget                         7.0
Bond Actor Salary    40000000000.0
Name: Dr. No, dtype: object

## **Set Multiple Values in Dataframe**

In [34]:
# Fresh copy of the dataset: film titles as a sorted index, "Year" as an integer dtype.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .sort_index()
    .astype({"Year": "int"})
)
bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [35]:
# The boolean mask selects the rows and "Actor" selects the column; assigning through
# a single .loc call writes into bond itself (the assignment does not return anything).
bond.loc[bond["Actor"] == "Sean Connery", "Actor"] = "Lucas Mariano"

In [36]:
# Filter on the new name to confirm that the matching rows were updated.
bond.loc[bond["Actor"] == "Lucas Mariano"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Lucas Mariano,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Lucas Mariano,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Lucas Mariano,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Lucas Mariano,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Lucas Mariano,Irvin Kershner,380.0,86.0,
Thunderball,1965,Lucas Mariano,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Lucas Mariano,Lewis Gilbert,514.2,59.9,4.4


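#### Quando utilizamos a sintaxe de colchetes encadeados para extrair um subconjunto de dados (por exemplo, `bond[bond["Actor"] == "Sean Connery"]["Actor"] = ...`), podemos estar trabalhando em uma cópia dos valores originais e, nesses casos, não estaremos de fato modificando o DataFrame em que estamos realmente trabalhando. Por isso, a atribuição deve ser feita em uma única chamada a `.loc[linhas, colunas]`, como na célula acima.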

## **Rename index labels or columns in a Dataframe**

In [37]:
# Fresh copy of the dataset: film titles as a sorted index, "Year" as an integer dtype.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .sort_index()
    .astype({"Year": "int"})
)
bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [38]:
# Four equivalent spellings of the same index-label rename: with no axis given the
# mapper is applied to the index, and 0, "rows" and "index" all name that axis.
# None of the results is assigned, so bond keeps its labels; only the last
# expression is displayed.
film_renames = {"GoldenEye": "Golden Eye", "The World Is Not Enough": "Best Bond Movie Ever"}

bond.rename(mapper=film_renames)

bond.rename(mapper=film_renames, axis=0)
bond.rename(mapper=film_renames, axis="rows")
bond.rename(mapper=film_renames, axis="index")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [39]:
# The index= keyword renames index labels without needing an axis argument.
bond.rename(index = {"GoldenEye": "Golden Eye", "The World Is Not Enough": "Best Bond Movie Ever"})

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [40]:
# Three equivalent spellings of the same column rename; the results are not assigned,
# so bond is unchanged and only the last expression is displayed.
column_renames = {"Year": "Release Date", "Box Office": "Revenue"}

bond.rename(mapper=column_renames, axis=1)
bond.rename(mapper=column_renames, axis="columns")
bond.rename(columns=column_renames)

Unnamed: 0_level_0,Release Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


#### Veja uma forma alternativa para alterar o nome das colunas do Dataframe de uma maneira mais intuitiva:

In [41]:
# Assigning a list to .columns replaces every column name at once; the list supplies
# one name per existing column, in order.
bond.columns = ["Year of Release", "Bond Actor", "Director", "Revenue", "Budget", "Salary"]

In [42]:
# Show one row to confirm the new column names.
bond.head(1)

Unnamed: 0_level_0,Year of Release,Bond Actor,Director,Revenue,Budget,Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


## **Delete rows or columns from a Dataframe**

In [43]:
# Fresh copy of the dataset: film titles as a sorted index, "Year" as an integer dtype.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .astype({"Year": "int"})
    .sort_index()
)

bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [46]:
# With no axis given, drop() removes rows by index label. It returns a new frame;
# the result is not assigned, so bond still contains the dropped row.
bond.drop(labels = "A View to a Kill").head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8


In [50]:
# Removing multiple columns: three equivalent spellings. The results are not
# assigned, so bond is unchanged and only the last expression is displayed.
unwanted_columns = ["Year", "Actor", "Director", "Box Office"]

bond.drop(labels=unwanted_columns, axis=1).head(n=3)
bond.drop(labels=unwanted_columns, axis="columns").head(n=3)
bond.drop(columns=unwanted_columns).head(n=3)

Unnamed: 0_level_0,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
A View to a Kill,54.5,9.1
Casino Royale,145.3,3.3
Casino Royale,85.0,


#### Existe outra abordagem para remover colunas de um DataFrame pandas: o método `.pop()`, que, além de remover a coluna desejada, retorna a Series removida.

In [56]:
# pop() removes the "Actor" column from bond and returns it as a Series.
actor = bond.pop("Actor")

In [58]:
# Confirm that "Actor" is no longer a column of bond.
bond.head(1)

Unnamed: 0_level_0,Year,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,John Glen,275.2,54.5,9.1


In [59]:
# The popped Series keeps the Film index of the frame it came from.
actor.head()

Film
A View to a Kill           Roger Moore
Casino Royale             Daniel Craig
Casino Royale              David Niven
Diamonds Are Forever      Sean Connery
Die Another Day         Pierce Brosnan
Name: Actor, dtype: object

#### Outra alternativa para deletar dados do nosso Dataframe pandas é utilizando a palavra reservada "del"

In [60]:
# del removes the "Director" column from bond in place.
del bond["Director"]

In [61]:
# Confirm that "Director" is gone as well.
bond.head(2)

Unnamed: 0_level_0,Year,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A View to a Kill,1985,275.2,54.5,9.1
Casino Royale,2006,581.5,145.3,3.3


## **Create a Random Sample**

In [63]:
# Fresh copy of the dataset: film titles as a sorted index, "Year" as an integer dtype.
bond = (
    pd.read_csv("jamesbond.csv", index_col="Film")
    .astype({"Year": "int"})
    .sort_index()
)

bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [66]:
# sample() returns a new DataFrame with the rows in random order and does not alter
# the original frame. No random_state is passed, so each run draws a different sample.
bond.sample(n = 3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1


In [68]:
# frac selects the fraction of rows to draw for the random sample (here 25%).
bond.sample(frac = .25)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


In [73]:
# axis=1 draws the random sample from the columns instead of the rows.
bond.sample(n = 2, axis = 1).head(3)

Unnamed: 0_level_0,Box Office,Actor
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
A View to a Kill,275.2,Roger Moore
Casino Royale,581.5,Daniel Craig
Casino Royale,315.0,David Niven


## **The .nsmallest() and .nlargest() Methods**

In [74]:
# Fresh copy of the dataset with the film titles as a sorted index.
bond = pd.read_csv("jamesbond.csv", index_col="Film").sort_index()

bond.head(n=3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [77]:
# Top three James Bond films by box office: sort descending, then keep the first three rows.

bond.sort_values(by = ["Box Office"], ascending = False).head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [79]:
# nlargest() performs the same selection as the sort-and-head approach, more concisely.

bond.nlargest(n = 3, columns = ["Box Office"])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [80]:
# Three films with the lowest box office.
# (Apparently the word "Kill" in a title does not draw many people to the cinema.)

bond.nsmallest(n = 3, columns = ["Box Office"])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [81]:
# The same methods are also available on a pandas Series.

bond["Box Office"].nlargest(3)

Film
Skyfall        943.5
Thunderball    848.1
Goldfinger     820.4
Name: Box Office, dtype: float64