# 1) Intro to DataFrame III + Import Dataset

In [2]:
import pandas as pd

In [5]:
# Load the James Bond dataset; with no `index_col`, rows get the default integer index.
bond = pd.read_csv('Data/jamesbond.csv')
# Preview the first three rows.
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


----

# 2) The `set_index` and `reset_index` Methods

### using `index_col`

In [7]:
# `index_col` promotes the `Film` column to the index at load time.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## using `set_index` for single level

In [18]:
# Reload with the default integer index so `set_index` can be demonstrated next.
bond = pd.read_csv('Data/jamesbond.csv')
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [19]:
# `set_index` returns a new DataFrame, so the result is reassigned to keep the change.
bond = bond.set_index('Film')
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## using `.reset_index()`
+ `drop=False` (the default): keep the column that was formerly the index

In [20]:
# Neither call is reassigned, so `bond` itself is unchanged; only the last expression is displayed.
bond.reset_index()
bond.reset_index(drop=False) # `Film` goes back into the frame as a regular column

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
5,You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [21]:
bond.reset_index(drop=True) # the old `Film` index is discarded instead of becoming a column

Unnamed: 0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,1967,David Niven,Ken Hughes,315.0,85.0,
5,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
6,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
7,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
8,1973,Roger Moore,Guy Hamilton,460.3,30.8,
9,1974,Roger Moore,Guy Hamilton,334.0,27.7,


---------

### Let's say we want to replace the current `Film` index with the `Year` column

In [22]:
bond.set_index('Year') # setting a new index directly discards the current `Film` index, so avoid this

Unnamed: 0_level_0,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1962,Sean Connery,Terence Young,448.8,7.0,0.6
1963,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
1965,Sean Connery,Terence Young,848.1,41.9,4.7
1967,David Niven,Ken Hughes,315.0,85.0,
1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4
1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
1973,Roger Moore,Guy Hamilton,460.3,30.8,
1974,Roger Moore,Guy Hamilton,334.0,27.7,


### To avoid losing the old index as in the scenario above, do the following
+ reset the index
+ then set the preferred column as the index

In [24]:
# First move `Film` back into the columns, then promote `Year` to the index,
# so the `Film` values are kept.
bond = bond.reset_index().set_index('Year')
bond.head(3)

Unnamed: 0_level_0,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2


-------

# 3) Retrieve Rows by Index Label with `.loc[]` Accessor

In [28]:
# Reload the CSV with `Film` as the index and sort the index labels in one chain.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


## TIP: sorting the index beforehand boosts pandas lookup performance
The same rule applies to a dictionary: it is much easier to find a keyword and its meaning when the keywords are sorted in order than when they are randomly jumbled.

------

# The `.loc[]` accessor retrieves rows by `index label`
+ `.loc` takes square brackets `[]` rather than parentheses. A possible reason: in pandas, accessing values by indexing is almost always done with `[]`, so the pandas developers probably kept `.loc` consistent with that.

In [35]:
# Each lookup returns the row for that label as a Series; only the last one is displayed.
bond.loc['Goldfinger']
bond.loc['GoldenEye']

# A label that is not in the index raises KeyError:
# bond.loc['blah']

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

### If more than one row has that index label, a DataFrame is returned instead of a Series

In [36]:
# Two films share this title, so the result is a DataFrame containing both rows.
bond.loc['Casino Royale']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [42]:
# Label slices with `.loc` include BOTH the start and the end label (unlike positional slices).
# NOTE(review): 'From Russia With Love' (capital "W") does not match the index label
# 'From Russia with Love'; the slice presumably only works because the index is sorted,
# and it stops before that film — confirm and fix the spelling.
bond.loc['Diamonds Are Forever': 'From Russia With Love']
bond.loc['Diamonds Are Forever': 'From Russia With Love': 2] # step of 2: every second row of the slice

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [45]:
# Open-ended label slices: from 'GoldenEye' to the end, then from the start through 'Skyfall'.
# Only the last expression is displayed.
bond.loc['GoldenEye': ]

bond.loc[ : 'Skyfall' ]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [50]:
# A list of labels returns the rows in the order the labels are given.
bond.loc[['Skyfall', 'Goldfinger', 'Octopussy']]
bond.loc[['Octopussy', 'Die Another Day']]

# bond.loc[['Octopussy', 'Die Another Day', 'Blah']] # raises KeyError because 'Blah' is not in the index

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


### To avoid such a KeyError, always check whether the key exists first

In [57]:
# Membership tests against the index labels; only the last expression ('Blah') is displayed.
'Skyfall' in bond.index

'Blah' in bond.index

False

In [56]:
# Guard the lookup so that a missing label cannot raise KeyError.
if 'Skyfall' in bond.index:
    print(bond.loc['Skyfall'])

Year                         2012
Actor                Daniel Craig
Director               Sam Mendes
Box Office                  943.5
Budget                      170.2
Bond Actor Salary            14.5
Name: Skyfall, dtype: object


-------

# 4) Retrieve Rows by Index Position with `iloc` Accessor

In [77]:
# Reload with the default integer index for the positional (`iloc`) examples.
bond = pd.read_csv('Data/jamesbond.csv')
bond.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [78]:
# `.iloc` selects by integer position: a single int selects one row,
# a list of ints selects several rows as a DataFrame. Only the last expression is displayed.
bond.iloc[0]
bond.iloc[15]
bond.iloc[[10, 20, 25]]

# Position 100 is out of range for this 26-row frame, so this raises IndexError:
# bond.iloc[100]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
10,The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
20,The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
25,Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,


In [79]:
bond.iloc[15: 20] # positions 15-19; the ending bound is exclusive
bond.iloc[20:]    # position 20 through the last row
bond.iloc[: 5]    # first five rows (positions 0-4); this is the displayed result

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [80]:
# Promote `Film` to the index and sort the labels alphabetically in one chain.
bond = bond.set_index('Film').sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [82]:
# Label-based lookup works now that `Film` is the index.
bond.loc['A View to a Kill']

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

In [88]:
# Positional access still works when the index holds string labels.
# Only the last expression (positions 10-15) is displayed.
bond.iloc[0]
bond.iloc[15]
bond.iloc[10:16]

# Position 30 is out of bounds for the 26 rows, so this raises IndexError:
# bond.iloc[[10, 20, 30]]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


-------

# 5) Second Arguments to `loc` and `iloc` Accessors

In [94]:
# Reload the CSV with `Film` as the index and sort the index labels in one chain.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


## Getting Rows, Columns Values (intersection of value)
+ mix and matches of slicing

In [100]:
# `.loc[row_label, column_label]` returns the single value at that intersection.
bond.loc['Moonraker', 'Actor']
bond.loc['Moonraker', 'Director']

'Lewis Gilbert'

In [104]:
# One row label + a list of columns gives a Series;
# a list of rows + a list of columns gives a DataFrame (the displayed result).
bond.loc['Moonraker', ['Director', 'Box Office']]
bond.loc[['Moonraker', 'A View to a Kill'], ['Director', 'Box Office']]

Unnamed: 0_level_0,Director,Box Office
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Moonraker,Lewis Gilbert,535.0
A View to a Kill,John Glen,275.2


In [109]:
# Rows and columns can each be a single label or a label slice; label slices include both ends
# (the displayed result contains the 'Budget' column).
bond.loc['Moonraker', 'Director': 'Budget']
bond.loc['Moonraker': 'Thunderball', 'Director': 'Budget']
bond.loc['Moonraker': , 'Director': ]
bond.loc[: 'Moonraker', : 'Budget']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6


In [113]:
bond.iloc[14]    # the entire row at position 14
bond.iloc[14, 2] # the cell at row position 14, column position 2 (the `Director` column)

'John Glen'

In [114]:
# Row position 14, column positions 2-4 (the end of a positional slice is exclusive).
bond.iloc[14, 2:5]

Director      John Glen
Box Office        373.8
Budget             53.9
Name: Octopussy, dtype: object

In [117]:
# Lists and slices can be mixed freely on both axes; only the last expression is displayed.
bond.iloc[[14, 17], [2, 4]] # rows 14 and 17, columns 2 and 4

bond.iloc[: 15, : 4] # first 15 rows, first 4 columns

bond.iloc[7: , [0, 5]] # rows from position 7 onward, columns 0 and 5

Unnamed: 0_level_0,Year,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
From Russia with Love,1963,1.6
GoldenEye,1995,5.1
Goldfinger,1964,3.2
Licence to Kill,1989,7.9
Live and Let Die,1973,
Moonraker,1979,
Never Say Never Again,1983,
Octopussy,1983,7.8
On Her Majesty's Secret Service,1969,0.6
Quantum of Solace,2008,8.1


----

# 6) Set New Value for a Specific Cell

In [118]:
# Reload the CSV with `Film` as the index and sort the index labels in one chain.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


### we can directly assign new value

In [122]:
# Assigning through `.loc[row, column]` overwrites that cell in `bond` itself.
bond.loc['Dr. No', 'Actor'] = 'Sir Sean Connery'

In [123]:
# Read the cell back to confirm the assignment.
bond.loc['Dr. No', 'Actor']

'Sir Sean Connery'

In [126]:
# A list of three columns on the left is matched positionally with the three values on the right.
bond.loc['Dr. No', ['Box Office', 'Budget', 'Bond Actor Salary']] = [4480000, 7000000, 6000000]

In [127]:
# Read the three cells back to confirm the new values.
bond.loc['Dr. No', ['Box Office', 'Budget', 'Bond Actor Salary']] 

Box Office           4.48e+06
Budget                  7e+06
Bond Actor Salary       6e+06
Name: Dr. No, dtype: object

-----

# 7) Set Multiple Values in DataFrame

In [128]:
# Reload the CSV (replacing the frame edited above) with `Film` as the sorted index.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [139]:
# Boolean Series: True for every row whose `Actor` is exactly 'Sean Connery'.
actor_is_sean_connery = bond['Actor'] == 'Sean Connery'

In [140]:
# Incorrect: chained indexing (`df[mask][col] = ...`) assigns into an intermediate result
# rather than reliably into `bond`, and pandas emits a warning. Kept commented out on purpose:
# bond[actor_is_sean_connery]['Actor'] = 'Sir Sean Connery'

## using `.loc` by passing `series` to make direct changes

In [144]:
# `.loc[mask, column]` is a single indexing operation on the original DataFrame,
# so the assignment updates `bond` itself for every row where the mask is True.
bond.loc[actor_is_sean_connery, 'Actor'] = 'Sir Sean Connery'

In [145]:
# The mask was computed before the update, so it still selects the same rows,
# which now show the new `Actor` value.
bond[actor_is_sean_connery]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sir Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sir Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sir Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sir Sean Connery,Lewis Gilbert,514.2,59.9,4.4


------

# 8) Rename Index Labels or Columns in a `DataFrame`

In [146]:
# Reload the CSV (replacing the frame edited above) with `Film` as the sorted index.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


# 8.1) Renaming Index Labels

## Option 1) using `mapper` parameter
+ provide `dictionary with {current_name: new_name}`
+ mapper needs to be combined with `axis` parameter

In [151]:
# All four calls are equivalent ways to rename index (row) labels with `mapper`:
# the axis defaults to the index, and `axis=0`, `axis='rows'`, `axis='index'` all name that axis.
# `rename` returns a new DataFrame; only the last call's result is displayed.
bond.rename(mapper={'GoldenEye': 'Golden Eye',
                           'The World Is Not Enough': 'Best Bond Movie Ever'})

bond.rename(mapper={'GoldenEye': 'Golden Eye',
                           'The World Is Not Enough': 'Best Bond Movie Ever'}, axis=0)

bond.rename(mapper={'GoldenEye': 'Golden Eye',
                           'The World Is Not Enough': 'Best Bond Movie Ever'}, axis='rows')

bond.rename(mapper={'GoldenEye': 'Golden Eye',
                           'The World Is Not Enough': 'Best Bond Movie Ever'}, axis='index')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## Option 2) using `index` parameter (Preferred Approach)
+ when using `index`, there is no need to specify `axis`, because both say the same thing. Passing both raises an error.

In [152]:
# `index=` targets the row labels directly, so no `axis` argument is needed.
bond.rename(index={'GoldenEye': 'Golden Eye',
                           'The World Is Not Enough': 'Best Bond Movie Ever'})

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [154]:
# `rename` does not modify the frame in place here; reassign to keep the renamed labels.
bond = bond.rename(index={'GoldenEye': 'Golden Eye',
                           'The World Is Not Enough': 'Best Bond Movie Ever'})

In [155]:
# Display the frame to confirm that 'GoldenEye' now appears as 'Golden Eye'.
bond

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# 8.2) Renaming Columns

In [156]:
# Peek at one row to see the current column names before renaming them.
bond.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


## Option 1) using `mapper` parameter with `axis`

In [159]:
# `axis=1` and `axis='columns'` are equivalent: both apply the mapper to column names.
# Neither result is assigned, so `bond` is unchanged; only the last call is displayed.
bond.rename(mapper={
    'Year': 'Released Date',
    'Box Office': 'Revenue'
}, axis=1)

bond.rename(mapper={
    'Year': 'Released Date',
    'Box Office': 'Revenue'
}, axis='columns')

Unnamed: 0_level_0,Released Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## Option 2) using `columns` parameter (Preferred Method)

In [160]:
# `columns=` targets the column names directly, so no `axis` argument is needed.
bond.rename(columns={
    'Year': 'Released Date',
    'Box Office': 'Revenue'
})

Unnamed: 0_level_0,Released Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Golden Eye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [161]:
# Reassign to keep the renamed columns, then peek at one row to confirm.
bond = bond.rename(columns={
    'Year': 'Released Date',
    'Box Office': 'Revenue'
})

bond.head(1)

Unnamed: 0_level_0,Released Date,Actor,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


## Option 3) changing column names by assigning a `list` of values
+ with this approach, we need to pass every column name, regardless of whether we want to change it or not

In [163]:
# The current column labels, in order.
bond.columns

Index(['Released Date', 'Actor', 'Director', 'Revenue', 'Budget',
       'Bond Actor Salary'],
      dtype='object')

In [165]:
# Assigning a list replaces the column names positionally, so all six names must be given,
# including the ones that stay the same.
bond.columns = ['Release Date', 'Actor', 'Director', 'Gross', 'Cost', 'Bond Actor Salary']

In [166]:
# Confirm the new column names.
bond.head(1)

Unnamed: 0_level_0,Release Date,Actor,Director,Gross,Cost,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


--------

# 9) Delete Rows or Columns from a DataFrame

In [167]:
# Reload the CSV (replacing the frame edited above) with `Film` as the sorted index.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


# 9.1) Dropping Row/s using `.drop()` Method

In [180]:
# `drop` removes rows by index label and returns a new DataFrame; `bond` itself is unchanged,
# which is why each call below starts from the full frame. Only the last result is displayed.
bond.drop('A View to a Kill')
bond.drop('Casino Royale') # presumably drops both rows sharing this label — confirm

bond.drop(['A View to a Kill', 'Die Another Day', 'Goldfinger'])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


## 9.1.2) Dropping Column/s using `.drop()` Method

In [181]:
# With `axis=1` / `axis='columns'`, `drop` removes columns instead of rows.
# The results are not assigned, so `bond` keeps all of its columns.
bond.drop('Year', axis=1)
bond.drop('Year', axis='columns')

bond.drop(['Actor', 'Budget', 'Year'], axis=1)

Unnamed: 0_level_0,Director,Box Office,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,John Glen,275.2,9.1
Casino Royale,Martin Campbell,581.5,3.3
Casino Royale,Ken Hughes,315.0,
Diamonds Are Forever,Guy Hamilton,442.5,5.8
Die Another Day,Lee Tamahori,465.4,17.9
Dr. No,Terence Young,448.8,0.6
For Your Eyes Only,John Glen,449.4,
From Russia with Love,Terence Young,543.8,1.6
GoldenEye,Martin Campbell,518.5,5.1
Goldfinger,Guy Hamilton,820.4,3.2


In [182]:
# `bond` is unchanged by the `drop` calls above: all columns and the first row are still present.
bond.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


----------

# 9.2) Popping and Deleting columns using `.pop()` Method
+ removes the column from the original dataframe
+ also returns that column

In [183]:
# `pop` removes the column from `bond` in place and returns it as a Series.
actor = bond.pop('Actor')
actor

Film
A View to a Kill                      Roger Moore
Casino Royale                        Daniel Craig
Casino Royale                         David Niven
Diamonds Are Forever                 Sean Connery
Die Another Day                    Pierce Brosnan
Dr. No                               Sean Connery
For Your Eyes Only                    Roger Moore
From Russia with Love                Sean Connery
GoldenEye                          Pierce Brosnan
Goldfinger                           Sean Connery
Licence to Kill                    Timothy Dalton
Live and Let Die                      Roger Moore
Moonraker                             Roger Moore
Never Say Never Again                Sean Connery
Octopussy                             Roger Moore
On Her Majesty's Secret Service    George Lazenby
Quantum of Solace                    Daniel Craig
Skyfall                              Daniel Craig
Spectre                              Daniel Craig
The Living Daylights               Timothy Da

In [185]:
bond.head(1) # the 'Actor' column is no longer part of `bond`

Unnamed: 0_level_0,Year,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,John Glen,275.2,54.5,9.1


----------

In [186]:
# Reload the CSV (restoring the popped `Actor` column) with `Film` as the sorted index.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


# 9.3) directly deleting columns using `del` keyword

In [187]:
# `del` removes the column from `bond` in place; unlike `pop`, nothing is returned.
del bond['Director']

In [188]:
bond.head(1) # the 'Director' column is no longer part of `bond`

Unnamed: 0_level_0,Year,Actor,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
A View to a Kill,1985,Roger Moore,275.2,54.5,9.1


In [189]:
# Remove the 'Year' column in place as well.
del bond['Year']

In [190]:
# Confirm that both 'Director' and 'Year' are gone.
bond.head(1)

Unnamed: 0_level_0,Actor,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A View to a Kill,Roger Moore,275.2,54.5,9.1


--------

# 10) Create Random Sample using `.sample()` Method
+ `n`: number of samples
+ `frac`: fraction or % percentage

In [193]:
# Reload the CSV (restoring the deleted columns) with `Film` as the sorted index.
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film']).sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [195]:
bond.sample() # with no arguments, returns one randomly chosen row

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,


In [208]:
# Five random rows. `axis=0` and `axis='index'` both mean "sample rows", which is also the default.
# Each call draws independently; only the last result is displayed.
# NOTE(review): no `random_state` is passed, so the rows differ on every run.
bond.sample(n=5)

bond.sample(n=5, axis=0)

bond.sample(n=5, axis='index')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


In [200]:
# (number of rows, number of columns)
bond.shape

(26, 6)

In [199]:
26 * .25 # 25% of the 26 rows is 6.5; the `frac=.25` sample shown next contains 6 rows

6.5

In [202]:
# `frac` asks for a fraction of the rows instead of a fixed count.
# Each call draws independently; only the last result is displayed.
bond.sample(frac = .25)

bond.sample(frac = .25, axis=0)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,


In [207]:
# `axis=1` / `axis='columns'` samples columns instead of rows (all rows are kept).
bond.sample(n = 3, axis=1) # 3 random columns
bond.sample(n=3, axis='columns')

Unnamed: 0_level_0,Year,Bond Actor Salary,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
A View to a Kill,1985,9.1,54.5
Casino Royale,2006,3.3,145.3
Casino Royale,1967,,85.0
Diamonds Are Forever,1971,5.8,34.7
Die Another Day,2002,17.9,154.2
Dr. No,1962,0.6,7.0
For Your Eyes Only,1981,,60.2
From Russia with Love,1963,1.6,12.6
GoldenEye,1995,5.1,76.9
Goldfinger,1964,3.2,18.6


-------

# 11) The `.nsmallest()` and `.nlargest()` Methods
NOTE: These methods are **very efficient** for getting the top or bottom `n` rows of a very large DataFrame.

In [209]:
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond = bond.sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


### Compared to `sort_values(...).head(n)`, `nlargest` gives the same rows but is much faster on very large datasets.

In [212]:
bond.sort_values('Box Office', ascending=False).head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [216]:
bond.nlargest(3, columns=['Box Office', 'Budget'])

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [217]:
bond.nsmallest(2, columns='Box Office')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [219]:
bond.nlargest(3, columns='Budget')

bond.nsmallest(n=6, columns='Bond Actor Salary')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [222]:
bond['Box Office'].nlargest(8)

bond['Year'].nsmallest(2)

Film
Dr. No                   1962
From Russia with Love    1963
Name: Year, dtype: int64

In [223]:
# 3 smallest Box Office movies starring Sean Connery
bond[bond['Actor'] == 'Sean Connery'].nsmallest(3, columns='Box Office')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6


In [224]:
# Top 3 Box Office Movies starring Sean Connery
bond[bond['Actor'] == 'Sean Connery'].nlargest(3, columns='Box Office')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


-----

# 12) Filtering with `where` Method

In [225]:
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond = bond.sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [231]:
is_sean_connery = bond['Actor'] == 'Sean Connery'
bond[is_sean_connery]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


### By using `where`, we get back the whole DataFrame: rows that don't fulfill the condition are still returned, but with NaN in every column

It is useful when you don't want just the subset that fulfills the condition, but the whole DataFrame with the non-matching rows blanked out.

In [232]:
bond.where(is_sean_connery)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,,,,,,
Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [234]:
bond.where(bond['Box Office'] > 800)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [235]:
is_box_office_more_than_800 = bond['Box Office'] > 800
bond.where(is_sean_connery & is_box_office_more_than_800)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


------------

# 13) The `.query()` Method
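### NOTE: plain column names only work in `.query()` if they **don't contain any spaces** (since pandas 0.25, a name with spaces can instead be wrapped in backticks inside the query string)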

In [236]:
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond = bond.sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


### Removing Space in Column Names

In [237]:
bond.columns

Index(['Year', 'Actor', 'Director', 'Box Office', 'Budget',
       'Bond Actor Salary'],
      dtype='object')

In [238]:
new_column_names = [col_name.replace(' ', '_') for col_name in bond.columns] 
new_column_names

['Year', 'Actor', 'Director', 'Box_Office', 'Budget', 'Bond_Actor_Salary']

In [240]:
bond.columns = new_column_names
bond.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


## Querying on DataFrame
+ the condition is written as a string literal

In [249]:
bond.query('Actor == "Sean Connery"')
bond.query('Director == "Terence Young"')
bond.query('Actor != "Roger Moore"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,


In [250]:
bond.query('Box_Office > 600')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


### `and` / `or` can be used here

In [251]:
bond.query('Actor == "Roger Moore" and Director == "John Glen"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


### using `in` and `not in`

In [253]:
bond.query('Actor in ["Timothy Dalton", "George Lazenby"]')

bond.query('Actor not in ["Roger Moore", "Sean Connery"]')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


----

# 14) A Review of the `.apply()` Method on Single Column

In [272]:
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond = bond.sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


### We want to add `millions!` as a suffix to the values in the last 3 columns

In [273]:
def convert_to_string_add_millions(number):
    """Return the string form of ``number`` followed by ``' millions!'``.

    The value is converted with ``str()``, so a missing value (NaN) comes
    back as ``'nan millions!'``.
    """
    return f"{number!s} millions!"

In [260]:
# Apply the function one column at a time.
# This is done on a copy so that `bond` keeps its numeric columns: the loop in
# the next code cell applies the same function again, and mutating `bond` here
# would make a top-to-bottom run produce '... millions! millions!'.
bond_one_by_one = bond.copy()
bond_one_by_one['Box Office'] = bond_one_by_one['Box Office'].apply(convert_to_string_add_millions)
bond_one_by_one['Budget'] = bond_one_by_one['Budget'].apply(convert_to_string_add_millions)

### Instead of applying the function to one column at a time as above, there is a more elegant way to do this.

In [274]:
columns = ['Box Office', 'Budget', 'Bond Actor Salary']
for col in columns:
    bond[col] = bond[col].apply(convert_to_string_add_millions)

In [275]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 millions!,54.5 millions!,9.1 millions!
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 millions!,145.3 millions!,3.3 millions!
Casino Royale,1967,David Niven,Ken Hughes,315.0 millions!,85.0 millions!,nan millions!


---------

# 15) The `.apply()` Method with Row values

In [276]:
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond = bond.sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


### We want to assign each movie one of 3 different classifications

In [277]:
def good_movie(row):
    """Classify a single film based on its actor and budget.

    Parameters
    ----------
    row : pandas.Series
        One row of the DataFrame, as passed by
        ``DataFrame.apply(good_movie, axis='columns')``. It must contain the
        labels ``'Actor'`` and ``'Budget'``.

    Returns
    -------
    str
        ``'The Best'`` if the actor is Pierce Brosnan, ``'Enjoyable'`` if the
        actor is Roger Moore and the budget is greater than 40, otherwise
        ``'I have no clue.'``.
    """
    # Look the values up by column label instead of by position: integer keys
    # on a label-indexed Series are deprecated as positional access, and a
    # label lookup keeps working if the columns are reordered or extended.
    actor = row['Actor']
    budget = row['Budget']

    if actor == 'Pierce Brosnan':
        return 'The Best'
    elif actor == 'Roger Moore' and budget > 40:
        return 'Enjoyable'
    else:
        return 'I have no clue.'

In [282]:
bond['classification'] = bond.apply(good_movie, axis='columns') # axis='columns': good_movie is called once per row and receives that row's values across the columns
bond.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary,classification
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1,Enjoyable
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3,I have no clue.
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,,I have no clue.
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8,I have no clue.
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9,The Best


------

# 16) The `.copy()` Method 

In [297]:
bond = pd.read_csv('Data/jamesbond.csv', index_col=['Film'])
bond = bond.sort_index()
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [286]:
directors = bond['Director']
directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [289]:
directors['A View to a Kill'] = 'Mr. John Glen'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  directors['A View to a Kill'] = 'Mr. John Glen'


In [290]:
directors.head(3)

Film
A View to a Kill      Mr. John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

### We can see that the change also affected the original DataFrame

In [291]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,Mr. John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


### What if we don't want the original DataFrame to be affected? We can use `.copy()`

In [298]:
directors = bond['Director'].copy()
directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [299]:
directors['A View to a Kill'] = 'Mr. John Glen'

In [300]:
directors.head(3)

Film
A View to a Kill      Mr. John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [301]:
bond.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


We can see that the change to the Series no longer affects the original DataFrame.