In [2]:
import pandas as pd

# Import Data and Setting/Resetting Index

In [4]:
# Load the James Bond films dataset from a CSV file (relative path)
bond = pd.read_csv(filepath_or_buffer='data/jamesbond.csv')
bond.head(n=5)  # preview the first five rows

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [6]:
# Promote the 'Film' column to the row index.
# set_index returns a new frame, so `bond` keeps its numbered index.
bond_indexed = bond.set_index('Film')
bond_indexed.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [7]:
# reset index (revert back to numbered index)
# default drop = False keeps the old index as a regular column (if set to True, the index col will be dropped)
# The Film-indexed frame is rebuilt from `bond` here instead of reassigning bond_indexed from itself,
# so re-running this cell gives the same result rather than adding an extra 'index' column.
bond_indexed = bond.set_index(keys = 'Film').reset_index()
bond_indexed.head()

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [11]:
# replacing an index col
bond_indexed = bond.set_index(keys = 'Year')

bond_indexed = bond_indexed.reset_index().set_index(keys = 'Film').sort_index()
# above first moves the current index (Year) back into a regular column and then sets Film as the new index.
# Prevents loss of the Year col, which set_index would otherwise discard when replacing the index.
# Film index is sorted for performance improvement
bond_indexed.head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9


# Retrieving Rows

In [14]:
# retrieve by index label with loc (returns series with col names as index)
# the name of the returned Series is the row label that was looked up
bond_indexed.loc['Goldfinger']

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [17]:
# retrieve by index label with loc (returns df when there are multiple rows returned)
# 'Casino Royale' appears twice in the Film index, so both matching rows come back
bond_indexed.loc['Casino Royale']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [19]:
# Slice a range of rows by index label with loc.
# Looks like Python list slicing, but with labels BOTH endpoints are included.
bond_indexed.loc['Diamonds Are Forever':'From Russia with Love']

# [start : end]  rows from start through end (inclusive)
# [start : ]     rows from start (inclusive) to the last row
# [ : end]       rows from the first row through end (inclusive)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


In [20]:
# Retrieve a list of rows by index label with loc.
# The outer brackets index into loc, the inner brackets build the list of labels;
# a label that occurs more than once in the index returns every matching row.
bond_indexed.loc[['Die Another Day', 'For Your Eyes Only', 'Casino Royale']]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [21]:
# retrieve by index position with iloc (returns series with col names as index)
# position 0 is the first row of the frame, whatever its index label is
bond.iloc[0]

Film                        Dr. No
Year                          1962
Actor                 Sean Connery
Director             Terence Young
Box Office                   448.8
Budget                         7.0
Bond Actor Salary              0.6
Name: 0, dtype: object

In [27]:
# retrieve by index position with iloc (on an indexed data frame)
# After sort_index(), 'A View to a Kill' is the first row, so the label lookup and the
# position lookup should return the same Series. Only the last expression of a cell is
# displayed, so the comparison is made explicit instead of leaving a discarded expression.
assert bond_indexed.iloc[0].equals(bond_indexed.loc['A View to a Kill'])
bond_indexed.iloc[0]

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

In [22]:
# retrieve range by index position with iloc (similar to Python list slicing)
bond.iloc[0:5] # not inclusive: returns positions 0-4, the end position 5 is excluded

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [23]:
# Pass a list of integer positions to iloc to pick specific rows
bond.iloc[[0, 1, 2, 3, 4]]

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
4,Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [29]:
# adding second arguments to extract specific column with loc
# one row label plus one column name returns the single cell value
bond_indexed.loc['Moonraker', 'Actor'] # second argument is the specified column name

'Roger Moore'

In [33]:
# adding second arguments to extract specific columns with loc
# a list of column names returns a Series holding just those columns for the row
bond_indexed.loc['Moonraker', ['Actor', 'Director']]

Actor         Roger Moore
Director    Lewis Gilbert
Name: Moonraker, dtype: object

In [35]:
# Extract multiple rows and columns with loc: .loc[row labels, column names]
bond_indexed.loc[['Moonraker', 'Casino Royale'], ['Actor', 'Director']]

Unnamed: 0_level_0,Actor,Director
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Moonraker,Roger Moore,Lewis Gilbert
Casino Royale,Daniel Craig,Martin Campbell
Casino Royale,David Niven,Ken Hughes


In [43]:
# Extract multiple rows and columns with iloc: .iloc[row positions, column positions]
# rows 0-4 (slice end excluded) and the columns at positions 0, 1, 2 and 6
bond.iloc[:5, [0, 1, 2, 6]]

Unnamed: 0,Film,Year,Actor,Bond Actor Salary
0,Dr. No,1962,Sean Connery,0.6
1,From Russia with Love,1963,Sean Connery,1.6
2,Goldfinger,1964,Sean Connery,3.2
3,Thunderball,1965,Sean Connery,4.7
4,Casino Royale,1967,David Niven,


#### loc and iloc Slicing Syntax

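[start : end] returns the records from start to end; `loc` (labels) includes end, `iloc` (positions) excludes it

[start : ] returns all records from start (inclusive) onward

[ : end] returns all records from the beginning up to end (end is included with `loc`, excluded with `iloc`)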

# Setting New Values

In [46]:
# setting a new value for a specific cell
# assigning through a single .loc[row, column] call writes the value into bond_indexed itself
bond_indexed.loc['Dr. No', 'Actor'] = 'Sir Sean Connery'
bond_indexed.loc['Dr. No']

Year                             1962
Actor                Sir Sean Connery
Director                Terence Young
Box Office                      448.8
Budget                            7.0
Bond Actor Salary                 0.6
Name: Dr. No, dtype: object

In [49]:
# Overwrite several cells of one row at once:
# the list of values is matched to the list of columns by position.
# NOTE(review): the other rows appear to hold much smaller figures (e.g. 448.8) - confirm the mixed units are intended.
bond_indexed.loc['Dr. No', ['Box Office', 'Budget', 'Bond Actor Salary']] = [448800000, 7000000, 600000]
bond_indexed.loc['Dr. No']

Year                             1962
Actor                Sir Sean Connery
Director                Terence Young
Box Office                448800000.0
Budget                      7000000.0
Bond Actor Salary            600000.0
Name: Dr. No, dtype: object

In [54]:
# setting a new value for every occurrence of an existing value
# boolean mask: True for every row whose Actor is exactly 'Sean Connery'
sean_connery = bond_indexed['Actor'].eq('Sean Connery')

# .loc[mask, column] assigns to the matching rows of bond_indexed in one step
bond_indexed.loc[sean_connery, 'Actor'] = 'Sir Sean Connery'

# rebuild the mask against the new spelling to check the update
sean_connery_films = bond_indexed['Actor'].eq('Sir Sean Connery')
bond_indexed.loc[sean_connery_films].head()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sir Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sir Sean Connery,Terence Young,448800000.0,7000000.0,600000.0
From Russia with Love,1963,Sir Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sir Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sir Sean Connery,Irvin Kershner,380.0,86.0,


# Rename Index Labels or Columns