# .apply() method
use this to apply a custom function to each row or column in a df

In [1]:
#import needed library and data
import pandas as pd
def setUp():
    bond = pd.read_csv("../data/pandas/jamesbond.csv",index_col="Film")
    bond.sort_index(inplace = True)
    return bond

In [2]:
bond = setUp()

## use an .apply method on an entire column (or columns!).
little bit of code, lots of power.

In [3]:
#convert col names to strings
def convertToStringAndAddMillions(num):
    return "$"+str(num) + " million"

In [4]:
columns = ["Box Office", "Budget", "Bond Actor Salary"]# bond["Budget"] = bond["Budget"].apply(convertToStringAndAddMillions)

In [5]:
for col in columns:
    bond[col] = bond[col].apply(convertToStringAndAddMillions)

In [6]:
#check to see how everything was changed in the original dataframe
bond.head(10)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,$275.2 million,$54.5 million,$9.1 million
Casino Royale,2006,Daniel Craig,Martin Campbell,$581.5 million,$145.3 million,$3.3 million
Casino Royale,1967,David Niven,Ken Hughes,$315.0 million,$85.0 million,$nan million
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,$442.5 million,$34.7 million,$5.8 million
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,$465.4 million,$154.2 million,$17.9 million
Dr. No,1962,Sean Connery,Terence Young,$448.8 million,$7.0 million,$0.6 million
For Your Eyes Only,1981,Roger Moore,John Glen,$449.4 million,$60.2 million,$nan million
From Russia with Love,1963,Sean Connery,Terence Young,$543.8 million,$12.6 million,$1.6 million
GoldenEye,1995,Pierce Brosnan,Martin Campbell,$518.5 million,$76.9 million,$5.1 million
Goldfinger,1964,Sean Connery,Guy Hamilton,$820.4 million,$18.6 million,$3.2 million


## Using .apply() with Row Values

In [7]:
#reset data
bond = setUp()

In [8]:
def goodMovie(row):
    
    actor = row[1]
    budget = row[4]
    
    if actor == 'Pierce Brosnan':
        return "The worst"
    elif actor == "Sean Connery" and budget > 40:
        return "Good flick"
    else:
        return "Not even worth it."

#apply to each row using the axis parameter
bond.apply(goodMovie, axis="columns")

Film
A View to a Kill                   Not even worth it.
Casino Royale                      Not even worth it.
Casino Royale                      Not even worth it.
Diamonds Are Forever               Not even worth it.
Die Another Day                             The worst
Dr. No                             Not even worth it.
For Your Eyes Only                 Not even worth it.
From Russia with Love              Not even worth it.
GoldenEye                                   The worst
Goldfinger                         Not even worth it.
Licence to Kill                    Not even worth it.
Live and Let Die                   Not even worth it.
Moonraker                          Not even worth it.
Never Say Never Again                      Good flick
Octopussy                          Not even worth it.
On Her Majesty's Secret Service    Not even worth it.
Quantum of Solace                  Not even worth it.
Skyfall                            Not even worth it.
Spectre                

# .copy method

In [9]:
bond = setUp()

In [10]:
#let's look at a bit of our data to begin with
directors = bond["Director"]
directors.head(10)

Film
A View to a Kill               John Glen
Casino Royale            Martin Campbell
Casino Royale                 Ken Hughes
Diamonds Are Forever        Guy Hamilton
Die Another Day             Lee Tamahori
Dr. No                     Terence Young
For Your Eyes Only             John Glen
From Russia with Love      Terence Young
GoldenEye                Martin Campbell
Goldfinger                  Guy Hamilton
Name: Director, dtype: object

In [11]:
directors['A View to Kill'] = "Mr. John Glen" #notice that you get a warning.  This operation will change the original dataframe.
directors.head(10)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Film
A View to a Kill               John Glen
Casino Royale            Martin Campbell
Casino Royale                 Ken Hughes
Diamonds Are Forever        Guy Hamilton
Die Another Day             Lee Tamahori
Dr. No                     Terence Young
For Your Eyes Only             John Glen
From Russia with Love      Terence Young
GoldenEye                Martin Campbell
Goldfinger                  Guy Hamilton
Name: Director, dtype: object

In [12]:
# in order to not overwrite the orignal data, make a copy of a specific series and manipulate the copy instead.
setUp()

director_copy = bond["Director"].copy()

In [13]:
director_copy["A View to a Kill"] = "Mr. X"
director_copy.head(10)

Film
A View to a Kill                   Mr. X
Casino Royale            Martin Campbell
Casino Royale                 Ken Hughes
Diamonds Are Forever        Guy Hamilton
Die Another Day             Lee Tamahori
Dr. No                     Terence Young
For Your Eyes Only             John Glen
From Russia with Love      Terence Young
GoldenEye                Martin Campbell
Goldfinger                  Guy Hamilton
Name: Director, dtype: object

# .set_index() and .reset_index() method

In [14]:
bond = pd.read_csv("../data/pandas/jamesbond.csv")
bond.sort_index(inplace = True)
# can choose a specific column as the main index of the df.
bond.set_index("Film", inplace = True)

In [15]:
# .reset_index() will move the chosen column back to its expected place and create a new numeric.  the "drop" parameter will get rid of the current index

# .loc[ ] method
retrieve rows by index label

In [16]:
bond = setUp()

In [17]:
bond.loc["Goldfinger"] # a single value will return a series

Year                         1964
Actor                Sean Connery
Director             Guy Hamilton
Box Office                  820.4
Budget                       18.6
Bond Actor Salary             3.2
Name: Goldfinger, dtype: object

In [18]:
bond.loc["GoldenEye"]

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [19]:
#pandas will throw an error if the key we give it doesn't exist.
#pandas dataframes will return all rows that fit a specific label if there are more than 1 (indexes don't have to be unique in pandas!)
bond.loc["Casino Royale"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [20]:
#can extract a range of films using python list syntax.  e.g.,
bond.loc["Diamonds Are Forever":"Moonraker"] # notice that this includes the last value

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [21]:
#extract every movie after a specific movie
bond.loc["GoldenEye":]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [22]:
#extract every movie from the beginning up to a specific point. e.g.,
bond.loc[:'GoldenEye']

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,Pierce Brosnan,Martin Campbell,518.5,76.9,5.1


In [23]:
#extract multiple non-sequential values. Need to feed .loc[] a list e.g.,
bond.loc[['Octopussy','Moonraker']]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [24]:
#use a second parameter to only extract specific values from the row.  e.g., just get the director of two different movies
bond.loc[['GoldenEye','Moonraker'], 'Director']

Film
GoldenEye    Martin Campbell
Moonraker      Lewis Gilbert
Name: Director, dtype: object

# .ix[ ] method
basically combines functionality of .loc and .iloc[] methods

NOTE: THIS METHOD IS DEPRECATED.  There are efficiency problems with this method, so don't use it anymore.

In [25]:
#reset df
bond = setUp()

In [26]:
bond.ix['GoldenEye']

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

# Set new values for a specific cell or row

In [27]:
#set a single value
bond.loc['GoldenEye','Actor'] = 'That one guy'
bond.loc['GoldenEye','Actor']

'That one guy'

In [28]:
#bond.rename() to rename a column from on heaing
bond.rename(columns={'Year':'Release Date'}) #must use the 'inplace parameter to mak'

Unnamed: 0_level_0,Release Date,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,1995,That one guy,Martin Campbell,518.5,76.9,5.1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


# create a random sample with .sample() method

In [29]:
#extract one row at random
bond.sample()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [30]:
bond.sample()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [31]:
bond.sample()

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
The Living Daylights,1987,Timothy Dalton,John Glen,313.5,68.8,5.2


In [32]:
#modify some parameters
bond.sample(n = 5)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [33]:
# extract a % of the total frame.  e.g., 25% of the sample
bond.sample(frac=.25)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
The Spy Who Loved Me,1977,Roger Moore,Lewis Gilbert,533.0,45.1,
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5
Moonraker,1979,Roger Moore,Lewis Gilbert,535.0,91.5,


In [34]:
#change the axis to cols.  Extract a random column
bond.sample(frac = .1, axis=1)

Unnamed: 0_level_0,Budget
Film,Unnamed: 1_level_1
A View to a Kill,54.5
Casino Royale,145.3
Casino Royale,85.0
Diamonds Are Forever,34.7
Die Another Day,154.2
Dr. No,7.0
For Your Eyes Only,60.2
From Russia with Love,12.6
GoldenEye,76.9
Goldfinger,18.6


# .nsmallest() and nlargest() methods
used to extract a row with the smallest or largest values in a specific column

In [35]:
bond.nsmallest(3, 'Box Office')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [36]:
bond.nlargest(3,'Box Office')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [37]:
#3 largest buggets
bond.nlargest(3,'Budget')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [38]:
bond.nsmallest(6,'Bond Actor Salary')

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [39]:
#can also extract just a series
bond['Box Office'].nlargest(5)

Film
Skyfall          943.5
Thunderball      848.1
Goldfinger       820.4
Spectre          726.7
Casino Royale    581.5
Name: Box Office, dtype: float64

# Filter data using .where() method

In [40]:
#quick review of the other filtering method
#create a Boolean mask
mask = bond['Actor'] == "Sean Connery"
bond[mask]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [41]:
#notice that using the .where() method returns the entire frame instead of just a subset.
# rows not matching the condition are all returned with NaN values
bond.where(mask)


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,1971.0,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Die Another Day,,,,,,
Dr. No,1962.0,Sean Connery,Terence Young,448.8,7.0,0.6
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [42]:
#where with multiple conditions
mask2 = bond['Box Office'] > 500
bond.where(mask & mask2)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,1963.0,Sean Connery,Terence Young,543.8,12.6,1.6
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2
