#### Axis Parameter
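- `"columns"` or `1` (the integer) – targets the column labels
- `"rows"`, `"index"` or `0` (the integer) – targets the row (index) labels
- default is `None`; when `axis` is left out, the methods used in this notebook act on the rows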

In [2]:
import pandas as pd

In [2]:
# dataframe 
df = pd.read_csv("jamesbond.csv")
df.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## The `.set_index()` and `.reset_index()` Methods

In [3]:
# how to set a column as the index
df = pd.read_csv("jamesbond.csv", index_col = "Film") 
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [4]:
# using the .set_index() method
df = pd.read_csv("jamesbond.csv")
df.set_index("Film", inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [5]:
# .reset_index() swaps the current index for the default 0, 1, 2, ... integer index.
# The first three calls return a new DataFrame and leave df untouched;
# only the last expression in a cell is displayed, so nothing is shown for them.
df.reset_index().head(3)
df.reset_index(drop = False) # default: the old index (Film) is moved back into a regular column
df.reset_index(drop = True) # the old index (Film) is discarded instead of being kept as a column
df.reset_index(drop = False, inplace = True) # inplace = True modifies df itself
df.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [6]:
df.set_index("Film", inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [7]:
# Calling df.set_index("Year") directly would replace Film as the index,
# and the Film values would be dropped from the DataFrame:
# df.set_index("Year")

# To keep Film, first move it back into a regular column, then set the new index:
df.reset_index(inplace = True)
df.set_index("Year", inplace = True)
df.head(3)

Unnamed: 0_level_0,Film,Actor,Director,Box Office,Budget,Bond Actor Salary
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1962,Dr. No,Sean Connery,Terence Young,448.8,7.0,0.6
1963,From Russia with Love,Sean Connery,Terence Young,543.8,12.6,1.6
1964,Goldfinger,Sean Connery,Guy Hamilton,820.4,18.6,3.2


## Retrieve Rows by Index Label with `.loc[]` Accessor

In [8]:
# Load the data with Film as the index and sort the index alphabetically:
# pandas can look a label up faster in a sorted index than in an unordered one.
df = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [9]:
# .loc[] looks a row up by its index label and returns it as a Series.
# Only the last expression of the cell (GoldenEye) is displayed.
df.loc["Goldfinger"]
df.loc["GoldenEye"]
# df.loc["Sacred Bond"] # raises a KeyError because that label doesn't exist

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [10]:
# if there are multiple rows with the same index, you will get a dataframe instead of a series
df.loc["Casino Royale"]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [11]:
df.loc["Diamonds Are Forever":"From Russia with Love"] 
# label slice: returns every row from the start label through the end label
# unlike regular Python slicing, the end label IS included

df.loc["Diamonds Are Forever":"From Russia with Love":2]
# a third value is the step: 2 keeps every second row of that range

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6


In [12]:
df.loc["GoldenEye":] 
# from the given label to the end of the dataframe

df.loc[:"On Her Majesty's Secret Service"]
# from the beginning of the dataframe up to and including the given label

Output = None # ending the cell with an assignment leaves no result to display, so it doesn't clog up the screen

In [13]:
df.loc[["Die Another Day","Octopussy"]]
# df.loc[["Octopussy","Die Another Day"]]
# the sequence of the indexes in the list matters!

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Die Another Day,2002,Pierce Brosnan,Lee Tamahori,465.4,154.2,17.9
Octopussy,1983,Roger Moore,John Glen,373.8,53.9,7.8


In [14]:
"Gold Bond" in df.index # how to check if the index exists

False

## Retrieve Rows by Index Position with `.iloc[]` Accessor

In [15]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [16]:
df.iloc[1] 
# a single integer returns that row as a Series
# .iloc[] selects by integer position (counting from 0)
# it ignores the index labels, so it works the same even with Film as the index

df.iloc[0:5]
# a slice returns a new DataFrame
# the start position is included, the stop position is not
# so this gives the rows at positions 0, 1, 2, 3 and 4

df.iloc[[13,20]] 
# a list returns only the rows at those positions


Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
The World Is Not Enough,1999,Pierce Brosnan,Michael Apted,439.5,158.3,13.5


In [17]:
df = pd.read_csv("jamesbond.csv")
df.head(3)

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [18]:
df.iloc[4:8]
df.iloc[20:] # includes index on the left side of the colon
df.iloc[:4] # doesn't include the index on the right side of the colon

Unnamed: 0,Film,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
0,Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
1,From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
2,Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
3,Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [19]:
df.set_index("Film", inplace = True)
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [20]:
df.loc["GoldenEye"]

Year                            1995
Actor                 Pierce Brosnan
Director             Martin Campbell
Box Office                     518.5
Budget                          76.9
Bond Actor Salary                5.1
Name: GoldenEye, dtype: object

In [21]:
df.iloc[0]

Year                        1985
Actor                Roger Moore
Director               John Glen
Box Office                 275.2
Budget                      54.5
Bond Actor Salary            9.1
Name: A View to a Kill, dtype: object

In [22]:
df.iloc[[0,15,25]] 
# multiple rows 

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


## Second Arguments to `.loc[]` and `.iloc[]` Accessors

In [23]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [24]:
df.loc["Moonraker","Actor"] 
# the first argument asks for the row index
# the second argument asks for the column name
# the output is the value from that specific row index and column name 
# kind of like [x,y] 
df.loc["Moonraker","Director"]

'Lewis Gilbert'

In [25]:
df.loc["Moonraker", ["Director", "Box Office"]]
df.loc[["Moonraker", "A View to a Kill"], ["Director", "Box Office"]]
# can make a list as well [[x1,x2],[y1,y2]]
# makes a dataframe

Unnamed: 0_level_0,Director,Box Office
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Moonraker,Lewis Gilbert,535.0
A View to a Kill,John Glen,275.2


In [86]:
# can also give a range for row index and col name

df.loc["Moonraker", "Year":"Budget"] 
# makes a series

df.loc["Moonraker":"Thunderball", "Director"]
# makes a series

df.loc["Moonraker":"Thunderball", "Director":"Budget"]
# makes a dataframe

df.loc["Moonraker":, "Director":]
# starting from the given row index and col name

df.loc[:"Moonraker",:"Budget"]
# from beginning to the given row index and col name 

Output = None

In [27]:
df.iloc[14,2]

'John Glen'

In [28]:
df.iloc[14,2:5]

Director      John Glen
Box Office        373.8
Budget             53.9
Name: Octopussy, dtype: object

In [29]:
df.iloc[14:20,2:5]

Unnamed: 0_level_0,Director,Box Office,Budget
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopussy,John Glen,373.8,53.9
On Her Majesty's Secret Service,Peter R. Hunt,291.5,37.3
Quantum of Solace,Marc Forster,514.2,181.4
Skyfall,Sam Mendes,943.5,170.2
Spectre,Sam Mendes,726.7,206.3
The Living Daylights,John Glen,313.5,68.8


In [85]:
df.iloc[:14, :5]
Output = None

In [31]:
df.iloc[14:,3:]

Unnamed: 0_level_0,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Octopussy,373.8,53.9,7.8
On Her Majesty's Secret Service,291.5,37.3,0.6
Quantum of Solace,514.2,181.4,8.1
Skyfall,943.5,170.2,14.5
Spectre,726.7,206.3,
The Living Daylights,313.5,68.8,5.2
The Man with the Golden Gun,334.0,27.7,
The Spy Who Loved Me,533.0,45.1,
The World Is Not Enough,439.5,158.3,13.5
Thunderball,848.1,41.9,4.7


In [32]:
df.iloc[13, [2,4,5]]

Director             Irvin Kershner
Budget                           86
Bond Actor Salary               NaN
Name: Never Say Never Again, dtype: object

In [33]:
df.iloc[[14,17], 2:4]

Unnamed: 0_level_0,Director,Box Office
Film,Unnamed: 1_level_1,Unnamed: 2_level_1
Octopussy,John Glen,373.8
Skyfall,Sam Mendes,943.5


## Set New Value for a Specific Cell

In [34]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [35]:
df.loc["Dr. No","Actor"] = "Sir Sean Connery"

In [36]:
df.loc["Dr. No", "Actor"]

'Sir Sean Connery'

In [37]:
df.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]] = [448000000, 7000000, 600000]
# don't forget to match the sequence of the list in .loc[] and the list on the right side

In [38]:
df.loc["Dr. No", ["Box Office", "Budget", "Bond Actor Salary"]]

Box Office           4.48e+08
Budget                  7e+06
Bond Actor Salary      600000
Name: Dr. No, dtype: object

## Set Multiple Values in `DataFrame`

In [39]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [40]:
sean_connery = df["Actor"] == "Sean Connery"

In [41]:
# the WRONG way to do it
# df[sean_connery]["Actor"] = "Sir Sean Connery"

### THE ERROR MESSAGE ###
#
# <ipython-input-126-4797cc5cddc7>:2: SettingWithCopyWarning: 
# A value is trying to be set on a copy of a slice from a DataFrame.
# Try using .loc[row_indexer,col_indexer] = value instead
#
#See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#  df[sean_connery]["Actor"] = "Sir Sean Connery"
#
#########################

In [42]:
# the RIGHT way
df.loc[sean_connery, "Actor"] = "Sir Sean Connery"
# first argument: You want to select all the rows where Sean Connery is the actor
# second argument: You want to select the specific column "Actor" of the selected rows

# to check:
df.value_counts("Actor")

Actor
Sir Sean Connery    7
Roger Moore         7
Pierce Brosnan      4
Daniel Craig        4
Timothy Dalton      2
George Lazenby      1
David Niven         1
dtype: int64

## Rename Index Labels or Columns in a `DataFrame`

In [43]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### Using `.rename()`

In [44]:
# using mapper in parameters
# all of these have the same results: 

df.rename(mapper = {"GoldenEye":"Golden Eye",
                    "The World Is Not Enough":"Best Bond Movie Ever"})
df.rename(mapper = {"GoldenEye":"Golden Eye",
                    "The World Is Not Enough":"Best Bond Movie Ever"}, axis = 0)
df.rename(mapper = {"GoldenEye":"Golden Eye",
                    "The World Is Not Enough":"Best Bond Movie Ever"}, axis = "rows")
df.rename(mapper = {"GoldenEye":"Golden Eye",
                    "The World Is Not Enough":"Best Bond Movie Ever"}, axis = "index")

Output = None

In [45]:
# using index in parameters

df.rename(index = {"GoldenEye":"Golden Eye",
                   "The World Is Not Enough":"Best Bond Movie Ever?"},
         inplace = True)
df

Output = None
# easier to use if you're looking for the index 
# no need to specify axis


In [46]:
df.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [84]:
df.rename(mapper = {"Actor":"Dapper Men",
                   "Box Office":"Revenue"}, 
          axis = 1)
df.rename(mapper = {"Actor":"Dapper Men",
                   "Box Office":"Revenue"}, 
          axis = "columns")
df.rename(columns = {"Actor":"Dapper Men",
                   "Box Office":"Revenue"}, 
          inplace = True)

df
Output = None

In [48]:
df.head(1)

Unnamed: 0_level_0,Year,Dapper Men,Director,Revenue,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


#### Another way to rename 

In [49]:
df.columns = ["Year of Release", "Actor", "Director", "Gross", "Cost", "Salary"]
df.head(1)

Unnamed: 0_level_0,Year of Release,Actor,Director,Gross,Cost,Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


## Delete Rows or Columns from `DataFrame`

In [50]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### Using `.drop()`

In [51]:
# to delete rows, pass one index label or a list of labels
df.drop(["A View to a Kill","Die Another Day","From Russia with Love"]) # returns a new DataFrame; df itself is unchanged
df.drop("Casino Royale", inplace = True) # modifies df; both rows labelled "Casino Royale" are removed
df
Output = None

In [52]:
# to delete a column
df.drop(["Box Office", "Bond Actor Salary","Actor"], axis = 1, inplace = True)
df
Output = None

#### Using `.pop()`

In [53]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [54]:
actor = df.pop("Actor")
# removes from dataframe and stores it into a series 
# like cut and paste

In [83]:
actor
Output = None

#### Using `del` 

In [56]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 26 entries, A View to a Kill to You Only Live Twice
Data columns (total 6 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Year               26 non-null     int64  
 1   Actor              26 non-null     object 
 2   Director           26 non-null     object 
 3   Box Office         26 non-null     float64
 4   Budget             26 non-null     float64
 5   Bond Actor Salary  18 non-null     float64
dtypes: float64(3), int64(1), object(2)
memory usage: 1.4+ KB


In [57]:
del df["Director"]

In [58]:
del df["Year"]

In [82]:
df
Output = None

## Create Random Sample

In [60]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [61]:
# random rows (no random_state is passed, so a different selection comes back on every run)
df.sample() # default is one random row
df.sample(n = 5) # 5 random rows
df.sample(frac = .25) # a random 25% of the rows

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Tomorrow Never Dies,1997,Pierce Brosnan,Roger Spottiswoode,463.2,133.9,10.0
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,


In [81]:
# random columns
df.sample(n = 3, axis = 1) 
Output = None

## The `.nsmallest()` and `.nlargest()` Methods

In [63]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [64]:
df.sort_values("Box Office", ascending = False).head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [65]:
df.nlargest(3,"Box Office") # top 3 in Box office

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [66]:
df.nsmallest(3, "Box Office") # top 3 flops 

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6


In [67]:
df.nlargest(3, "Budget")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Quantum of Solace,2008,Daniel Craig,Marc Forster,514.2,181.4,8.1
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5


In [68]:
df.nsmallest(6,"Bond Actor Salary")

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
On Her Majesty's Secret Service,1969,George Lazenby,Peter R. Hunt,291.5,37.3,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [69]:
df["Box Office"].nlargest(3)

Film
Skyfall        943.5
Thunderball    848.1
Goldfinger     820.4
Name: Box Office, dtype: float64

In [70]:
df["Bond Actor Salary"].nsmallest(6)

Film
Dr. No                             0.6
On Her Majesty's Secret Service    0.6
From Russia with Love              1.6
Goldfinger                         3.2
Casino Royale                      3.3
You Only Live Twice                4.4
Name: Bond Actor Salary, dtype: float64

In [71]:
df["Year"].nsmallest(3)

Film
Dr. No                   1962
From Russia with Love    1963
Goldfinger               1964
Name: Year, dtype: int64

## Filtering with the `.where()` Method

In [72]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [74]:
mask1 = df["Actor"] == "Sean Connery"
df[mask1]

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [76]:
df.where(mask1) # returns a new DataFrame of the same shape; rows where the mask is False are filled with NaN
Output = None

In [80]:
mask2 = df["Box Office"] > 800
# `other` is the value used instead of NaN for rows that fail the combined condition
# (note that "0" here is a string, not the number 0)
df.where(mask1 & mask2, other = "0" )

Output = None

## The `.query()` Method

In [3]:
df = pd.read_csv("jamesbond.csv",index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [10]:
df.columns # returns the column labels as a pandas Index object

Index(['Year', 'Actor', 'Director', 'Box Office', 'Budget',
       'Bond Actor Salary'],
      dtype='object')

In [11]:
# .query() refers to columns by name, which is simplest when the names contain no spaces,
# so every space is replaced with a single underscore ("Box Office" -> "Box_Office")
# (alternatively, a name with spaces can be wrapped in backticks inside the query string)
df.columns = [column_name.replace(" ","_") for column_name in df.columns]
df.head(1)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1


In [12]:
df.query('Actor == "Sean Connery"') # the quotes around the value must differ from the quotes around the whole query string

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Never Say Never Again,1983,Sean Connery,Irvin Kershner,380.0,86.0,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7
You Only Live Twice,1967,Sean Connery,Lewis Gilbert,514.2,59.9,4.4


In [13]:
df.query('Director == "Guy Hamilton"')

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Live and Let Die,1973,Roger Moore,Guy Hamilton,460.3,30.8,
The Man with the Golden Gun,1974,Roger Moore,Guy Hamilton,334.0,27.7,


In [14]:
df.query("Director == 'Terence Young'") # the two kinds of quotes can be swapped: double outside, single inside

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Dr. No,1962,Sean Connery,Terence Young,448.8,7.0,0.6
From Russia with Love,1963,Sean Connery,Terence Young,543.8,12.6,1.6
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [20]:
# can also use not equal
df.query("Actor != 'Roger Moore'").head(3) 

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
Diamonds Are Forever,1971,Sean Connery,Guy Hamilton,442.5,34.7,5.8


In [16]:
df.query("Box_Office > 600")

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Goldfinger,1964,Sean Connery,Guy Hamilton,820.4,18.6,3.2
Skyfall,2012,Daniel Craig,Sam Mendes,943.5,170.2,14.5
Spectre,2015,Daniel Craig,Sam Mendes,726.7,206.3,
Thunderball,1965,Sean Connery,Terence Young,848.1,41.9,4.7


In [23]:
# conditions can be combined with the lowercase words `and` / `or`; no need for the & and | symbols
# only the last expression (the `or` query) is displayed
df.query("Actor == 'Roger Moore' and Director == 'John Glen'")
df.query("Actor == 'Roger Moore' or Director == 'John Glen'").head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
For Your Eyes Only,1981,Roger Moore,John Glen,449.4,60.2,
Licence to Kill,1989,Timothy Dalton,John Glen,250.9,56.7,7.9


In [22]:
# membership in a list can be tested with the lowercase words `in` and `not in`
# only the last expression (the `not in` query) is displayed
df.query("Actor in ['Timothy Dalton','George Lazenby']")
df.query("Actor not in ['Timothy Dalton','George Lazenby']").head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box_Office,Budget,Bond_Actor_Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


## A Review of the `.apply()` Method on Single Columns

In [39]:
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [37]:
def convert_to_string_and_add_millions(number):
    """Return the text form of `number` followed by the suffix " MILLIONS!"."""
    suffix = " MILLIONS!"
    # `!s` forces str(number), exactly what an explicit str() call would produce.
    return f"{number!s}{suffix}"

# .apply() calls the function once per value; the returned strings replace the column.
# Not idempotent: running this cell a second time appends the suffix again.
df["Box Office"] = df["Box Office"].apply(convert_to_string_and_add_millions)

df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 MILLIONS!,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 MILLIONS!,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0 MILLIONS!,85.0,


In [38]:
# Reuse the helper on another column. Re-running this cell appends the suffix a second time.
df["Budget"] = df["Budget"].apply(convert_to_string_and_add_millions)
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 MILLIONS!,54.5 MILLIONS!,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 MILLIONS!,145.3 MILLIONS!,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0 MILLIONS!,85.0 MILLIONS!,


In [41]:
# Start from a freshly loaded frame so the result does not depend on which of the
# single-column cells already ran: re-applying the helper to a converted column
# would produce "... MILLIONS! MILLIONS!".
df = pd.read_csv("jamesbond.csv", index_col = "Film")
df.sort_index(inplace = True)

columns = ["Box Office", "Budget", "Bond Actor Salary"]
for col in columns: # col takes each column name from the list in turn
    df[col] = df[col].apply(convert_to_string_and_add_millions)

In [42]:
# Missing salaries (NaN) are stringified too, which is why the third row reads "nan MILLIONS!".
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2 MILLIONS!,54.5 MILLIONS!,9.1 MILLIONS!
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5 MILLIONS!,145.3 MILLIONS!,3.3 MILLIONS!
Casino Royale,1967,David Niven,Ken Hughes,315.0 MILLIONS!,85.0 MILLIONS!,nan MILLIONS!


## The `.apply()` Method with Row Values

In [43]:
# Fresh frame for the row-wise examples: indexed by "Film" and sorted by title.
df = pd.read_csv("jamesbond.csv", index_col="Film").sort_index()
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


In [47]:
# Each row reaches the function as a Series labelled by column name:
# Year, Actor, Director, Box Office, Budget, Bond Actor Salary

def good_movie(row):
    """Rate one film row.

    Parameters
    ----------
    row : pandas.Series
        One row of the films frame; must carry the labels "Actor" and "Budget".

    Returns
    -------
    str
        "The Best" for Pierce Brosnan films, "Enjoyable" for Roger Moore films
        with a budget above 40, otherwise "I have no clue".
    """
    # Look the values up by label: integer keys on a label-indexed Series are a
    # deprecated positional fallback and silently depend on the column order.
    actor = row["Actor"]
    budget = row["Budget"]

    if actor == "Pierce Brosnan":
        return "The Best"
    if actor == "Roger Moore" and budget > 40:  # a NaN budget compares False here
        return "Enjoyable"
    return "I have no clue"
    
# axis = "columns" hands good_movie one row at a time, as a Series keyed by column name
df.apply(good_movie, axis = "columns")

Film
A View to a Kill                        Enjoyable
Casino Royale                      I have no clue
Casino Royale                      I have no clue
Diamonds Are Forever               I have no clue
Die Another Day                          The Best
Dr. No                             I have no clue
For Your Eyes Only                      Enjoyable
From Russia with Love              I have no clue
GoldenEye                                The Best
Goldfinger                         I have no clue
Licence to Kill                    I have no clue
Live and Let Die                   I have no clue
Moonraker                               Enjoyable
Never Say Never Again              I have no clue
Octopussy                               Enjoyable
On Her Majesty's Secret Service    I have no clue
Quantum of Solace                  I have no clue
Skyfall                            I have no clue
Spectre                            I have no clue
The Living Daylights               I have no 

## The `.copy()` Method

In [74]:
# Reload the films (index: "Film", rows sorted by title) as the starting point for the .copy() examples.
df = pd.read_csv("jamesbond.csv", index_col="Film").sort_index()
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### Changing a row value without using `.copy()`

In [75]:
# Plain column selection without .copy(): `directors` is not independent of df
# (the df.head(3) output further down shows the later assignment reaching df).
directors = df["Director"]
directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [76]:
directors["A View to a Kill"] = "Mister John Glen"

### WARNING MESSAGE (not an error: the assignment still goes through) ###
# <ipython-input-52-55f5cdba50a0>:1: SettingWithCopyWarning: 
# A value is trying to be set on a copy of a slice from a DataFrame
#
# See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
#   directors["A View to a Kill"] = "Mister John Glen"
#########################

# the linked page has further guidance on indexing and selecting data

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  directors["A View to a Kill"] = "Mister John Glen"


In [77]:
# the Series now holds the new value
directors.head(3)

Film
A View to a Kill    Mister John Glen
Casino Royale        Martin Campbell
Casino Royale             Ken Hughes
Name: Director, dtype: object

In [78]:
# df changed as well, because `directors` was not an independent copy.
# NOTE(review): with pandas Copy-on-Write enabled, df would presumably stay unchanged here — confirm on your pandas version.
df.head(3)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,Mister John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,


#### Changing a row value using `.copy()`

In [69]:
# Before running this sub-section, re-run the first cell of this section so df is reloaded from the CSV.
directors = df["Director"].copy() # brand new, independent Series object
directors.head(3)

Film
A View to a Kill          John Glen
Casino Royale       Martin Campbell
Casino Royale            Ken Hughes
Name: Director, dtype: object

In [70]:
directors["A View to a Kill"] = "Mister John Glen" 
# no warning this time: `directors` is a copy, so the original DataFrame is not overwritten

In [67]:
# the copy holds the new value
directors.head(3) 

Film
A View to a Kill    Mister John Glen
Casino Royale        Martin Campbell
Casino Royale             Ken Hughes
Name: Director, dtype: object

In [72]:
df.head(3) # unchanged: the Director of "A View to a Kill" is still "John Glen"

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,1985,Roger Moore,John Glen,275.2,54.5,9.1
Casino Royale,2006,Daniel Craig,Martin Campbell,581.5,145.3,3.3
Casino Royale,1967,David Niven,Ken Hughes,315.0,85.0,
