In [1]:
import pandas as pd

In [115]:
# Load the James Bond dataset; no index_col is given, so rows get the default integer index
df = pd.read_csv("jamesbond.csv")

In [22]:
# set_index and reset_index Methods
# You can set index on DF creation using pd.read_csv("jamesbond.csv", index_col = "Film")

# df.set_index("Film", inplace = True)
# df.reset_index(inplace = True)

# To change the index, call reset_index first and then set_index; otherwise the old index is discarded instead of being kept as a column
# df.set_index("Film", inplace = True)
# df.reset_index(inplace = True)
# df.set_index("Year", inplace = True)
# df.reset_index(inplace = True)

In [131]:
# .loc[] Accessor: retrieve rows by index label
# Film titles become the index and are sorted alphabetically; the label slices below follow that order
df = pd.read_csv("jamesbond.csv", index_col = "Film").sort_index()

# If the label matches one row, returns a Series
df.loc["Goldfinger"]
# If the label matches several rows (duplicate index labels), returns a DF
df.loc["Casino Royale"]
# Can give label ranges in .loc (BOTH endpoints are included)
df.loc["Diamonds Are Forever":"From Russia with Love"]
# Can add a step to the slice as well (every 2nd row from the label onward)
df.loc["Diamonds Are Forever"::2]
# Can pull a list of labels (result follows the order of the list, not the DF)
df.loc[["Die Another Day", "Diamonds Are Forever"]]

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [88]:
# Retrieve Rows by Index Position with .iloc[] Accessor

# A single integer position returns that row as a Series
df.iloc[0]
# Can give ranges in .iloc (end position is EXCLUDED, unlike .loc): positions 3, 4, 5, 6
df.iloc[3:7]
# Can pull a list of positions
df.iloc[[3, 5, 9, 15]]

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [89]:
# Second Arguments to loc and iloc Accessors

# Comma separate the row selector and the column selector
df.loc["Moonraker", "Actor"]
# Can still pass in lists for both the rows and the columns
df.loc[["Moonraker", "A View to a Kill"], ["Director", "Box Office"]]
# Can also still slice on both axes: every 3rd row from "Moonraker", columns "Director" through "Budget" (inclusive)
df.loc["Moonraker"::3, "Director":"Budget"]

# iloc can do all of the same, using integer positions instead of labels
df.iloc[12, 1]
df.iloc[1::4, [2, 4, 5]]

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [119]:
# Set New Value for a Specific Cell
# Work on an independent copy: a plain `mutatingDF = df` would only alias df,
# so every assignment below would silently rewrite the original data as well
mutatingDF = df.copy()

# Can update a single cell (row label, column label)
mutatingDF.loc["Dr. No", "Actor"] = "Sir Sean Connery"
# Can update a column for every row that matches a condition
mutatingDF.loc[mutatingDF["Actor"] == "Sean Connery", "Actor"] = "Sir Sean Connery"
# Can update several whole columns at once (figures in millions -> plain units)
moneyColumns = ["Box Office", "Budget", "Bond Actor Salary"]
mutatingDF.loc[:, moneyColumns] = mutatingDF.loc[:, moneyColumns].mul(1000000)

In [127]:
# Rename labels in a DF using .rename()
# .rename() returns a NEW DF each time; df itself is never changed by these calls

# Mapper utilizes dictionary to rename
# Must indicate axis if you are not renaming the index
df.rename(mapper = {"GoldenEye": "Golden Eye",
                    "From Russia with Love": "From Russia With Love"},
          axis = "index")

# Can use index to rename without the axis call
df.rename(index = {"GoldenEye": "Golden Eye"})
# Renaming columns (axis = 1 OR axis = "columns")
# Keep the renamed result so the second call has a "Release Date" column to rename back
releaseDateDF = df.rename(mapper = {"Year": "Release Date"}, axis = 1)
releaseDateDF.rename(columns = {"Release Date": "Year"})

# .columns attribute is able to be overwritten
# Must give a name for EVERY column
# Overwrite the labels on a copy so df keeps its "Year" column for the cells that follow
relabeledDF = df.copy()
relabeledDF.columns = ["Release Year", "Actor", "Director", "Box Office", "Budget", "Bond Actor Salary"]

In [133]:
# Delete from a DF
# Work on an independent copy: `mutatingDF = df` is only a second name for the
# same object, so the in-place deletions shown below (pop / del) would hit df too
mutatingDF = df.copy()

# To drop a specific row at an index (returns a new DF; mutatingDF is unchanged)
mutatingDF.drop("A View to a Kill")
# To drop a column (also returns a new DF)
mutatingDF.drop("Box Office", axis = "columns")

# Pop deletes a column in place and returns it
# actor = mutatingDF.pop("Actor")

# del deletes a column in place without returning it
# del mutatingDF["Director"]

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [138]:
# Extracting a random sample
# NOTE: no random_state is passed, so each run can return different rows/columns

# Sample n rows
df.sample(3)
# Sample a fraction of the rows (here 25%)
df.sample(frac = .25)
# Can also pull sample of columns
df.sample(2, axis = "columns")
# Can chain samples: 3 random columns, then a random half of the rows
df.sample(3, axis = 1).sample(frac = .5)

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [145]:
# .nsmallest and .nlargest Methods
# Both return their rows already ordered, so no extra sort_values call is needed

# 3 Smallest Box Office Grosses (returned in ascending order)
df.nsmallest(3, "Box Office")
# 5 Largest Budgets (returned in descending order)
df.nlargest(5, "Budget")

# Can perform on just the Series too
df["Year"].nsmallest(2)

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [151]:
# Filtering with the where Method

mask = df["Actor"] == "Sean Connery"
# Keeps every row of the DF: rows where the mask is False are filled with NaN
df.where(mask)
# Combine two masks with & to find Sean Connery films where box office > 800
mask2 = df["Box Office"] > 800
df.where(mask & mask2)

Unnamed: 0_level_0,Year,Actor,Director,Box Office,Budget,Bond Actor Salary
Film,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A View to a Kill,,,,,,
Casino Royale,,,,,,
Casino Royale,,,,,,
Diamonds Are Forever,,,,,,
Die Another Day,,,,,,
Dr. No,,,,,,
For Your Eyes Only,,,,,,
From Russia with Love,,,,,,
GoldenEye,,,,,,
Goldfinger,1964.0,Sean Connery,Guy Hamilton,820.4,18.6,3.2


In [162]:
# query Method

# Argument must be a string, and bare column names inside it cannot contain spaces,
# so the spaces in the column labels are replaced with underscores first
df.columns = [name.replace(" ", "_") for name in df.columns]
# String values inside the query use the other kind of quote than the query string itself
df.query('Actor == "Sean Connery"')
# Conditions can be combined inside the one string
df.query('Actor == "Roger Moore" and Director == "John Glen"')

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [179]:
# apply Method for Whole DF
# Reload the data so this cell starts again from the labels and values stored in the CSV
df = pd.read_csv("jamesbond.csv", index_col = "Film")

# The money columns that get reformatted as display strings below
columnsToUpdate = ["Box Office", "Budget", "Bond Actor Salary"]
def dollarConvert(num):
    """Wrap a value in a dollar sign and an " M" suffix, e.g. 7.0 -> "$7.0 M"."""
    return "$%s M" % (num,)
# Replace each money column with its "$... M" display strings, one column at a time
for columnName in columnsToUpdate:
    formattedColumn = df[columnName].apply(dollarConvert)
    df[columnName] = formattedColumn
    
def movieQuality(row):
    """Rate one film from its lead actor and budget.

    Parameters
    ----------
    row : pandas.Series
        One row of the DF; must carry the labels "Actor" and "Budget".
        "Budget" may be a number (in millions) or a string produced by
        dollarConvert such as "$7.0 M".

    Returns
    -------
    str
        "The Best" for Pierce Brosnan films, "Enjoyable" for Roger Moore
        films with a budget above 40 (million), otherwise "Neutral".
    """
    # Look the values up by label: integer keys on a label-indexed Series
    # are deprecated and depend on the column order
    actor = row["Actor"]
    budget = row["Budget"]

    # Recover the number from an already formatted budget so the comparison
    # below is numeric; comparing the strings ranks "$7.0 M" above "$40 M"
    if isinstance(budget, str):
        budget = float(budget.replace("$", "").replace("M", "").strip())

    if actor == "Pierce Brosnan":
        return "The Best"
    elif actor == "Roger Moore" and budget > 40:
        return "Enjoyable"
    else:
        return "Neutral"
    
# axis = "columns" hands each row to movieQuality as a Series
df.apply(movieQuality, axis = "columns")

# Ending the cell on an assignment means no result is displayed for it
Output = False

In [188]:
# .copy() returns an independent object: edits made to the copy never reach the original DF

directors = df["Director"].copy()
directors.loc["Dr. No"] = "Mr. Terence Young"

# Read both values back to show that only the copy changed
mutatedValue = directors.loc["Dr. No"]
originalValue = df.loc["Dr. No", "Director"]
print("Mutated Value: {}".format(mutatedValue))
print("Original Value: {}".format(originalValue))

Mutated Value: Mr. Terence Young
Original Value: Terence Young
