In [1]:
import pandas as pd
from pydataset import data

In [2]:
# Load the "tips" sample dataset through pydataset's data() helper
df = data("tips")
# Peek at the first few rows to see which columns we have to work with
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,16.99,1.01,Female,No,Sun,Dinner,2
2,10.34,1.66,Male,No,Sun,Dinner,3
3,21.01,3.5,Male,No,Sun,Dinner,3
4,23.68,3.31,Male,No,Sun,Dinner,2
5,24.59,3.61,Female,No,Sun,Dinner,4


In [3]:
# Count how many rows fall on each (abbreviated) day value
df["day"].value_counts()

Sat     87
Sun     76
Thur    62
Fri     19
Name: day, dtype: int64

## The .map method transforms a column's values with a dictionary
- A dictionary is like a list, but each value is looked up by a label (its key) instead of by position.
- A perfect use of .map is transforming abbreviated values into their full names

In [4]:
# Goal: Transform abbreviated days into full day names
# We're reassigning the column to be the transformed values
# Each key is a value currently in the column; .map will swap it for the paired value
day_names = dict(
    Sat="Saturday",
    Sun="Sunday",
    Fri="Friday",
    Thur="Thursday",
)
day_names

{'Sat': 'Saturday', 'Sun': 'Sunday', 'Fri': 'Friday', 'Thur': 'Thursday'}

In [5]:
# In the general form: dataframe["column"].map({"input1": "output1", "input2": "output2"})
# .map looks up each value in the column as a "key" and swaps in the matching dictionary "value"
# .map returns a new series, so we assign it back to the column to keep the result
# Careful: run this cell only once -- values with no matching key become NaN
df["day"] = df["day"].map(day_names)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,16.99,1.01,Female,No,Sunday,Dinner,2
2,10.34,1.66,Male,No,Sunday,Dinner,3
3,21.01,3.5,Male,No,Sunday,Dinner,3
4,23.68,3.31,Male,No,Sunday,Dinner,2
5,24.59,3.61,Female,No,Sunday,Dinner,4


## So what does .apply do?
- The .apply method takes in a function (by name, or as a lambda) to apply to rows or columns
- Any time I feel like I need a for-loop for a dataframe, I use .apply
- We'll use apply when the built-in pandas methods and operators like +, -, *, >, >=, == are not sufficient
- .apply will run a function on each row or column, depending on axis argument
- .apply might feel funny b/c it takes in a function as its input

In [6]:
# Remember how to "blow off the loop" with looping problems
# If I need a function to run on every row of a dataframe, I blow off pandas (for a second)
# Let's say we need to transform "Sunday" into "Sun", "Saturday" into "Sat", etc..
# Let's pretend we don't have a dataframe, we only have a single string

In [7]:
# Let's build a simple function that takes in a variable 
# If the variable is "Saturday", then it returns "Sat", if "Sunday" returns "Sun", etc..
def abbreviate(string):
    """Return the short form of a full day name.

    Recognizes "Saturday", "Sunday", "Friday" and "Thursday". Any other
    input matches nothing and the function returns None.
    """
    # (full name, abbreviation) pairs, compared in this order
    day_pairs = (
        ("Saturday", "Sat"),
        ("Sunday", "Sun"),
        ("Friday", "Fri"),
        ("Thursday", "Thur"),
    )
    for full_name, short_name in day_pairs:
        if string == full_name:
            return short_name
    return None

In [8]:
# Try the function on two plain strings first, before involving the dataframe
abbreviate("Saturday"), abbreviate("Thursday")

('Sat', 'Thur')

In [9]:
# So far, we run functions by typing the function name followed by parentheses
# .apply calls the function for us, so we pass just the name -- no parentheses
# On a series/column, .apply runs the function on each value (one per row) of that column
# On a whole dataframe, .apply defaults to axis=0 and hands each column to the function;
# pass axis=1 to hand it each row instead
# A function like .apply that takes another function as its input is a higher order function
df["day"] = df["day"].apply(abbreviate)
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
1,16.99,1.01,Female,No,Sun,Dinner,2
2,10.34,1.66,Male,No,Sun,Dinner,3
3,21.01,3.5,Male,No,Sun,Dinner,3
4,23.68,3.31,Male,No,Sun,Dinner,2
5,24.59,3.61,Female,No,Sun,Dinner,4


## .map vs .apply
- .map transforms values kind of like an if/elif/elif/elif (values with no matching key become NaN)
- On a dataframe, .apply passes each column to the function by default (axis=0); use axis=1 for rows
- .apply takes a function as its input argument
- .apply documentation https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.apply.html
- .apply is like a manager that runs the function you send it
- because .apply takes a function as its argument, you'll often see lambdas passed into .apply