In [1]:
import pandas as pd
import numpy as np

In [3]:
# pandas apply
data = {
    "State": ["California", "Texas", "New York", "Florida", "Illinois", "Georgia", "Washington"],
    "AvgTempF": [59, 65, 52, 70, 51, 67, 50],
    "SummerTemp":[79, 85, 62, 90, 60, 77, 60],
    "WinterTemp":[49, 45, 42, 50, 41, 57, 40]
}
df = pd.DataFrame(data)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp
0,California,59,79,49
1,Texas,65,85,45
2,New York,52,62,42
3,Florida,70,90,50
4,Illinois,51,60,41
5,Georgia,67,77,57
6,Washington,50,60,40


In [None]:
# Example 1 - if else state location

# One of the most common use cases for apply is writing an if/else function and creating a new column based around the logic. In this example we want to classify the state location

def get_region(state):
    south = ["Texas", "Florida", "Georgia"]
    west = ["California", "Washington"]
    midwest = ["Illinois"]
    northeast = ["New York"]

    if state in south:
        return "South"
    elif state in west:
        return "West"
    elif state in midwest:
        return "Midwest"
    elif state in northeast:
        return "Northeast"
    else:
        return "Unknown"

df["Region"] = df["State"].apply(get_region)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region
0,California,59,79,49,West
1,Texas,65,85,45,South
2,New York,52,62,42,Northeast
3,Florida,70,90,50,South
4,Illinois,51,60,41,Midwest
5,Georgia,67,77,57,South
6,Washington,50,60,40,West


In [5]:
# Example 2 Convert Fahrenheit to Celsius Using a Lambda Function
df["AvgTempC"] = df["AvgTempF"].apply(lambda x: (x - 32) * 5/9)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region,AvgTempC
0,California,59,79,49,West,15.0
1,Texas,65,85,45,South,18.333333
2,New York,52,62,42,Northeast,11.111111
3,Florida,70,90,50,South,21.111111
4,Illinois,51,60,41,Midwest,10.555556
5,Georgia,67,77,57,South,19.444444
6,Washington,50,60,40,West,10.0


In [6]:
# Example 3 Multiple Columns
df[["SummerTempCelc", "WinterTempCelc"]] = df[["SummerTemp", "WinterTemp"]].apply(lambda x: (x - 32) * 5/9)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region,AvgTempC,SummerTempCelc,WinterTempCelc
0,California,59,79,49,West,15.0,26.111111,9.444444
1,Texas,65,85,45,South,18.333333,29.444444,7.222222
2,New York,52,62,42,Northeast,11.111111,16.666667,5.555556
3,Florida,70,90,50,South,21.111111,32.222222,10.0
4,Illinois,51,60,41,Midwest,10.555556,15.555556,5.0
5,Georgia,67,77,57,South,19.444444,25.0,13.888889
6,Washington,50,60,40,West,10.0,15.555556,4.444444


In [7]:
# Example 4 Built in functions
df["AvglogTemp"] = df["AvgTempF"].apply(np.log)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region,AvgTempC,SummerTempCelc,WinterTempCelc,AvglogTemp
0,California,59,79,49,West,15.0,26.111111,9.444444,4.077537
1,Texas,65,85,45,South,18.333333,29.444444,7.222222,4.174387
2,New York,52,62,42,Northeast,11.111111,16.666667,5.555556,3.951244
3,Florida,70,90,50,South,21.111111,32.222222,10.0,4.248495
4,Illinois,51,60,41,Midwest,10.555556,15.555556,5.0,3.931826
5,Georgia,67,77,57,South,19.444444,25.0,13.888889,4.204693
6,Washington,50,60,40,West,10.0,15.555556,4.444444,3.912023


In [8]:
# Example 5 of applying a function row-wise to create a custom description

# Say you want to grab multiple pieces of information from a row. This time, we have to pass in a row to our function and then specify columns within it.

def create_description(row):
    return f"{row['State']} has an average temperature of {row['AvgTempF']}°F ({row['AvgTempC']:.1f}°C)."

df["Description"] = df.apply(create_description, axis=1)
df["Description"]

0    California has an average temperature of 59°F ...
1    Texas has an average temperature of 65°F (18.3...
2    New York has an average temperature of 52°F (1...
3    Florida has an average temperature of 70°F (21...
4    Illinois has an average temperature of 51°F (1...
5    Georgia has an average temperature of 67°F (19...
6    Washington has an average temperature of 50°F ...
Name: Description, dtype: object

In [10]:
# Example 6/7 with args and kwargs
def temp_level(temp, high_temp, low_temp):
  if temp > high_temp:
    return "hot"
  elif temp < low_temp:
    return "cold"
  else:
    return "medium"

df['temp_rating_summer'] = df['SummerTemp'].apply(temp_level, args = (80, 60))

df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region,AvgTempC,SummerTempCelc,WinterTempCelc,AvglogTemp,Description,temp_rating_summer
0,California,59,79,49,West,15.0,26.111111,9.444444,4.077537,California has an average temperature of 59°F ...,medium
1,Texas,65,85,45,South,18.333333,29.444444,7.222222,4.174387,Texas has an average temperature of 65°F (18.3...,hot
2,New York,52,62,42,Northeast,11.111111,16.666667,5.555556,3.951244,New York has an average temperature of 52°F (1...,medium
3,Florida,70,90,50,South,21.111111,32.222222,10.0,4.248495,Florida has an average temperature of 70°F (21...,hot
4,Illinois,51,60,41,Midwest,10.555556,15.555556,5.0,3.931826,Illinois has an average temperature of 51°F (1...,medium
5,Georgia,67,77,57,South,19.444444,25.0,13.888889,4.204693,Georgia has an average temperature of 67°F (19...,medium
6,Washington,50,60,40,West,10.0,15.555556,4.444444,3.912023,Washington has an average temperature of 50°F ...,medium


In [12]:
df['temp_rating_winter'] = df['WinterTemp'].apply(temp_level, high_temp = 50, low_temp = 30)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region,AvgTempC,SummerTempCelc,WinterTempCelc,AvglogTemp,Description,temp_rating_summer,temp_rating_winter
0,California,59,79,49,West,15.0,26.111111,9.444444,4.077537,California has an average temperature of 59°F ...,medium,medium
1,Texas,65,85,45,South,18.333333,29.444444,7.222222,4.174387,Texas has an average temperature of 65°F (18.3...,hot,medium
2,New York,52,62,42,Northeast,11.111111,16.666667,5.555556,3.951244,New York has an average temperature of 52°F (1...,medium,medium
3,Florida,70,90,50,South,21.111111,32.222222,10.0,4.248495,Florida has an average temperature of 70°F (21...,hot,medium
4,Illinois,51,60,41,Midwest,10.555556,15.555556,5.0,3.931826,Illinois has an average temperature of 51°F (1...,medium,medium
5,Georgia,67,77,57,South,19.444444,25.0,13.888889,4.204693,Georgia has an average temperature of 67°F (19...,medium,hot
6,Washington,50,60,40,West,10.0,15.555556,4.444444,3.912023,Washington has an average temperature of 50°F ...,medium,medium


In [13]:
df[['temp_rating_summer', 'temp_rating_winter']]

Unnamed: 0,temp_rating_summer,temp_rating_winter
0,medium,medium
1,hot,medium
2,medium,medium
3,hot,medium
4,medium,medium
5,medium,hot
6,medium,medium


In [14]:
# Example 8 apply function to each column axis = 0, can find a summary stat fast
def median(x):
    return x.median()

df[['AvgTempF', 'AvgTempC', 'SummerTemp', 'WinterTemp']].apply(median, axis = 0)
df

Unnamed: 0,State,AvgTempF,SummerTemp,WinterTemp,Region,AvgTempC,SummerTempCelc,WinterTempCelc,AvglogTemp,Description,temp_rating_summer,temp_rating_winter
0,California,59,79,49,West,15.0,26.111111,9.444444,4.077537,California has an average temperature of 59°F ...,medium,medium
1,Texas,65,85,45,South,18.333333,29.444444,7.222222,4.174387,Texas has an average temperature of 65°F (18.3...,hot,medium
2,New York,52,62,42,Northeast,11.111111,16.666667,5.555556,3.951244,New York has an average temperature of 52°F (1...,medium,medium
3,Florida,70,90,50,South,21.111111,32.222222,10.0,4.248495,Florida has an average temperature of 70°F (21...,hot,medium
4,Illinois,51,60,41,Midwest,10.555556,15.555556,5.0,3.931826,Illinois has an average temperature of 51°F (1...,medium,medium
5,Georgia,67,77,57,South,19.444444,25.0,13.888889,4.204693,Georgia has an average temperature of 67°F (19...,medium,hot
6,Washington,50,60,40,West,10.0,15.555556,4.444444,3.912023,Washington has an average temperature of 50°F ...,medium,medium
