# ================ USER DEFINED FUNCTIONS ================

In [1]:
import pandas as pd
import numpy as np

##### Applying a Function to a Column

In [3]:
# Toy sales data: one (store, sales) record per row.
df = pd.DataFrame(
    [("A", 10), ("A", 20), ("B", 15), ("B", 25)],
    columns=["store", "sales"],
)
df

Unnamed: 0,store,sales
0,A,10
1,A,20
2,B,15
3,B,25


In [5]:
# Helper applied element-wise in the cells below.
def double(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled

##### map

In [9]:
# Series.map calls `double` once for every value in the "sales" column.
doubled_sales = df["sales"].map(double)
df["double_sales"] = doubled_sales
df

Unnamed: 0,store,sales,double_sales
0,A,10,20
1,A,20,40
2,B,15,30
3,B,25,50


##### apply

In [10]:
# Series.apply with an inline lambda: add 100 to every "sales" value.
sales_plus_100 = df["sales"].apply(lambda sale: sale + 100)
df["plus100_sales"] = sales_plus_100
df

Unnamed: 0,store,sales,double_sales,plus100_sales
0,A,10,20,110
1,A,20,40,120
2,B,15,30,115
3,B,25,50,125


##### Applying a Function Across Multiple Columns (Row-wise)

In [11]:
def custom_tax(row, taxed_store="A", tax_multiplier=1.05):
    """Return the sales value of ``row``, marked up when its store is taxed.

    Parameters
    ----------
    row : mapping-like
        Anything indexable by ``"store"`` and ``"sales"``, e.g. the row that
        ``df.apply(custom_tax, axis=1)`` passes in.
    taxed_store : default "A"
        Store label whose sales are multiplied by ``tax_multiplier``.
    tax_multiplier : default 1.05
        Factor applied to the sales of ``taxed_store``.

    Returns
    -------
    ``row["sales"] * tax_multiplier`` when ``row["store"] == taxed_store``,
    otherwise ``row["sales"]`` unchanged.

    Extra keyword arguments given to ``DataFrame.apply`` are forwarded here,
    e.g. ``df.apply(custom_tax, axis=1, tax_multiplier=1.10)``.
    """
    if row["store"] == taxed_store:
        return row["sales"] * tax_multiplier
    # Every other store is returned untouched (no multiplier applied).
    return row["sales"]

# axis=1 passes each row to custom_tax, so it can read several columns at once.
taxed_sales = df.apply(custom_tax, axis=1)
df["taxed_sales"] = taxed_sales
df

Unnamed: 0,store,sales,double_sales,plus100_sales,taxed_sales
0,A,10,20,110,10.5
1,A,20,40,120,21.0
2,B,15,30,115,15.0
3,B,25,50,125,25.0


##### Groupby with `transform`

In [12]:
def my_range(series):
    """Return the spread of ``series``: its largest value minus its smallest."""
    highest = series.max()
    lowest = series.min()
    return highest - lowest

# transform broadcasts each store's range back onto that store's rows, so the
# result has one value per original row and lines up with df's index.
sales_by_store = df.groupby("store")["sales"]
df["sales_range"] = sales_by_store.transform(my_range)
df

Unnamed: 0,store,sales,double_sales,plus100_sales,taxed_sales,sales_range
0,A,10,20,110,10.5,10
1,A,20,40,120,21.0,10
2,B,15,30,115,15.0,10
3,B,25,50,125,25.0,10


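##### ❗❗ NaN in result if `transform` is not used ❗❗

`apply` returns one value per store, indexed by the store label, which does not align with the row index of `df` — so the assigned column is all NaN.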

In [None]:
# Same definition as the my_range used in the transform example.
def my_range(series):
    """Return the difference between the maximum and minimum of ``series``."""
    top = series.max()
    bottom = series.min()
    return top - bottom

# Pitfall demo: .apply(my_range) collapses each store to ONE value, and the
# result is indexed by store label ("A", "B") rather than by row.
range_by_store = df.groupby("store")["sales"].apply(my_range)
# Column assignment aligns on index; df's row labels (0..3) match none of the
# store labels, so every row of "sales_range" becomes NaN.
df["sales_range"] = range_by_store
df

Unnamed: 0,store,sales,double_sales,plus100_sales,taxed_sales,sales_range
0,A,10,20,110,10.5,
1,A,20,40,120,21.0,
2,B,15,30,115,15.0,
3,B,25,50,125,25.0,


##### Returning Multiple Values

In [15]:
# Returning a Series from the applied function yields several statistics per
# store; the result is indexed by (store, statistic name).
sales_by_store = df.groupby("store")["sales"]
summary = sales_by_store.apply(
    lambda sales: pd.Series({"mean": sales.mean(), "std": sales.std()})
)
summary

store      
A      mean    15.000000
       std      7.071068
B      mean    20.000000
       std      7.071068
Name: sales, dtype: float64

##### Vectorized Custom Functions (UDFs with NumPy)

In [18]:
def zscore(x, ddof=0):
    """Standardize ``x``: subtract its mean, then divide by its standard deviation.

    Parameters
    ----------
    x : array-like
        Numeric values (e.g. a NumPy array or the per-group Series that
        ``groupby(...).transform`` passes in).
    ddof : int, default 0
        Delta degrees of freedom forwarded to ``np.std``. The default 0 is the
        population standard deviation, which is what a bare ``np.std(x)``
        computes. Pass ``ddof=1`` for the sample standard deviation, matching
        the default of pandas' ``Series.std()``.

    Returns
    -------
    Same shape as ``x``, holding ``(x - mean) / std`` element-wise.

    Notes
    -----
    Zero spread is not guarded: if every value in ``x`` is identical the
    standard deviation is 0 and the division yields NaN/inf.
    """
    return (x - np.mean(x)) / np.std(x, ddof=ddof)

# transform keeps one z-score per original row, computed within each store.
sales_by_store = df.groupby("store")["sales"]
df["zscore"] = sales_by_store.transform(zscore)
df


Unnamed: 0,store,sales,double_sales,plus100_sales,taxed_sales,sales_range,zscore
0,A,10,20,110,10.5,,-1.0
1,A,20,40,120,21.0,,1.0
2,B,15,30,115,15.0,,-1.0
3,B,25,50,125,25.0,,1.0
