# User-Defined Functions

In [1]:
import polars as pl
from datetime import datetime
import re

In [2]:
def fnToTitleCase(expr: pl.Expr) -> pl.Expr:
    """Title-case a string expression word by word.

    The value is trimmed, split on single spaces, each word is rewritten as
    "first character upper-cased + remainder lower-cased", and the words are
    joined back with single spaces.

    Args:
        expr: A string-typed polars expression.

    Returns:
        A string expression holding the title-cased text.
    """
    word = pl.element()
    # Lower-case the tail (not the head): otherwise "jOHN" stays "JOHN".
    capitalised = (
        word.str.slice(0, 1).str.to_uppercase()
        + word.str.slice(1).str.to_lowercase()
    )
    # `str.split` yields a List column, so the `.list` namespace is required;
    # `.arr` is reserved for fixed-size Array columns.
    return (
        expr.str.strip_chars()
        .str.split(" ")
        .list.eval(capitalised)
        .list.join(" ")
    )

In [None]:
def fnRemoveSpecialChars(expr: pl.Expr) -> pl.Expr:
    """Drop every character that is not an ASCII letter, digit, space, dash or underscore.

    Args:
        expr: A string-typed polars expression.

    Returns:
        The expression with all other characters replaced by the empty string.
    """
    # Negated character class: anything outside the allowed set is removed.
    disallowed = r"[^0-9A-Za-z _-]"
    return expr.str.replace_all(disallowed, "")


In [None]:
def fnClean(expr: pl.Expr) -> pl.Expr:
    """Trim, strip special characters, then title-case a string expression.

    Args:
        expr: A string-typed polars expression.

    Returns:
        The result of running the value through ``str.strip_chars``,
        ``fnRemoveSpecialChars`` and ``fnToTitleCase``, in that order.
    """
    trimmed = expr.str.strip_chars()
    without_specials = fnRemoveSpecialChars(trimmed)
    return fnToTitleCase(without_specials)


In [None]:
def fnTranslation(expr: pl.Expr) -> pl.Expr:
    """Clean a string expression and replace look-alike digits with letters.

    After ``fnClean`` the digits 3, 4, 1, 0 and 5 are replaced by the
    lower-case letters e, a, i, o and s respectively.

    Note: the substitution runs after title-casing, so a digit sitting at the
    start of a word turns into a lower-case letter.

    Args:
        expr: A string-typed polars expression.

    Returns:
        The cleaned expression with the digit substitutions applied.
    """
    # (regex pattern, replacement) pairs, applied in this order.
    substitutions = (
        ("[3]", "e"),
        ("[4]", "a"),
        ("[1]", "i"),
        ("[0]", "o"),
        ("[5]", "s"),
    )
    translated = fnClean(expr)
    for pattern, letter in substitutions:
        translated = translated.str.replace_all(pattern, letter)
    return translated


In [None]:
def fnDateToInt(expr: pl.Expr) -> pl.Expr:
    """Convert ``YYYY-MM-DD`` date strings to a ``DDMMYYYY`` integer.

    The string is parsed as a date (non-strict), re-rendered as day, month,
    year with no separators, and cast to ``Int64``. Because the result is an
    integer, a leading zero from the day is not retained
    (e.g. "05012023" becomes 5012023).

    Args:
        expr: A string-typed polars expression holding ISO-style dates.

    Returns:
        An ``Int64`` expression.
    """
    parsed = expr.str.strptime(pl.Date, "%Y-%m-%d", strict=False)
    day_month_year = parsed.dt.strftime("%d%m%Y")
    return day_month_year.cast(pl.Int64)


In [None]:
def fnCleanValidateEmail(expr: pl.Expr) -> pl.Expr:
    """Normalise an e-mail expression and null out values that fail validation.

    Cleaning removes every character other than ASCII letters, digits,
    ``_``, ``.`` and ``@`` and lower-cases the result. The cleaned value is
    kept only when it matches the validation pattern; otherwise it becomes
    null.

    The result always has the same length as the input, so it is safe to use
    inside ``with_columns``. (Filtering the expression instead would shorten
    the column and fail with a length mismatch as soon as one address is
    invalid.)

    Args:
        expr: A string-typed polars expression holding e-mail addresses.

    Returns:
        A string expression: the cleaned address, or null when invalid.
    """
    cleaned = expr.str.replace_all(r"[^0-9A-Za-z_.@]", "").str.to_lowercase()

    # NOTE(review): cleaning strips "-", so the "-" allowed in the domain part
    # of the pattern below can never match; hyphenated domains are altered.
    is_valid = cleaned.str.contains(
        r"^[A-Za-z0-9._]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$"
    )

    # Row-wise conditional keeps the column length intact.
    return pl.when(is_valid).then(cleaned).otherwise(None)


In [None]:
# Run each cleaning helper over its column. All outputs overwrite the source
# column except the date, which is written to the new `signup_date_int`.
df = df.with_columns(
    pl.col("customer_name").pipe(fnClean).alias("customer_name"),
    pl.col("phone").pipe(fnRemoveSpecialChars).alias("phone"),
    pl.col("state").pipe(fnTranslation).alias("state"),
    pl.col("signup_date_str").pipe(fnDateToInt).alias("signup_date_int"),
    pl.col("email").pipe(fnCleanValidateEmail).alias("email"),
)
