In [0]:
import re
from pyspark.sql import DataFrame

def clean_column_names(df: DataFrame) -> DataFrame:
    """
    Standardizes column names in a DataFrame for easier processing and consistency.

    Transformations applied to every column name, in this order:
    - Lowercases all letters
    - Replaces each run of characters other than ``a-z`` / ``0-9`` (spaces,
      punctuation, underscores, non-ASCII letters, ...) with a single
      underscore, which also collapses repeated underscores into one
    - Removes leading and trailing underscores

    Note:
        No de-duplication is performed. Different original names can map to
        the same cleaned name (e.g. ``"A B"`` and ``"a_b"``), and a name made
        up only of replaced characters becomes an empty string.

    Args:
    df (pyspark.sql.DataFrame): DataFrame to be cleaned

    Returns:
    pyspark.sql.DataFrame: A new DataFrame with cleaned and standardized column names.
    """
    # Compiled once and reused for every column name.
    non_alnum_run = re.compile(r'[^a-z0-9]+')

    cleaned_cols = [
        non_alnum_run.sub('_', name.lower()).strip('_')
        for name in df.columns
    ]

    # toDF assigns the new names positionally in a single call, instead of
    # chaining one withColumnRenamed (one extra plan step) per column.
    return df.toDF(*cleaned_cols)

# NOTE: abandoned draft of `clean_decimals(df, columns, null_values=None)`.
# Cleaning of decimal columns stored as strings is implemented by
# clean_decimal_columns() below.

In [0]:
from typing import List, Union, Optional
from pyspark.sql import DataFrame
from pyspark.sql.functions import col, regexp_replace, when
from pyspark.sql.types import DoubleType

def _quote_regex_literal(text: str) -> str:
    # Builds a regex that matches `text` literally by wrapping it in \Q...\E.
    # An embedded "\E" would end the quoted section early, so it is split out
    # using the same scheme as java.util.regex.Pattern.quote.
    return "\\Q" + text.replace("\\E", "\\E\\\\E\\Q") + "\\E"


def clean_decimal_columns(
    df: DataFrame,
    columns: Union[str, List[str]],
    replace_nulls: Optional[Union[str, List[str]]] = None,
    decimal_separator: str = ".",
    thousands_separator: str = ","
) -> DataFrame:
    """
    Cleans one or more decimal columns stored as strings:
      - Replaces specified string values with null
      - Removes thousands separators
      - Converts decimal separator to dot
      - Casts column to DoubleType

    Args:
        df (DataFrame): Spark DataFrame to transform.
        columns (str | list[str]): Column name or list of columns to clean.
        replace_nulls (str | list[str], optional): String value, or list of string
            values, to replace with null before casting.
        decimal_separator (str, optional): Decimal separator in the string values (default ".").
        thousands_separator (str, optional): Thousands separator in the string values
            (default ","). Pass "" or None to skip thousands-separator removal.

    Returns:
        DataFrame: Transformed Spark DataFrame with cleaned decimal columns.

    Raises:
        ValueError: If ``decimal_separator`` is empty, or if it is identical to
            ``thousands_separator``. With identical separators the decimal mark
            would be stripped together with the thousands marks, silently
            turning e.g. "1,5" into 15.0.
    """
    if not decimal_separator:
        raise ValueError("decimal_separator must be a non-empty string")
    if thousands_separator and thousands_separator == decimal_separator:
        raise ValueError(
            "decimal_separator and thousands_separator must differ "
            f"(both are {decimal_separator!r}); pass thousands_separator='' "
            "if the values contain no thousands separators"
        )

    if isinstance(columns, str):
        columns = [columns]

    # Normalize the null sentinels once, outside the per-column loop.
    null_markers: List[str] = []
    if replace_nulls:
        if isinstance(replace_nulls, str):
            null_markers = [replace_nulls]
        else:
            null_markers = list(replace_nulls)

    for col_name in columns:
        cleaned_col = col(col_name)

        # Replace specified values with null
        if null_markers:
            cleaned_col = when(cleaned_col.isin(null_markers), None).otherwise(cleaned_col)

        # Remove thousands separator (must run before the decimal separator
        # is rewritten to ".", since "." may itself be the thousands mark)
        if thousands_separator:
            cleaned_col = regexp_replace(
                cleaned_col, _quote_regex_literal(thousands_separator), ""
            )

        # Normalize decimal separator
        if decimal_separator != ".":
            cleaned_col = regexp_replace(
                cleaned_col, _quote_regex_literal(decimal_separator), "."
            )

        # Cast to double and write the column back with a single withColumn call
        df = df.withColumn(col_name, cleaned_col.cast(DoubleType()))

    return df