In [1]:
import pandas as pd
from typing import List, Optional

In [2]:
def load_data(path: str = 'data/raw/siab.csv') -> pd.DataFrame:
    """
    Read the raw dataset from a CSV file.

    Args:
        path: Location of the CSV file to read; defaults to
            'data/raw/siab.csv'.
    Returns:
        DataFrame built by `pd.read_csv` using its default parsing options.
    """
    raw_frame = pd.read_csv(path)
    return raw_frame

In [3]:
def filter_data(df: pd.DataFrame, exclude: Optional[List[str]] = None) -> pd.DataFrame:
    """
    Drop the rows that belong to the given subgroups.

    Args:
        df: Input DataFrame. Its 'subgroup' column is read only when
            `exclude` is non-empty.
        exclude: Values of the 'subgroup' column whose rows are removed.
            None or an empty list disables filtering.
    Returns:
        When filtering is applied, a new DataFrame holding the remaining
        rows with a fresh 0..n-1 index. Otherwise `df` itself is returned
        (the same object, not a copy, with its index left as-is).
    """
    # Guard clause: nothing to exclude, so hand the input back untouched.
    if not exclude:
        return df

    # True for every row whose subgroup is NOT in the exclusion list.
    keep_mask = ~df['subgroup'].isin(exclude)
    kept_rows = df[keep_mask]
    # Discard the old row labels so the result is indexed contiguously.
    return kept_rows.reset_index(drop=True)