In [None]:
# @title SPLITFUNCTION
def split_dataset(df, target_col, random_state=42, test_ratio=0.2):
    """
    Splits the given DataFrame into train/test sets.

    The split is stratified on the target when the target has fewer than
    20 distinct values AND every distinct value occurs at least twice.
    Otherwise a plain random split is performed.

    Parameters
    ----------
    df : pandas.DataFrame
        The full dataset containing features + target.
    target_col : str
        The name of the target column (y).
    random_state : int, default=42
        Random seed for reproducibility.
    test_ratio : float, default=0.2
        Proportion of data to use for test set (e.g. 0.2 = 20%).

    Returns
    -------
    X_train, X_test, y_train, y_test : pd.DataFrame, pd.DataFrame, pd.Series, pd.Series
        Split datasets ready for preprocessing or model fitting.

    Raises
    ------
    ValueError
        If ``target_col`` is not a column of ``df``.
    """

    if target_col not in df.columns:
        raise ValueError(f"Target column '{target_col}' not found in DataFrame.")

    X = df.drop(columns=[target_col])
    y = df[target_col]

    # Count samples per class (NaN counted as its own class, matching
    # what y.unique() would report).
    class_counts = y.value_counts(dropna=False)

    stratify = None
    if 0 < len(class_counts) < 20:
        # A stratified split needs at least 2 members in every class;
        # with a singleton class train_test_split would raise, so fall
        # back to a plain random split instead of crashing.
        if class_counts.min() >= 2:
            stratify = y
        else:
            print(
                "⚠️ Stratification skipped: at least one target class has "
                "fewer than 2 samples"
            )

    X_train, X_test, y_train, y_test = train_test_split(
        X,
        y,
        test_size=test_ratio,
        stratify=stratify,
        random_state=random_state,
    )

    print(f"✅ Data split complete: {X_train.shape[0]} train / {X_test.shape[0]} test samples")
    return X_train, X_test, y_train, y_test


# Example usage:
# X_train, X_test, y_train, y_test = split_dataset(df, target_col='status_label', random_state=42, test_ratio=0.2)
