### Bias & Fairness in Data: Distribution Check
**Description**: Load the Adult Income dataset and check for representation bias by analyzing the distribution of gender across different income levels.

In [2]:
import pandas as pd

def load_data(filepath):
    """
    Load the Adult Income dataset from a CSV file.

    Missing, empty, and unparseable files are reported on stdout instead of
    raising, so callers only need to check the return value.

    Args:
        filepath: Path of the CSV file, passed to ``pd.read_csv``.

    Returns:
        The loaded DataFrame, or None if the file is missing, is empty, or
        cannot be parsed as CSV.
    """
    try:
        return pd.read_csv(filepath)
    except FileNotFoundError:
        print(f"Error: File '{filepath}' not found.")
        return None
    except pd.errors.EmptyDataError:
        # A zero-byte / header-less file raises EmptyDataError, which is not a
        # ParserError subclass and would otherwise escape this function.
        print(f"Error: File '{filepath}' is empty.")
        return None
    except pd.errors.ParserError:
        print(f"Error: File '{filepath}' could not be parsed. Check CSV formatting.")
        return None


def clean_data(df):
    """
    Normalise column names and the 'sex' / 'income' values for grouping.

    Leading/trailing whitespace is stripped from every column name and from
    the values of the 'sex' and 'income' columns when they are present.
    Missing values in those two columns stay missing rather than being turned
    into the literal strings 'nan' / 'None', so they are not counted as a
    category of their own when the data is later grouped.

    Args:
        df: DataFrame to clean. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    df.columns = df.columns.str.strip()
    for column in ('sex', 'income'):
        if column not in df.columns:
            continue
        values = df[column]
        # astype(str) stringifies NaN/None as well; mask those positions back
        # to missing after stripping so real gaps remain visible as gaps.
        df[column] = values.astype(str).str.strip().where(values.notna())
    return df


def analyze_gender_income_distribution(df):
    """
    Cross-tabulate gender against income level, as percentages.

    Args:
        df: DataFrame containing a 'sex' column and an 'income' column.

    Returns:
        DataFrame indexed by 'sex' with one column per 'income' value. Values
        are normalised per column, so each income level's column sums to 100
        and shows the share of each gender within that income level.

    Raises:
        AssertionError: If the 'sex' or 'income' column is missing.
    """
    for required in ('sex', 'income'):
        if required not in df.columns:
            # Raised explicitly instead of using an `assert` statement so the
            # check is not stripped under `python -O`; the exception type is
            # kept as AssertionError because existing callers catch it.
            raise AssertionError(f"'{required}' column not found in the dataset.")

    # normalize='columns' divides each cell by its income-level column total.
    return pd.crosstab(df['sex'], df['income'], normalize='columns') * 100


if __name__ == "__main__":
    # Script entry point: load the CSV, normalise it, then print the
    # gender-by-income percentage table.
    # The relative path is resolved against the current working directory.
    file_path = "adult.csv"
    df = load_data(file_path)

    # load_data returns None after printing its own error message, so a
    # failed load needs no further reporting here.
    if df is not None:
        df = clean_data(df)
        try:
            gender_income_dist = analyze_gender_income_distribution(df)
            print("Gender Distribution by Income Level (%):\n")
            print(gender_income_dist.round(2))
        except AssertionError as e:
            # analyze_gender_income_distribution signals a missing 'sex' or
            # 'income' column with AssertionError; report it without a traceback.
            print(f"Validation Error: {e}")


Error: File 'adult.csv' not found.
