### Bias & Fairness in Data: Distribution Check
**Description**: Load the Adult Income dataset and check for representation bias by analyzing the distribution of gender across different income levels.

In [1]:
# Write your code below.

import pandas as pd
from sklearn.datasets import fetch_openml

def load_adult_data():
    """Fetch the Adult Income dataset from OpenML as a DataFrame.

    Requests the dataset named ``adult`` (version 2) through
    ``fetch_openml`` with ``as_frame=True`` and returns its ``frame``.

    Returns:
        The loaded DataFrame, or ``None`` if anything goes wrong while
        loading or if the loaded frame is empty. Failures are reported
        with a printed message instead of being raised.
    """
    try:
        frame = fetch_openml(name='adult', version=2, as_frame=True).frame
        if not frame.empty:
            return frame
        # An empty frame is treated like any other load failure below.
        raise ValueError("Loaded dataset is empty.")
    except Exception as err:
        # Best-effort loader: report the problem and signal it with None.
        print(f"Error loading Adult dataset: {err}")
    return None

def clean_data(df):
    """Normalise missing-value markers and drop rows lacking key columns.

    Every ``'?'`` cell is replaced with ``pd.NA``; rows whose ``'sex'`` or
    ``'class'`` value is missing are then removed. Missing values in other
    columns are kept.

    Args:
        df: The raw DataFrame, or ``None`` when loading failed.

    Returns:
        A new cleaned DataFrame, or ``None`` (after printing a notice) when
        ``df`` is ``None``.
    """
    if df is None:
        print("No data to clean.")
        return None

    # The two columns the gender/income analysis cannot work without.
    required_columns = ['sex', 'class']

    # '?' is the dataset's placeholder for an unknown value.
    with_missing_marked = df.replace('?', pd.NA)
    return with_missing_marked.dropna(subset=required_columns)

def analyze_gender_income_distribution(df):
    """Print the percentage split of ``'sex'`` within each ``'class'`` value.

    Builds a cross-tabulation of ``'sex'`` (rows) against ``'class'``
    (columns), normalised per column so each income column sums to 100,
    and prints it rounded to two decimals.

    Args:
        df: DataFrame containing ``'sex'`` and ``'class'`` columns, or
            ``None``.

    Returns:
        None. Prints a notice and does nothing else when ``df`` is ``None``
        or empty.
    """
    if df is None or df.empty:
        print("No data for analysis.")
        return

    # normalize='columns' yields, per income level, the fraction of each gender.
    shares = pd.crosstab(df['sex'], df['class'], normalize='columns')
    percentages = (shares * 100).round(2)

    print("\nGender distribution (%) across income levels:")
    print(percentages)

def test_adult_income_bias_analysis():
    """Run the load → clean → analyse pipeline and sanity-check the result.

    Prints progress messages, aborts quietly (with a printed notice) if the
    data could not be loaded or cleaned, and otherwise asserts that the
    ``'sex'`` and ``'class'`` columns exist and contain no nulls.

    Raises:
        AssertionError: If a sanity check on the cleaned data fails.
    """
    print("\nRunning bias check test...")
    cleaned = clean_data(load_adult_data())
    if cleaned is None:
        print("Test aborted due to data loading/cleaning failure.")
        return

    analyze_gender_income_distribution(cleaned)

    # Sanity checks: required columns are present ...
    available = cleaned.columns
    assert 'sex' in available, "'sex' column missing"
    assert 'class' in available, "'class' column missing"
    # ... and cleaning left no missing values in them.
    assert not cleaned['sex'].isna().any(), "'sex' column contains nulls after cleaning"
    assert not cleaned['class'].isna().any(), "'class' column contains nulls after cleaning"
    print("Bias check test completed.\n")

if __name__ == "__main__":
    # First pass: load, clean, and print the gender/income distribution.
    # `df` is bound at module level in two steps (raw, then cleaned).
    df = load_adult_data()
    df = clean_data(df)
    analyze_gender_income_distribution(df)
    # Second pass: the self-check loads and cleans the data again and
    # re-runs the analysis before asserting on the cleaned columns, so the
    # distribution table is printed a second time.
    test_adult_income_bias_analysis()


Gender distribution (%) across income levels:
class   <=50K   >50K
sex                 
Female  38.82  15.14
Male    61.18  84.86

Running bias check test...

Gender distribution (%) across income levels:
class   <=50K   >50K
sex                 
Female  38.82  15.14
Male    61.18  84.86
Bias check test completed.

