<a href="https://colab.research.google.com/github/omda123-code/boilerplate-mean-variance-standard-deviation-calculator/blob/main/demographic_data_analyzer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import pandas as pd

def _percentage(part, whole):
    """Return ``part / whole`` as a percentage rounded to one decimal.

    Returns 0 when ``whole`` is not positive, so callers never divide by zero.
    """
    if whole > 0:
        return round(part / whole * 100, 1)
    return 0


def analyze_data(df):
    """Compute summary statistics from a demographic DataFrame.

    The frame must provide the columns ``race``, ``sex``, ``age``,
    ``education``, ``salary``, ``hours-per-week``, ``native-country`` and
    ``occupation``. A row counts as "rich" when its ``salary`` equals
    ``'>50K'``.

    Returns:
        dict with the keys
        ``race_count`` (value counts of ``race``),
        ``average_age_men`` (mean age of rows whose sex is ``'Male'``),
        ``percentage_bachelors`` (share of rows with a Bachelors education),
        ``percentage_higher_education_rich`` (rich share among
        Bachelors/Masters/Doctorate rows),
        ``percentage_lower_education_rich`` (rich share among all other rows),
        ``min_work_hours`` (minimum of ``hours-per-week``),
        ``rich_percentage`` (rich share among rows working the minimum hours),
        ``highest_earning_country`` / ``highest_earning_country_percentage``
        (country with the largest rich share, and that share),
        ``top_IN_occupation`` (most frequent occupation among rich rows whose
        country is ``'India'``, or ``None`` when there is none).
        Percentages are rounded to one decimal; a percentage whose
        denominator is empty is reported as 0.
    """
    # Every "rich" statistic below needs this mask, so build it only once.
    is_rich = df['salary'] == '>50K'

    race_count = df['race'].value_counts()
    average_age_men = round(df[df['sex'] == 'Male']['age'].mean(), 1)
    percentage_bachelors = round((df['education'] == 'Bachelors').mean() * 100, 1)

    higher_education = df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])
    lower_education = ~higher_education
    # Counting True values in a mask equals the length of the filtered frame,
    # without materialising the filtered frame. int() keeps the arithmetic in
    # plain Python numbers so rounding is done by the built-in round().
    percentage_higher_education_rich = _percentage(
        int((higher_education & is_rich).sum()), int(higher_education.sum())
    )
    percentage_lower_education_rich = _percentage(
        int((lower_education & is_rich).sum()), int(lower_education.sum())
    )

    min_work_hours = df['hours-per-week'].min()
    works_min_hours = df['hours-per-week'] == min_work_hours
    rich_percentage = _percentage(
        int((works_min_hours & is_rich).sum()), int(works_min_hours.sum())
    )

    country_counts = df['native-country'].value_counts()
    rich_country_counts = df[is_rich]['native-country'].value_counts()
    # Countries with no rich rows are absent from rich_country_counts and come
    # out of the division as NaN; treat them as 0%.
    rich_percentage_per_country = (rich_country_counts / country_counts * 100).fillna(0)
    if not rich_percentage_per_country.empty:
        highest_earning_country = rich_percentage_per_country.idxmax()
        highest_earning_country_percentage = round(rich_percentage_per_country.max(), 1)
    else:
        highest_earning_country = None
        highest_earning_country_percentage = 0

    india_rich_occupations = df.loc[
        (df['native-country'] == 'India') & is_rich, 'occupation'
    ]
    # mode() drops missing values, so it can be empty even when matching rows
    # exist; check the mode result itself instead of indexing it blindly.
    occupation_modes = india_rich_occupations.mode()
    top_IN_occupation = occupation_modes.iloc[0] if not occupation_modes.empty else None

    results = {
        'race_count': race_count,
        'average_age_men': average_age_men,
        'percentage_bachelors': percentage_bachelors,
        'percentage_higher_education_rich': percentage_higher_education_rich,
        'percentage_lower_education_rich': percentage_lower_education_rich,
        'min_work_hours': min_work_hours,
        'rich_percentage': rich_percentage,
        'highest_earning_country': highest_earning_country,
        'highest_earning_country_percentage': highest_earning_country_percentage,
        'top_IN_occupation': top_IN_occupation,
    }
    return results

if __name__ == '__main__':
    # Load the UCI "adult" data file. It is read with header=None, so the
    # column names are supplied explicitly here.
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data'
    df = pd.read_csv(url, header=None, names=[
        'age', 'workclass', 'fnlwgt', 'education', 'education-num', 'marital-status',
        'occupation', 'relationship', 'race', 'sex', 'capital-gain', 'capital-loss',
        'hours-per-week', 'native-country', 'salary'
    ])

    # Strip surrounding whitespace from text values so that comparisons such
    # as salary == '>50K' match. Column-wise Series.map gives the same
    # element-wise result as DataFrame.applymap, which is deprecated in
    # recent pandas releases and emits a FutureWarning.
    df = df.apply(
        lambda column: column.map(lambda x: x.strip() if isinstance(x, str) else x)
    )

    # Print each statistic under its key, followed by a separator line.
    results = analyze_data(df)
    for key, value in results.items():
        print(f"{key}:")
        print(value)
        print('---------------------')


  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


race_count:
race
White                 27816
Black                  3124
Asian-Pac-Islander     1039
Amer-Indian-Eskimo      311
Other                   271
Name: count, dtype: int64
---------------------
average_age_men:
39.4
---------------------
percentage_bachelors:
16.4
---------------------
percentage_higher_education_rich:
46.5
---------------------
percentage_lower_education_rich:
17.4
---------------------
min_work_hours:
1
---------------------
rich_percentage:
10.0
---------------------
highest_earning_country:
Iran
---------------------
highest_earning_country_percentage:
41.9
---------------------
top_IN_occupation:
Prof-specialty
---------------------
