**Project Scope:**

In this challenge you must analyze demographic data using Pandas. You are given a dataset of demographic data that was extracted from the 1994 Census database.

In [2]:
import pandas as pd

In [3]:
# Load the census extract; the path is relative to the notebook's working directory.
census_csv_path = 'adult.data.csv'
adult_data = pd.read_csv(census_csv_path)

In [16]:
# Preview the first five rows to check column names and value formats.
adult_data.head(n=5)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,salary
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


1) How many people of each race are represented in this dataset? This should be a Pandas series with race names as the index labels.

In [14]:
# value_counts() already returns a Series indexed by race name (ordered by
# descending count), so wrapping it in pd.Series(..., index=unique()) is
# redundant. Dropping the re-index also avoids a NaN-labelled row (and a
# float dtype) if the `race` column ever contains missing values.
num_race = adult_data['race'].value_counts()

In [15]:
# Display the per-race counts (race names are the index labels).
num_race

White                 27816
Black                  3124
Asian-Pac-Islander     1039
Amer-Indian-Eskimo      311
Other                   271
Name: race, dtype: int64

2) What is the average age of men?

In [20]:
# Mean age for each value of `sex`; the 'Male' entry answers the question.
gender_means = adult_data.groupby('sex')['age'].mean()

In [22]:
# Display the mean age per sex.
gender_means

sex
Female    36.858230
Male      39.433547
Name: age, dtype: float64

3) What is the percentage of people who have a Bachelor's degree?

In [37]:
# Percentage of all rows whose `education` value is exactly 'Bachelors'.
has_bachelors = adult_data['education'] == 'Bachelors'
# int(...) keeps the arithmetic on plain Python numbers, so the result is a Python float.
bachelors_percentage = int(has_bachelors.sum()) / adult_data.shape[0] * 100

In [38]:
# Display the percentage of rows with a Bachelors degree.
bachelors_percentage

16.44605509658794

4) What percentage of people with advanced education (Bachelors, Masters, or Doctorate) make more than 50K?

In [39]:
# List every education level with its row count to see the exact labels in use.
adult_data['education'].value_counts()

HS-grad         10501
Some-college     7291
Bachelors        5355
Masters          1723
Assoc-voc        1382
11th             1175
Assoc-acdm       1067
10th              933
7th-8th           646
Prof-school       576
9th               514
12th              433
Doctorate         413
5th-6th           333
1st-4th           168
Preschool          51
Name: education, dtype: int64

In [53]:
# Check which distinct salary labels occur before filtering on them.
adult_data['salary'].unique()

array(['<=50K', '>50K'], dtype=object)

In [63]:
# Rows whose education is one of the three levels treated as "advanced".
advanced_degree = adult_data[adult_data['education'].isin(['Bachelors', 'Masters', 'Doctorate'])]

In [66]:
# Percentage of the advanced-education rows whose salary is '>50K'.
advanced_high_earners = advanced_degree[advanced_degree['salary'] == '>50K']
advanced_degree_over_50K = advanced_high_earners.shape[0] / advanced_degree.shape[0] * 100

In [67]:
# Display the percentage of advanced-education rows earning '>50K'.
advanced_degree_over_50K

46.535843011613935

5) What percentage of people without advanced education make more than 50K?

In [75]:
# Complement of the advanced-education group: every row whose education is
# none of Bachelors, Masters, or Doctorate.
no_advanced_degree = adult_data[~adult_data['education'].isin(['Bachelors', 'Masters', 'Doctorate'])]

In [77]:
# Percentage of the rows without advanced education whose salary is '>50K'.
lower_ed_high_earners = no_advanced_degree[no_advanced_degree['salary'] == '>50K']
no_advanced_degree_over_50K = lower_ed_high_earners.shape[0] / no_advanced_degree.shape[0] * 100

In [78]:
# Display the percentage of rows without advanced education earning '>50K'.
no_advanced_degree_over_50K

17.3713601914639

6) What is the minimum number of hours a person works per week?

In [79]:
# Smallest value recorded in the `hours-per-week` column.
min_hours = adult_data.loc[:, 'hours-per-week'].min()

In [80]:
# Display the minimum hours-per-week value.
min_hours

1

7) What percentage of the people who work the minimum number of hours per week have a salary of more than 50K?

In [81]:
# Rows whose `hours-per-week` equals the minimum found above.
works_min_hours = adult_data['hours-per-week'].eq(min_hours)
min_hours_people = adult_data[works_min_hours]

In [82]:
# Percentage of the minimum-hours rows whose salary is '>50K'.
min_hours_high_earners = min_hours_people[min_hours_people['salary'] == '>50K']
min_hours_percent = min_hours_high_earners.shape[0] / min_hours_people.shape[0] * 100

In [83]:
# Display the percentage of minimum-hours rows earning '>50K'.
min_hours_percent

10.0

8) What country has the highest percentage of people that earn >50K and what is that percentage?

In [93]:
# Percentage of '>50K' rows per country, computed in one vectorized pass
# instead of re-filtering the whole frame twice for every country.
# The mean of a boolean mask is the fraction of True values in each group.
# sort=False keeps countries in first-appearance order, so idxmax() (which
# returns the first maximum) breaks ties the same way the explicit loop did.
# groupby drops missing country keys by default, so a NaN in `native-country`
# no longer causes a division by zero.
over_50k_by_country = (
    (adult_data['salary'] == '>50K')
    .groupby(adult_data['native-country'], sort=False)
    .mean()
    * 100
)
best_country = over_50k_by_country.idxmax()
# float(...) keeps the result a plain Python float, as the loop version produced.
highest_percent = float(over_50k_by_country.max())

print(best_country, highest_percent)

Iran 41.86046511627907


9) Identify the most popular occupation for those who earn >50K in India.

In [97]:
# Keep only rows with native-country 'India' and salary '>50K', then tally
# occupations. value_counts() sorts by descending count, so the first label
# is the most popular occupation.
is_from_india = adult_data['native-country'] == 'India'
earns_over_50k = adult_data['salary'] == '>50K'
india_data_over_50K = adult_data[is_from_india & earns_over_50k]
occupation_value_counts = india_data_over_50K['occupation'].value_counts()

In [98]:
# Display occupation counts for '>50K' earners from India.
occupation_value_counts

Prof-specialty      25
Exec-managerial      8
Tech-support         2
Other-service        2
Sales                1
Transport-moving     1
Adm-clerical         1
Name: occupation, dtype: int64