# WEEK 03
# Encounter 03 - Descriptive Statistics
# Project Challenge - Pandas Descriptive Statistics Challenges

## Task Description

Solve the following pandas challenges:

 1. read the life expectancy cleaned dataset into your notebook (the dataset you created in Data Cleansing before). If you want to use country as the index you may use the `index_col` parameter while reading it from the file:
>`life_df = pd.read_csv('life_expectancy_cleaned.csv', index_col=0)`
 2. calculate the mean life expectancy
 3. calculate the mean life expectancy for the year 2000
 4. calculate the median for 1995
 5. calculate the standard deviation
 6. find the highest life expectancy
 7. find the country and year for the highest life expectancy
 8. find the lowest life expectancy
 9. find the country and year with the lowest life expectancy
 10. find the 90% quantile of the life expectancy
 11. calculate min, max, mean and possibly other descriptors with a single line

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# 1. Load the life expectancy dataset produced in the Data Cleansing step.
# `index_col=0` turns the first CSV column into the DataFrame index
# instead of keeping it as a regular data column.

life_df = pd.read_csv(
    '../data/life_expectancy_after_1950.csv',
    index_col=0,
)
life_df

Unnamed: 0_level_0,year,life expectancy
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,1950,26.85
Albania,1950,54.48
Algeria,1950,42.77
Angola,1950,30.70
Antigua and Barbuda,1950,57.97
...,...,...
Virgin Islands (U.S.),2016,80.82
Yemen,2016,64.92
Zambia,2016,57.10
Zimbabwe,2016,61.69


In [7]:
# 2. Mean life expectancy over every row of the dataset,
# rounded to two decimals for display.

life_expectancy_mean = round(life_df['life expectancy'].mean(), 2)
life_expectancy_mean

62.99

In [8]:
# 3. Mean life expectancy for the year 2000.

# Boolean mask: True for rows whose `year` is 2000, False otherwise.
year_2000 = life_df['year'].eq(2000)

# Select the masked rows and the target column in a single .loc lookup,
# then average and round to two decimals.
life_expectancy_2000_mean = round(
    life_df.loc[year_2000, 'life expectancy'].mean(), 2
)
life_expectancy_2000_mean

68.08

In [9]:
# 4. Median life expectancy for the year 1995.

# Boolean mask: True for rows whose `year` is 1995, False otherwise.
mask_1995 = life_df['year'].eq(1995)

# Select the masked rows and the target column in a single .loc lookup,
# then take the median and round to two decimals.
life_expectancy_1995_median = round(
    life_df.loc[mask_1995, 'life expectancy'].median(), 2
)
life_expectancy_1995_median


70.0

In [11]:
# 5. Standard deviation of life expectancy over every row,
# rounded to two decimals.
# NOTE: pandas' Series.std() defaults to ddof=1, i.e. the sample standard deviation.

sd = round(life_df['life expectancy'].std(), 2)
sd

11.9

In [12]:
# 6. find the highest life expectancy
# The maximum is taken over every row of the 'life expectancy' column,
# so it is not restricted to a particular year or country.

max_life_exp = life_df['life expectancy'].max()
max_life_exp

84.8

In [13]:
# 7. Country and year for the highest life expectancy.
# Build a boolean mask marking every row whose value equals the maximum,
# then show those rows; several rows can match if the maximum occurs more than once.

mask_max_life_exp = life_df['life expectancy'].eq(max_life_exp)
life_df.loc[mask_max_life_exp]

Unnamed: 0_level_0,year,life expectancy
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Andorra,2013,84.8
Andorra,2014,84.8
Andorra,2015,84.8
Andorra,2016,84.8


In [17]:
# Extra: name of the country holding the highest life expectancy.
# The index of the matching rows is de-duplicated and only its first entry
# is kept, so if several index values match, only the first is reported.
country_max_life_exp = life_df.loc[mask_max_life_exp].index.unique()[0]
country_max_life_exp

'Andorra'

In [22]:
# Extra: distinct years in which the highest life expectancy was recorded,
# converted from a NumPy array to a plain Python list.
years_max_life_exp = life_df.loc[mask_max_life_exp, 'year'].unique().tolist()
years_max_life_exp

[2013, 2014, 2015, 2016]

In [23]:
# 8. find the lowest life expectancy
# The minimum is taken over every row of the 'life expectancy' column,
# so it is not restricted to a particular year or country.

min_life_exp = life_df['life expectancy'].min()
min_life_exp

13.2

In [24]:
# 9. Country and year with the lowest life expectancy.
# Build a boolean mask marking every row whose value equals the minimum,
# then show those rows.

mask_min_life_exp = life_df['life expectancy'].eq(min_life_exp)
life_df.loc[mask_min_life_exp]

Unnamed: 0_level_0,year,life expectancy
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Rwanda,1994,13.2


In [25]:
# Extra: name of the country holding the lowest life expectancy.
# The index of the matching rows is de-duplicated and only its first entry
# is kept, so if several index values match, only the first is reported.
country_min_life_exp = life_df.loc[mask_min_life_exp].index.unique()[0]
country_min_life_exp

'Rwanda'

In [26]:
# Extra: distinct years in which the lowest life expectancy was recorded,
# converted from a NumPy array to a plain Python list.
years_min_life_exp = life_df.loc[mask_min_life_exp, 'year'].unique().tolist()
years_min_life_exp

[1994]

In [34]:
# 10. 90% quantile of the life expectancy.
# Passing a list to quantile() returns a Series indexed by the requested
# quantile levels (a Series, despite the `df_` prefix), so the scalar is
# read back by its 0.9 label.

df_quantiles = life_df['life expectancy'].quantile(q=[0.9])
df_quantiles.at[0.9]

76.4

In [36]:
# 11. calculate min, max, mean and possibly other descriptors with a single line
# describe() summarises each numeric column in one call: count, mean, std,
# min, the 25%/50%/75% quantiles and max. Note the summary also covers the
# `year` column, not only 'life expectancy'.

life_df.describe()

Unnamed: 0,year,life expectancy
count,13707.0,13707.0
mean,1983.25155,62.988833
std,19.364466,11.895572
min,1950.0,13.2
25%,1966.0,54.815
50%,1983.0,65.57
75%,2000.0,72.23
max,2016.0,84.8
