The European Union (EU) is a politico-economic union of 28 member states that are located primarily in Europe. The eurozone (officially called the euro area) is a monetary union of 19 of the 28 EU member states that have adopted the euro (€) as their common currency.

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
%config InlineBackend.figure_format = 'png' #set 'png' here when working on notebook
warnings.filterwarnings('ignore') 

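# Widen the pandas display limits (width, max rows, max columns) so large frames print without truncation.
# NOTE: the ggplot style and 15x10 figure size mentioned originally are not actually set in this cell.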

pd.set_option('display.width',170, 'display.max_rows',200, 'display.max_columns',300)

In [None]:
# Load the HNP statistics CSV into a DataFrame.
# NOTE: the path is absolute and specific to one machine.
df = pd.read_csv(
    filepath_or_buffer="/Users/pvaish10/Desktop/Projects/Data/hnp_stats_csv/HNP_StatsData.csv"
)

In [None]:
# Drop the two code columns; 'Country Name' and 'Indicator Name' are what the
# later cells filter on.
# `columns=` is used because passing the axis positionally to drop() is
# deprecated since pandas 1.3 and rejected by pandas 2.0+.
df = df.drop(columns=['Country Code','Indicator Code'])

###### Selecting just Euro Area

In [None]:
# Keep only the rows whose 'Country Name' is exactly "Euro area".
dfeu = df.loc[df['Country Name'].eq("Euro area")]

In [None]:
import re

# Audit every column of the Euro-area frame: print a preview of its unique
# values, then report which columns contain missing or non-numeric entries.
missing_values = []
nonumeric_values = []

# A value counts as numeric when its string form is an optionally signed
# decimal (digits with an optional point, or a leading-point fraction) with an
# optional exponent, so negatives and values such as 1e-05 are not misreported.
_numeric_pattern = re.compile(r'^[+-]?((\d+\.?\d*)|(\d*\.?\d+))([eE][+-]?\d+)?$')

print ("DATA SET INFORMATION")
print ("========================\n")

for column in dfeu:
    # Find all the unique feature values
    uniq = dfeu[column].unique()
    print ("'{}' has {} unique values" .format(column,uniq.size))
    if (uniq.size > 10):
        print("~~Listing up to 10 unique values~~")
    print (uniq[0:10])
    print ("\n-----------------------------------------------------------------------\n")

    # Find features with missing values
    null_mask = pd.isnull(uniq)
    if null_mask.any():
        s = "{} has {} missing" .format(column, pd.isnull(dfeu[column]).sum())
        missing_values.append(s)

    # Find features with non-numeric values.
    # Every unique value is inspected, including the first one; missing
    # entries are skipped rather than ending the scan, so a non-numeric value
    # that follows a NaN is still detected.
    for value, is_null in zip(uniq, null_mask):
        if is_null:
            continue
        if not _numeric_pattern.match(str(value)):
            nonumeric_values.append(column)
            break

print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
print ("Features with missing values:\n{}\n\n" .format(missing_values))
print ("Features with non-numeric values:\n{}" .format(nonumeric_values))
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")



In [None]:
# Keep only the rows that have a value in the '2015' column.
# 2015 was chosen because, per the original note, 2017 data is missing for
# some entries.
dfi2 = dfeu.loc[dfeu['2015'].notna()]

In [None]:
# Show the indicator names that remain after the 2015 filter.
dfi2['Indicator Name'].tolist()

In [None]:
# Build a table indexed by 'Year' with one column per selected indicator.
#
# The indicator names are written out once, and the column labels are taken
# from the filtered rows themselves. `isin` keeps rows in the frame's own
# order, not the order of this list, so labelling the columns from a second
# hand-written copy of the list could silently attach a name to the wrong
# series, or fail with a length mismatch if an indicator is absent.
euro_area_indicators = [
 'Adolescent fertility rate (births per 1,000 women ages 15-19)',
 'Age dependency ratio (% of working-age population)',
 'Age dependency ratio, old',
 'Age dependency ratio, young',
 'Antiretroviral therapy coverage (% of people living with HIV)',
 'Birth rate, crude (per 1,000 people)',
 'Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)',
 'Cause of death, by injury (% of total)',
 'Cause of death, by non-communicable diseases (% of total)',
 'Current health expenditure (% of GDP)',
 'Current health expenditure per capita (current US$)',
 'Current health expenditure per capita, PPP (current international $)',
 'Death rate, crude (per 1,000 people)',
 'Domestic general government health expenditure (% of current health expenditure)',
 'Domestic general government health expenditure (% of GDP)',
 'Domestic general government health expenditure (% of general government expenditure)',
 'Domestic general government health expenditure per capita (current US$)',
 'Domestic general government health expenditure per capita, PPP (current international $)',
 'Domestic private health expenditure (% of current health expenditure)',
 'Domestic private health expenditure per capita (current US$)',
 'Domestic private health expenditure per capita, PPP  (current international $)',
 'Female population 00-04',
 'Female population 05-09',
 'Female population 10-14',
 'Female population 15-19',
 'Female population 20-24',
 'Female population 25-29',
 'Female population 30-34',
 'Female population 35-39',
 'Female population 40-44',
 'Female population 45-49',
 'Female population 50-54',
 'Female population 55-59',
 'Female population 60-64',
 'Female population 65-69',
 'Female population 70-74',
 'Female population 75-79',
 'Female population 80+',
 'Fertility rate, total (births per woman)',
 'GNI per capita, Atlas method (current US$)',
 'Immunization, DPT (% of children ages 12-23 months)',
 'Immunization, HepB3 (% of one-year-old children)',
 'Immunization, Hib3 (% of children ages 12-23 months)',
 'Immunization, measles (% of children ages 12-23 months)',
 'Immunization, Pol3 (% of one-year-old children)',
 'Incidence of HIV (% of uninfected population ages 15-49)',
 'Labor force, female (% of total labor force)',
 'Labor force, total',
 'Life expectancy at birth, female (years)',
 'Life expectancy at birth, male (years)',
 'Life expectancy at birth, total (years)',
 'Lifetime risk of maternal death (%)',
 'Lifetime risk of maternal death (1 in: rate varies by country)',
 'Literacy rate, adult female (% of females ages 15 and above)',
 'Literacy rate, adult male (% of males ages 15 and above)',
 'Literacy rate, adult total (% of people ages 15 and above)',
 'Literacy rate, youth male (% of males ages 15-24)',
 'Literacy rate, youth total (% of people ages 15-24)',
 'Male population 00-04',
 'Male population 05-09',
 'Male population 10-14',
 'Male population 15-19',
 'Male population 20-24',
 'Male population 25-29',
 'Male population 30-34',
 'Male population 35-39',
 'Male population 40-44',
 'Male population 45-49',
 'Male population 50-54',
 'Male population 55-59',
 'Male population 60-64',
 'Male population 65-69',
 'Male population 70-74',
 'Male population 75-79',
 'Male population 80+',
 'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
 'Mortality caused by road traffic injury (per 100,000 people)',
 'Mortality from CVD, cancer, diabetes or CRD between exact ages 30 and 70 (%)',
 'Mortality rate attributed to unintentional poisoning (per 100,000 population)',
 'Mortality rate attributed to unintentional poisoning, female (per 100,000 female population)',
 'Mortality rate attributed to unintentional poisoning, male (per 100,000 male population)',
 'Mortality rate, infant (per 1,000 live births)',
 'Mortality rate, infant, female (per 1,000 live births)',
 'Mortality rate, infant, male (per 1,000 live births)',
 'Mortality rate, neonatal (per 1,000 live births)',
 'Mortality rate, under-5 (per 1,000)',
 'Mortality rate, under-5, female (per 1,000)',
 'Mortality rate, under-5, male (per 1,000)',
 'Number of deaths ages 5-14 years',
 'Number of infant deaths',
 'Number of maternal deaths',
 'Number of neonatal deaths',
 'Number of people who are undernourished',
 'Number of under-five deaths',
 'Out-of-pocket expenditure (% of current health expenditure)',
 'Out-of-pocket expenditure per capita (current US$)',
 'Out-of-pocket expenditure per capita, PPP (current international $)',
 'People practicing open defecation (% of population)',
 'People practicing open defecation, rural (% of rural population)',
 'People practicing open defecation, urban (% of urban population)',
 'People using at least basic drinking water services (% of population)',
 'People using at least basic drinking water services, rural (% of rural population)',
 'People using at least basic drinking water services, urban (% of urban population)',
 'People using at least basic sanitation services (% of population)',
 'People using at least basic sanitation services, rural (% of rural population)',
 'People using at least basic sanitation services, urban  (% of urban population)',
 'People using safely managed drinking water services (% of population)',
 'People using safely managed sanitation services (% of population)',
 'People using safely managed sanitation services, rural (% of rural population)',
 'People using safely managed sanitation services, urban  (% of urban population)',
 'Population ages 00-04, female (% of female population)',
 'Population ages 00-04, male (% of male population)',
 'Population ages 00-14 (% of total)',
 'Population ages 00-14, female (% of total)',
 'Population ages 00-14, male (% of total)',
 'Population ages 00-14, total',
 'Population ages 0-14, female',
 'Population ages 0-14, male',
 'Population ages 05-09, female (% of female population)',
 'Population ages 05-09, male (% of male population)',
 'Population ages 10-14, female (% of female population)',
 'Population ages 15-19, female (% of female population)',
 'Population ages 15-19, male (% of male population)',
 'Population ages 15-64 (% of total)',
 'Population ages 15-64, female',
 'Population ages 15-64, female (% of total)',
 'Population ages 15-64, male',
 'Population ages 15-64, male (% of total)',
 'Population ages 15-64, total',
 'Population ages 20-24, female (% of female population)',
 'Population ages 20-24, male (% of male population)',
 'Population ages 25-29, female (% of female population)',
 'Population ages 25-29, male (% of male population)',
 'Population ages 30-34, female (% of female population)',
 'Population ages 30-34, male (% of male population)',
 'Population ages 35-39, female (% of female population)',
 'Population ages 35-39, male (% of male population)',
 'Population ages 40-44, female (% of female population)',
 'Population ages 40-44, male (% of male population)',
 'Population ages 45-49, female (% of female population)',
 'Population ages 45-49, male (% of male population)',
 'Population ages 50-54, female (% of female population)',
 'Population ages 50-54, male (% of male population)',
 'Population ages 55-59, female (% of female population)',
 'Population ages 55-59, male (% of male population)',
 'Population ages 60-64, female (% of female population)',
 'Population ages 60-64, male (% of male population)',
 'Population ages 65 and above (% of total)',
 'Population ages 65 and above, female',
 'Population ages 65 and above, female (% of total)',
 'Population ages 65 and above, male',
 'Population ages 65 and above, male (% of total)',
 'Population ages 65 and above, total',
 'Population ages 65-69, female (% of female population)',
 'Population ages 65-69, male (% of male population)',
 'Population ages 70-74, female (% of female population)',
 'Population ages 70-74, male (% of male population)',
 'Population ages 75-79, female (% of female population)',
 'Population ages 75-79, male (% of male population)',
 'Population ages 80 and older, female (% of female population)',
 'Population ages 80 and older, male (% of male population)',
 'Population ages10-14, male (% of male population)',
 'Population growth (annual %)',
 'Population, female',
 'Population, female (% of total)',
 'Population, male',
 'Population, male (% of total)',
 'Population, total',
 'Prevalence of anemia among children (% of children under 5)',
 'Prevalence of anemia among non-pregnant women (% of women ages 15-49)',
 'Prevalence of anemia among women of reproductive age (% of women ages 15-49)',
 'Prevalence of HIV, female (% ages 15-24)',
 'Prevalence of HIV, male (% ages 15-24)',
 'Prevalence of HIV, total (% of population ages 15-49)',
 'Prevalence of overweight (% of adults)',
 'Prevalence of overweight, female (% of female adults)',
 'Prevalence of overweight, male (% of male adults)',
 'Prevalence of undernourishment (% of population)',
 'Primary completion rate, female (% of relevant age group)',
 'Primary completion rate, male (% of relevant age group)',
 'Primary completion rate, total (% of relevant age group)',
 'Probability of dying at age 5-14 years (per 1,000 children age 5)',
 'Ratio of young literate females to males (% ages 15-24)',
 'Risk of catastrophic expenditure for surgical care (% of people at risk)',
 'Risk of impoverishing expenditure for surgical care (% of people at risk)',
 'Rural population',
 'Rural population (% of total population)',
 'Rural population growth (annual %)',
 'School enrollment, primary (% gross)',
 'School enrollment, primary (% net)',
 'School enrollment, primary, female (% gross)',
 'School enrollment, primary, female (% net)',
 'School enrollment, primary, male (% gross)',
 'School enrollment, primary, male (% net)',
 'School enrollment, secondary (% gross)',
 'School enrollment, secondary (% net)',
 'School enrollment, secondary, female (% gross)',
 'School enrollment, secondary, female (% net)',
 'School enrollment, secondary, male (% gross)',
 'School enrollment, secondary, male (% net)',
 'School enrollment, tertiary (% gross)',
 'School enrollment, tertiary, female (% gross)',
 'Sex ratio at birth (male births per female births)',
 'Smoking prevalence, females (% of adults)',
 'Smoking prevalence, males (% of adults)',
 'Smoking prevalence, total, ages 15+',
 'Suicide mortality rate (per 100,000 population)',
 'Survival to age 65, female (% of cohort)',
 'Survival to age 65, male (% of cohort)',
 'UHC service coverage index',
 'Unemployment, female (% of female labor force)',
 'Unemployment, male (% of male labor force)',
 'Unemployment, total (% of total labor force)',
 'Urban population',
 'Urban population (% of total)',
 'Urban population growth (annual %)',
 "Women's share of population ages 15+ living with HIV (%)",
]

# Rows for the selected indicators, in the frame's own row order.
selected_rows = dfi2[dfi2['Indicator Name'].isin(euro_area_indicators)]

# Same positional slice as before (columns 2-59, described originally as the
# year columns -- presumably 1960 onward; confirm against the CSV header).
# Transposing makes each of those columns a row; the row axis is named 'Year'.
dfeuarea = selected_rows.iloc[:,2:60].T.rename_axis('Year')

# Label each column with the indicator name of the row it came from.
dfeuarea.columns = selected_rows['Indicator Name'].tolist()
dfeuarea.head()