In [None]:
import os
import re
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

%matplotlib inline
%config InlineBackend.figure_format = 'png' #set 'png' here when working on notebook
# Silence every Python warning for the rest of the session.
warnings.filterwarnings('ignore')

# Widen the pandas display limits so wide/long frames are shown without truncation.
pd.set_option('display.width', 170)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', 300)

In [None]:
# Location of the HNP statistics CSV. Set the HNP_STATS_CSV environment variable
# to point at your own copy of the file; when it is unset the original author's
# local path is used, so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    "HNP_STATS_CSV",
    "/Users/pvaish10/Desktop/Projects/Data/hnp_stats_csv/HNP_StatsData.csv",
)
df = pd.read_csv(DATA_PATH)

In [None]:
# Display the column labels of the freshly loaded frame.
df.columns

In [None]:
# Banner, then the frame's shape, its column labels and the dtype of each column.
for banner_line in ("\n\n---------------------",
                    "DATA SET INFORMATION",
                    "---------------------"):
    print(banner_line)

column_headers = list(df.columns.values)
print("Shape of training set:", df.shape, "\n")
print("Column Headers:", column_headers, "\n")
print(df.dtypes)

In [None]:
import re
missing_values = []
nonumeric_values = []

print ("DATA SET INFORMATION")
print ("========================\n")

for column in df:
    # Find all the unique feature values
    uniq = df[column].unique()
    print ("'{}' has {} unique values" .format(column,uniq.size))
    if (uniq.size > 10):
        print("~~Listing up to 10 unique values~~")
    print (uniq[0:10])
    print ("\n-----------------------------------------------------------------------\n")
    
    # Find features with missing values
    if (True in pd.isnull(uniq)):
        s = "{} has {} missing" .format(column, pd.isnull(df[column]).sum())
        missing_values.append(s)
    
    # Find features with non-numeric values
    for i in range (1, np.prod(uniq.shape)):
        if (re.match('nan', str(uniq[i]))):
            break
        if not (re.search('(^\d+\.?\d*$)|(^\d*\.?\d+$)', str(uniq[i]))):
            nonumeric_values.append(column)
            break
  
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
print ("Features with missing values:\n{}\n\n" .format(missing_values))
print ("Features with non-numeric values:\n{}" .format(nonumeric_values))
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")


In [None]:
df = df.drop(['Country Code','Indicator Code'], 1)

###### Selecting just Ireland

In [None]:
dfireland = df[df['Country Name'] == "Ireland"]

In [None]:
import re
missing_values = []
nonumeric_values = []

print ("DATA SET INFORMATION")
print ("========================\n")

for column in dfireland:
    # Find all the unique feature values
    uniq = dfireland[column].unique()
    print ("'{}' has {} unique values" .format(column,uniq.size))
    if (uniq.size > 10):
        print("~~Listing up to 10 unique values~~")
    print (uniq[0:10])
    print ("\n-----------------------------------------------------------------------\n")
    
    # Find features with missing values
    if (True in pd.isnull(uniq)):
        s = "{} has {} missing" .format(column, pd.isnull(dfireland[column]).sum())
        missing_values.append(s)
    
    # Find features with non-numeric values
    for i in range (1, np.prod(uniq.shape)):
        if (re.match('nan', str(uniq[i]))):
            break
        if not (re.search('(^\d+\.?\d*$)|(^\d*\.?\d+$)', str(uniq[i]))):
            nonumeric_values.append(column)
            break
  
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")
print ("Features with missing values:\n{}\n\n" .format(missing_values))
print ("Features with non-numeric values:\n{}" .format(nonumeric_values))
print ("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n")



In [None]:
# Keep only the Ireland rows that have a value recorded in the '2015' column.
# 2015 was chosen because 2017 data is missing for some rows
# (the original note was cut off here; presumably "some of the indicators").
has_2015_value = dfireland['2015'].notna()
dfi2 = dfireland[has_2015_value]

In [None]:
# Show the name of every indicator that survived the 2015 filter.
dfi2['Indicator Name'].tolist()

In [None]:
# Build a per-year table of three Ireland series: total labor force,
# annual population growth and total population.
# Maps each source indicator name to the column label used in `dfip`.
population_indicators = {
    "Labor force, total": "Total Labor Force",
    "Population growth (annual %)": "Population growth (annual %)",
    "Population, total": "Total Population",
}

population_rows = dfi2[dfi2['Indicator Name'].isin(list(population_indicators))]

# Fail loudly with the missing names instead of an opaque length-mismatch error.
absent_indicators = sorted(set(population_indicators) - set(population_rows['Indicator Name']))
if absent_indicators:
    raise ValueError("Indicators not found in dfi2: {}".format(absent_indicators))

# Keep only the year columns (positions 2:60, as in the original selection —
# TODO confirm these positions still match the year columns if the CSV layout
# changes), then transpose so each year becomes a row.
dfip = population_rows.iloc[:, 2:60].T

# Label every series from the indicator name of the row it came from, so the
# labels stay correct regardless of the row order in the source file.
dfip.columns = population_rows['Indicator Name'].map(population_indicators).tolist()
dfip = dfip.rename_axis('Year').reset_index()

# Fixed column order: Year, Total Labor Force, Population growth (annual %), Total Population.
dfip = dfip[['Year'] + list(population_indicators.values())]
dfip

In [None]:
# Build a year-indexed table with one column per selected Ireland indicator.
# The indicator names are listed once; column labels are taken from the data
# itself, so a different row order in the source file cannot mislabel a series.
indicator_names = [
    'Adolescent fertility rate (births per 1,000 women ages 15-19)',
    'Adults (ages 15+) and children (0-14 years) living with HIV',
    'Adults (ages 15+) living with HIV',
    'Age dependency ratio (% of working-age population)',
    'Age dependency ratio, old',
    'Age dependency ratio, young',
    'Age population, age 0, female, interpolated',
    'Age population, age 0, male, interpolated',
    'Age population, age 01, female, interpolated',
    'Age population, age 01, male, interpolated',
    'Age population, age 02, female, interpolated',
    'Age population, age 02, male, interpolated',
    'Age population, age 03, female, interpolated',
    'Age population, age 03, male, interpolated',
    'Age population, age 04, female, interpolated',
    'Age population, age 04, male, interpolated',
    'Age population, age 05, female, interpolated',
    'Age population, age 05, male, interpolated',
    'Age population, age 06, female, interpolated',
    'Age population, age 06, male, interpolated',
    'Age population, age 07, female, interpolated',
    'Age population, age 07, male, interpolated',
    'Age population, age 08, female, interpolated',
    'Age population, age 08, male, interpolated',
    'Age population, age 09, female, interpolated',
    'Age population, age 09, male, interpolated',
    'Age population, age 10, female, interpolated',
    'Age population, age 10, male, interpolated',
    'Age population, age 11, female, interpolated',
    'Age population, age 11, male, interpolated',
    'Age population, age 12, female, interpolated',
    'Age population, age 12, male, interpolated',
    'Age population, age 13, female, interpolated',
    'Age population, age 13, male, interpolated',
    'Age population, age 14, female, interpolated',
    'Age population, age 14, male, interpolated',
    'Age population, age 15, female, interpolated',
    'Age population, age 15, male, interpolated',
    'Age population, age 16, female, interpolated',
    'Age population, age 16, male, interpolated',
    'Age population, age 17, female, interpolated',
    'Age population, age 17, male, interpolated',
    'Age population, age 18, female, interpolated',
    'Age population, age 18, male, interpolated',
    'Age population, age 19, female, interpolated',
    'Age population, age 19, male, interpolated',
    'Age population, age 20, female, interpolated',
    'Age population, age 20, male, interpolated',
    'Age population, age 21, female, interpolated',
    'Age population, age 21, male, interpolated',
    'Age population, age 22, female, interpolated',
    'Age population, age 22, male, interpolated',
    'Age population, age 23, female, interpolated',
    'Age population, age 23, male, interpolated',
    'Age population, age 24, female, interpolated',
    'Age population, age 24, male, interpolated',
    'Age population, age 25, female, interpolated',
    'Age population, age 25, male, interpolated',
    'AIDS estimated deaths (UNAIDS estimates)',
    'Antiretroviral therapy coverage (% of people living with HIV)',
    'Birth rate, crude (per 1,000 people)',
    'Births attended by skilled health staff (% of total)',
    'Capital health expenditure (% of GDP)',
    'Cause of death, by communicable diseases and maternal, prenatal and nutrition conditions (% of total)',
    'Cause of death, by injury (% of total)',
    'Cause of death, by non-communicable diseases (% of total)',
    'Current health expenditure (% of GDP)',
    'Current health expenditure per capita (current US$)',
    'Current health expenditure per capita, PPP (current international $)',
    'Death rate, crude (per 1,000 people)',
    'Domestic general government health expenditure (% of current health expenditure)',
    'Domestic general government health expenditure (% of GDP)',
    'Domestic general government health expenditure (% of general government expenditure)',
    'Domestic general government health expenditure per capita (current US$)',
    'Domestic general government health expenditure per capita, PPP (current international $)',
    'Domestic private health expenditure (% of current health expenditure)',
    'Domestic private health expenditure per capita (current US$)',
    'Domestic private health expenditure per capita, PPP  (current international $)',
    'External health expenditure (% of current health expenditure)',
    'External health expenditure per capita (current US$)',
    'External health expenditure per capita, PPP (current international $)',
    'Female population 00-04',
    'Female population 05-09',
    'Female population 10-14',
    'Female population 15-19',
    'Female population 20-24',
    'Female population 25-29',
    'Female population 30-34',
    'Female population 35-39',
    'Female population 40-44',
    'Female population 45-49',
    'Female population 50-54',
    'Female population 55-59',
    'Female population 60-64',
    'Female population 65-69',
    'Female population 70-74',
    'Female population 75-79',
    'Female population 80+',
    'Fertility rate, total (births per woman)',
    'GNI per capita, Atlas method (current US$)',
    'Immunization, BCG (% of one-year-old children)',
    'Immunization, DPT (% of children ages 12-23 months)',
    'Immunization, HepB3 (% of one-year-old children)',
    'Immunization, Hib3 (% of children ages 12-23 months)',
    'Immunization, measles (% of children ages 12-23 months)',
    'Immunization, Pol3 (% of one-year-old children)',
    'Incidence of tuberculosis (per 100,000 people)',
    'Labor force, female (% of total labor force)',
    'Labor force, total',
    'Life expectancy at birth, female (years)',
    'Life expectancy at birth, male (years)',
    'Life expectancy at birth, total (years)',
    'Lifetime risk of maternal death (%)',
    'Lifetime risk of maternal death (1 in: rate varies by country)',
    'Male population 00-04',
    'Male population 05-09',
    'Male population 10-14',
    'Male population 15-19',
    'Male population 20-24',
    'Male population 25-29',
    'Male population 30-34',
    'Male population 35-39',
    'Male population 40-44',
    'Male population 45-49',
    'Male population 50-54',
    'Male population 55-59',
    'Male population 60-64',
    'Male population 65-69',
    'Male population 70-74',
    'Male population 75-79',
    'Male population 80+',
    'Maternal leave benefits (% of wages paid in covered period)',
    'Maternal mortality ratio (modeled estimate, per 100,000 live births)',
    'Mortality caused by road traffic injury (per 100,000 people)',
    'Mortality from CVD, cancer, diabetes or CRD between exact ages 30 and 70 (%)',
    'Mortality rate attributed to unintentional poisoning (per 100,000 population)',
    'Mortality rate attributed to unintentional poisoning, female (per 100,000 female population)',
    'Mortality rate attributed to unintentional poisoning, male (per 100,000 male population)',
    'Mortality rate, infant (per 1,000 live births)',
    'Mortality rate, infant, female (per 1,000 live births)',
    'Mortality rate, infant, male (per 1,000 live births)',
    'Mortality rate, neonatal (per 1,000 live births)',
    'Mortality rate, under-5 (per 1,000)',
    'Mortality rate, under-5, female (per 1,000)',
    'Mortality rate, under-5, male (per 1,000)',
    'Number of deaths ages 5-14 years',
    'Number of infant deaths',
    'Number of maternal deaths',
    'Number of neonatal deaths',
    'Number of under-five deaths',
    'Number of weeks of maternity leave',
    'Out-of-pocket expenditure (% of current health expenditure)',
    'Out-of-pocket expenditure per capita (current US$)',
    'Out-of-pocket expenditure per capita, PPP (current international $)',
    'People practicing open defecation (% of population)',
    'People practicing open defecation, rural (% of rural population)',
    'People practicing open defecation, urban (% of urban population)',
    'People using at least basic drinking water services (% of population)',
    'People using at least basic drinking water services, rural (% of rural population)',
    'People using at least basic drinking water services, urban (% of urban population)',
    'People using at least basic sanitation services (% of population)',
    'People using at least basic sanitation services, rural (% of rural population)',
    'People using at least basic sanitation services, urban  (% of urban population)',
    'People using safely managed drinking water services (% of population)',
    'People using safely managed sanitation services (% of population)',
    'People using safely managed sanitation services, urban  (% of urban population)',
    'Physicians (per 1,000 people)',
    'Population ages 00-04, female (% of female population)',
    'Population ages 00-04, male (% of male population)',
    'Population ages 00-14 (% of total)',
    'Population ages 00-14, female (% of total)',
    'Population ages 00-14, male (% of total)',
    'Population ages 00-14, total',
    'Population ages 0-14, female',
    'Population ages 0-14, male',
    'Population ages 05-09, female (% of female population)',
    'Population ages 05-09, male (% of male population)',
    'Population ages 10-14, female (% of female population)',
    'Population ages 15-19, female (% of female population)',
    'Population ages 15-19, male (% of male population)',
    'Population ages 15-64 (% of total)',
    'Population ages 15-64, female',
    'Population ages 15-64, female (% of total)',
    'Population ages 15-64, male',
    'Population ages 15-64, male (% of total)',
    'Population ages 15-64, total',
    'Population ages 20-24, female (% of female population)',
    'Population ages 20-24, male (% of male population)',
    'Population ages 25-29, female (% of female population)',
    'Population ages 25-29, male (% of male population)',
    'Population ages 30-34, female (% of female population)',
    'Population ages 30-34, male (% of male population)',
    'Population ages 35-39, female (% of female population)',
    'Population ages 35-39, male (% of male population)',
    'Population ages 40-44, female (% of female population)',
    'Population ages 40-44, male (% of male population)',
    'Population ages 45-49, female (% of female population)',
    'Population ages 45-49, male (% of male population)',
    'Population ages 50-54, female (% of female population)',
    'Population ages 50-54, male (% of male population)',
    'Population ages 55-59, female (% of female population)',
    'Population ages 55-59, male (% of male population)',
    'Population ages 60-64, female (% of female population)',
    'Population ages 60-64, male (% of male population)',
    'Population ages 65 and above (% of total)',
    'Population ages 65 and above, female',
    'Population ages 65 and above, female (% of total)',
    'Population ages 65 and above, male',
    'Population ages 65 and above, male (% of total)',
    'Population ages 65 and above, total',
    'Population ages 65-69, female (% of female population)',
    'Population ages 65-69, male (% of male population)',
    'Population ages 70-74, female (% of female population)',
    'Population ages 70-74, male (% of male population)',
    'Population ages 75-79, female (% of female population)',
    'Population ages 75-79, male (% of male population)',
    'Population ages 80 and older, female (% of female population)',
    'Population ages 80 and older, male (% of male population)',
    'Population ages10-14, male (% of male population)',
    'Population growth (annual %)',
    'Population, female',
    'Population, female (% of total)',
    'Population, male',
    'Population, male (% of total)',
    'Population, total',
    'Prevalence of anemia among children (% of children under 5)',
    'Prevalence of anemia among non-pregnant women (% of women ages 15-49)',
    'Prevalence of anemia among pregnant women (%)',
    'Prevalence of anemia among women of reproductive age (% of women ages 15-49)',
    'Prevalence of HIV, female (% ages 15-24)',
    'Prevalence of HIV, male (% ages 15-24)',
    'Prevalence of HIV, total (% of population ages 15-49)',
    'Prevalence of overweight (% of adults)',
    'Prevalence of overweight, female (% of female adults)',
    'Prevalence of overweight, male (% of male adults)',
    'Prevalence of undernourishment (% of population)',
    'Probability of dying at age 5-14 years (per 1,000 children age 5)',
    'Risk of catastrophic expenditure for surgical care (% of people at risk)',
    'Risk of impoverishing expenditure for surgical care (% of people at risk)',
    'Rural population',
    'Rural population (% of total population)',
    'Rural population growth (annual %)',
    'School enrollment, primary (% gross)',
    'School enrollment, primary (% net)',
    'School enrollment, primary, female (% gross)',
    'School enrollment, primary, male (% gross)',
    'School enrollment, secondary (% gross)',
    'School enrollment, secondary, female (% gross)',
    'School enrollment, secondary, male (% gross)',
    'School enrollment, tertiary (% gross)',
    'School enrollment, tertiary, female (% gross)',
    'Sex ratio at birth (male births per female births)',
    'Smoking prevalence, females (% of adults)',
    'Smoking prevalence, males (% of adults)',
    'Smoking prevalence, total, ages 15+',
    'Specialist surgical workforce (per 100,000 population)',
    'Suicide mortality rate (per 100,000 population)',
    'Survival to age 65, female (% of cohort)',
    'Survival to age 65, male (% of cohort)',
    'Tuberculosis case detection rate (all forms)',
    'Tuberculosis death rate (per 100,000 people)',
    'Tuberculosis treatment success rate (% of new cases)',
    'UHC service coverage index',
    'Unemployment, female (% of female labor force)',
    'Unemployment, male (% of male labor force)',
    'Unemployment, total (% of total labor force)',
    'Urban population',
    'Urban population (% of total)',
    'Urban population growth (annual %)',
    "Women's share of population ages 15+ living with HIV (%)",
]

indicator_rows = dfi2[dfi2['Indicator Name'].isin(indicator_names)]

# Fail loudly with the missing names instead of an opaque length-mismatch error.
absent_indicators = sorted(set(indicator_names) - set(indicator_rows['Indicator Name']))
if absent_indicators:
    raise ValueError("Indicators not found in dfi2: {}".format(absent_indicators))

# Keep only the year columns (positions 2:60, as in the original selection —
# TODO confirm these positions still match the year columns if the CSV layout
# changes), then transpose so each year becomes a row.
dfi = indicator_rows.iloc[:, 2:60].T

# Label each column with the indicator name of the row it came from, index the
# table by year, and present the columns in the order listed above.
dfi.columns = indicator_rows['Indicator Name'].tolist()
dfi.index.name = 'Year'
dfi = dfi[indicator_names]
dfi.head()

##### Matplotlib

In [None]:
# Line chart of Ireland's total population by year.
plt.style.use('ggplot')

# Size this figure explicitly rather than changing the global default, so the
# setting cannot leak into figures drawn by other cells.
fig, ax = plt.subplots(figsize=(100, 50))

# plot the data
ax.plot(dfip['Year'], dfip['Total Population'], color='blue')

# Enlarge the tick labels on both axes
ax.tick_params(axis='both', which='major', labelsize=30)

ax.set_title('Total Population in Ireland from 1960-2017', fontsize=15)
ax.set_xlabel('Year', fontsize=15)
ax.set_ylabel('Total Population', fontsize=15)
ax.grid(which='both')
plt.show()

In [None]:
plt.rcParams['figure.figsize'] = (20, 10)

# Histogram of the 'Total Population' column of dfip.
hist_options = dict(
    column="Total Population",   # Column to plot
    bins=5,                      # Use 5 bins
    color="blue",                # Plot color
    figsize=(20, 20),            # Plot size
)
dfip.hist(**hist_options)

###### Seaborn

In [None]:
sns.set(style="whitegrid")
# Initialize the matplotlib figure
f, ax = plt.subplots(figsize=(30, 4))

# Plot Ireland's total population by year on the axes created above
sns.set_color_codes("deep")
sns.lineplot(x="Year", y="Total Population", data=dfip, color="r", ax=ax)

ax.set_title('Total Population in Ireland from 1960-2017', fontsize=15)
ax.set_xlabel('Year', fontsize=15)
ax.set_ylabel('Total Population', fontsize=15)
plt.show()

https://stackoverflow.com/questions/48225888/matplotlib-finance-candlestick-ohlc-plot-intraday-1min-bar-data-with-time-breaks

https://jakevdp.github.io/PythonDataScienceHandbook/04.08-multiple-subplots.html



In [None]:

# Two stacked panels: box plot of total population on top, yearly trend below.
f, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(50, 10))
sns.boxplot(data=dfip, x="Total Population", ax=ax1)
sns.lineplot(data=dfip, x="Year", y="Total Population", ax=ax2)

In [None]:
# Kernel-density view of how each series in dfip is distributed across the
# available years, one panel per series.
# NOTE(review): the previous version of this cell referenced columns that do
# not exist in dfip ('PROC_FLAGS', 'COV_ALLW_CONTR_AMT_sum',
# 'chrg_allw_sum_diff') and closed the figure before it could be shown; it has
# been rewritten against the columns dfip actually has — confirm this is the
# intended view.
sns.set(style="white", palette='muted', color_codes=True)

kde_columns = ['Total Population', 'Total Labor Force', 'Population growth (annual %)']

# Set up the matplotlib figure. The x-axes are not shared because the three
# series are on very different scales.
f, (ax1, ax2, ax3) = plt.subplots(nrows=3, figsize=(12, 10))

for ax, column in zip((ax1, ax2, ax3), kde_columns):
    # Drop years with no recorded value so the density uses real observations only.
    observed = dfip[column].dropna()
    # `shade=True` matches the seaborn API this notebook already used;
    # on seaborn >= 0.11 the equivalent spelling is `fill=True`.
    sns.kdeplot(observed, shade=True, ax=ax)
    ax.set_title("Distribution of {}".format(column), fontsize=14)
    ax.set_xlabel(column)
    ax.set_ylabel("Probability Distribution")

sns.despine(left=True)
plt.tight_layout()
plt.show()

