In [1]:
# Standard modules
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

# Other modules
import sys
import time

# Import suftware
sys.path.append('../')
import suftware as ft

In [2]:
df = pd.read_excel('../examples/data/raw/who_data_2017.xlsx', sheetname='Annex B', header=2)
col_names = [
    'state',
    'population (thousands)',
    'life_expectancy_male (years)',
    'life_expectancy_female (years)',
    'life_expectancy_all (years)',
    'healthy_life_expectancy (years)',
    'maternal_mortaility (per 100,000 live births)',
    'attended_births (percentage)',
    'drop',
    'under_five_mortatility (per 1000 live births)',
    'neonatal_mortality (per 1000 live births)',
    'new_hiv_infections (per 1000 uninfected population)',
    'tb_incidence (per 100,000 population)',
    'malaria incidence (per 100,000 population)',
    'hepatitis_b_vaccination (percent)',
    'drop',
    'drop',
    'suicide_mortality (per 100,000 population)',
    'drop',
    'alcohol_consumption (pure liters per year)',
    'road_traffic_mortality (per 100,000 population)',
    'drop',
    'family_planning_needs_met (percentage)',
    'adolescent_birth_rate (per 1000 population)',
    'drop',
    'drop'
]

#df.columns = col_names
df = df.loc[2:195,:]
for n, c in enumerate(df.columns):
    print('%d: %s'%(n,c))
df.columns = col_names
del df['drop']
df.set_index('state', inplace=True)
df.replace(regex='<|>| ',value='', inplace=True)
df.replace(to_replace=df.iloc[3,1],value='NaN', inplace=True)
df.astype(dtype=float,inplace=True)
df.head()

0: Member State
1: Total population a (000s)
2: Life expectancy at birth b (years)
3: Unnamed: 3
4: Unnamed: 4
5: Healthy life expectancy at birth b (years)
6: Maternal mortality ratio c (per 100 000 live births)
7: Proportion of births attended by skilled health personnel d (%)
8: Unnamed: 8
9: Under-five mortality rate e (per 1000 live births)
10: Neonatal mortality rate e (per 1000 live births)
11: New HIV infections among adults 15-49 years old f (per 1000 uninfected population)
12: TB incidence g (per 100 000 population)
13: Malaria incidence h (per 1000 population at risk)
14: Infants receiving three doses of hepatitis B vaccine i (%)
15: Reported number of people requiring interventions against NTDs j
16: Probability of dying from any of CVD, cancer, diabetes, CRD between age 30 and exact age 70 k (%)
17: Suicide mortality rate k (per 100 000 population)
18: Unnamed: 18
19: Total alcohol per capita (>=Â 15 years of age) consumption (litres of pure alcohol), projected estimates l

Unnamed: 0_level_0,population (thousands),life_expectancy_male (years),life_expectancy_female (years),life_expectancy_all (years),healthy_life_expectancy (years),"maternal_mortaility (per 100,000 live births)",attended_births (percentage),under_five_mortatility (per 1000 live births),neonatal_mortality (per 1000 live births),new_hiv_infections (per 1000 uninfected population),"tb_incidence (per 100,000 population)","malaria incidence (per 100,000 population)",hepatitis_b_vaccination (percent),"suicide_mortality (per 100,000 population)",alcohol_consumption (pure liters per year),"road_traffic_mortality (per 100,000 population)",family_planning_needs_met (percentage),adolescent_birth_rate (per 1000 population)
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
Afghanistan,32527,59.3,61.9,60.5,52.2,396.0,51.0,91.1,35.5,0.06,189.0,23.6,78,5.5,0.5,15.5,,51.9
Albania,2897,75.1,80.7,77.8,68.8,29.0,99.0,14.0,6.2,,19.0,,98,4.3,5.7,15.1,12.9,19.7
Algeria,39667,73.8,77.5,75.6,66.0,140.0,97.0,25.5,15.5,0.02,75.0,0.1,95,3.1,1.0,23.8,77.2,12.4
Andorra,70,,,,,,,2.8,1.4,,6.5,,94,,10.5,7.6,,4.4
Angola,25022,50.9,54.0,52.4,45.8,477.0,47.0,156.9,48.7,1.86,370.0,124.0,64,20.5,10.8,26.9,,190.9


In [3]:

# Write datasets
for col in df.columns:
    # Extract information
    values = df[col].astype(float).values
    values = values[np.isfinite(values)]
    name = col.split()[0]
    units = col.split('(')[1].split(')')[0]
    
    # Format file contentx
    header = '# "description": "%s"\n# "units": "%s"\n'%(name,units)
    if (name in ['population']) or ('percent' in units):
        contents = ''.join(['%d\n'%x for x in values])
    else:
        contents = ''.join(['%.1f\n'%x for x in values])
    
    # Write file
    file_name = '../examples/data/%s.txt'%name
    print('Writing %s'%file_name)
    with open(file_name,'w') as f:
        f.write(header)
        f.write(contents)

Writing ../examples/data/population.txt
Writing ../examples/data/life_expectancy_male.txt
Writing ../examples/data/life_expectancy_female.txt
Writing ../examples/data/life_expectancy_all.txt
Writing ../examples/data/healthy_life_expectancy.txt
Writing ../examples/data/maternal_mortaility.txt
Writing ../examples/data/attended_births.txt
Writing ../examples/data/under_five_mortatility.txt
Writing ../examples/data/neonatal_mortality.txt
Writing ../examples/data/new_hiv_infections.txt
Writing ../examples/data/tb_incidence.txt
Writing ../examples/data/malaria.txt
Writing ../examples/data/hepatitis_b_vaccination.txt
Writing ../examples/data/suicide_mortality.txt
Writing ../examples/data/alcohol_consumption.txt
Writing ../examples/data/road_traffic_mortality.txt
Writing ../examples/data/family_planning_needs_met.txt
Writing ../examples/data/adolescent_birth_rate.txt


In [4]:
import os
os.listdir('../examples/data')

['population.txt',
 'new_hiv_infections.txt',
 'attended_births.txt',
 'hepatitis_b_vaccination.txt',
 'road_traffic_mortality.txt',
 '.DS_Store',
 'healthy_life_expectancy.txt',
 'tb_incidence.txt',
 'family_planning_needs_met.txt',
 'malaria.txt',
 'old_faithful_eruption_times.txt',
 'buffalo_snowfall.txt',
 'suicide_mortality.txt',
 'alcohol_consumption.txt',
 'adolescent_birth_rate.txt',
 'maternal_mortaility.txt',
 'treatment_length.txt',
 'neonatal_mortality.txt',
 'under_five_mortatility.txt',
 'life_expectancy_female.txt',
 'old_faithful_waiting_times.txt',
 'life_expectancy_male.txt',
 'raw',
 'life_expectancy_all.txt']