In [16]:
%matplotlib notebook

import os
import csv
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sn
from scipy.stats import sem

# This notebook is intended for the case in which we decide to analyze correlations with life expectancy

# ESTABLISH PATHS

# Directory that every indicator CSV below is read from; change it here to
# relocate all of the inputs at once instead of editing each path.
DATA_DIR = "./data/good_data"

# Probability (%) of dying between age 30 and exact age 70 from cardiovascular disease, cancer, diabetes, or chronic respiratory disease
cancer_etc_path = DATA_DIR + "/30-70cancerChdEtc.csv"
# could also analyze healthy life expectancy at birth (?) instead of cancer etc.
hale_birth_path = DATA_DIR + "/HALElifeExpectancyAtBirth.csv"

# Access to sanitation, hand washing, drinking water, and safely managed sanitation facilities
sanitation_path = DATA_DIR + "/atLeastBasicSanitizationServices.csv"
handwashing_path = DATA_DIR + "/basicHandWashing.csv"
drinkingwater_path = DATA_DIR + "/basicDrinkingWaterServices.csv"
safelysanitation_path = DATA_DIR + "/safelySanitization.csv"

# Primary reliance on clean fuels and technologies
cleantech_path = DATA_DIR + "/cleanFuelAndTech.csv"

# Dentists (per 10,000 people)
dentists_path = DATA_DIR + "/dentists.csv"

# Medical doctors (per 10,000 people)
doctors_path = DATA_DIR + "/medicalDoctors.csv"

# Mortality rate due to unsafe wash (per 100,000 people)
unsafewash_path = DATA_DIR + "/mortalityRateUnsafeWash.csv"

# Married or in-union women of reproductive age who have their need for family planning satisfied with modern methods (%)
reproductiveneeds_path = DATA_DIR + "/reproductiveAgeWomen.csv"

# Age-standardized prevalence of current tobacco smoking among persons aged 15 years and older
tobacco_path = DATA_DIR + "/tobaccoAge15.csv"

# CONVERT TO DATA FRAMES

def _read_indicator_csv(path: str) -> pd.DataFrame:
    """Load one indicator CSV into a DataFrame, decoding the file as UTF-8.

    Args:
        path: Location of the CSV file; handed straight to ``pd.read_csv``.

    Returns:
        A DataFrame holding the contents of the file.
    """
    return pd.read_csv(path, encoding="utf-8")


# By default a notebook cell renders only its final expression, so a bare
# frame name placed between the loads below would be evaluated and discarded.
# To inspect a particular frame, run e.g. ``sanitation_df.head()`` in its own cell.

# Healthy life expectancy (HALE) at birth
hale_birth_df = _read_indicator_csv(hale_birth_path)

# Probability (%) of dying between 30 and 70 of cancer, cardiovascular disease, diabetes or chronic respiratory disease
cancer_etc_df = _read_indicator_csv(cancer_etc_path)

# Access to at least basic sanitation (%)
sanitation_df = _read_indicator_csv(sanitation_path)

# Access to handwashing facilities (%)
handwashing_df = _read_indicator_csv(handwashing_path)

# Population using at least basic drinking-water services (%)
drinkingwater_df = _read_indicator_csv(drinkingwater_path)

# Proportion of population with primary reliance on clean fuels and technologies (%)
cleantech_df = _read_indicator_csv(cleantech_path)

# Dentists (per 10,000 people)
dentists_df = _read_indicator_csv(dentists_path)

# Medical doctors (per 10,000 people)
doctors_df = _read_indicator_csv(doctors_path)

# Mortality rate attributed to exposure to unsafe WASH services (per 100,000 population)
unsafewash_df = _read_indicator_csv(unsafewash_path)

# Married or in-union women of reproductive age with their family planning needs satisfied by modern methods (%)
reproductiveneeds_df = _read_indicator_csv(reproductiveneeds_path)

# Population using safely managed sanitation services (%)
safelysanitation_df = _read_indicator_csv(safelysanitation_path)

# Age-standardized prevalence of current tobacco smoking among persons aged 15 years and older (%)
tobacco_df = _read_indicator_csv(tobacco_path)

# NOTE(review): the rendered tobacco_df has an "Unnamed: 0" column, which looks
# like a saved row index in the CSV. Passing index_col=0 to read_csv would drop
# it -- TODO confirm no other cell references that column before changing this.
# Final expression of the cell: this is the one frame that gets displayed.
tobacco_df

Unnamed: 0,Location,Indicator,Period,Dim1,First Tooltip
0,Albania,Age-standardized prevalence of current tobacco...,2018,Both sexes,29.2
1,Albania,Age-standardized prevalence of current tobacco...,2018,Male,50.5
2,Albania,Age-standardized prevalence of current tobacco...,2018,Female,7.9
3,Albania,Age-standardized prevalence of current tobacco...,2017,Both sexes,29.3
4,Albania,Age-standardized prevalence of current tobacco...,2017,Male,50.6
...,...,...,...,...,...
4018,Zimbabwe,Age-standardized prevalence of current tobacco...,2005,Male,31.6
4019,Zimbabwe,Age-standardized prevalence of current tobacco...,2005,Female,2.3
4020,Zimbabwe,Age-standardized prevalence of current tobacco...,2000,Both sexes,18.3
4021,Zimbabwe,Age-standardized prevalence of current tobacco...,2000,Male,33.7


In [None]:
# 