In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import os
import matplotlib.pyplot as pyplot
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

In [3]:
## Read the pharmacists CSV into a dataframe and tidy up its columns
pharm_csv = os.path.join(os.getcwd(), "Data", "pharmacists.csv")
## Give the value column a descriptive name and discard the "Indicator" column.
## The "Pharmicists" spelling is kept as-is: the merged tables shown later use this exact label.
pharm_df = (
    pd.read_csv(pharm_csv)
    .rename(columns={"First Tooltip": "Pharmicists Per 10,000"})
    .drop(columns="Indicator")
)

In [4]:
## Read the medical doctors CSV into a dataframe and tidy up its columns
med_doctors_csv = os.path.join(os.getcwd(), "Data", "medicalDoctors.csv")
## Give the value column a descriptive name and discard the "Indicator" column
med_doctors_df = (
    pd.read_csv(med_doctors_csv)
    .rename(columns={"First Tooltip": "Medical Doctors Per 10,000"})
    .drop(columns="Indicator")
)

In [5]:
## Read the life expectancy CSV and rename its key columns to "Location"/"Period",
## the same key names the merge cells join on
life_expect_csv = os.path.join(os.getcwd(), "Data", "Life expectancy.csv")
life_expect_key_names = {"Entity": "Location", "Year": "Period"}
life_expect_df = pd.read_csv(life_expect_csv).rename(columns=life_expect_key_names)

In [6]:
## Read the basic handwashing CSV into a dataframe and tidy up its columns
hand_wash_csv = os.path.join(os.getcwd(), "Data", "basicHandWashing.csv")
hand_wash_df = (
    pd.read_csv(hand_wash_csv)
    .rename(columns={"First Tooltip": "Population with basic handwashing facilities at home (%)"})
    .drop(columns="Indicator")
)
## Keep only the rows whose "Dim1" label mentions neither "Urban" nor "Rural".
## The explicit comparison with False is deliberate: a missing "Dim1" value does not compare
## equal to False, so such rows are filtered out as well.
hand_wash_dim1 = hand_wash_df["Dim1"].str
hand_wash_keep = hand_wash_dim1.contains("Urban").eq(False) & hand_wash_dim1.contains("Rural").eq(False)
## "Dim1" has served its purpose once the rows are filtered, so it is removed
hand_wash_df = hand_wash_df.loc[hand_wash_keep].drop(columns="Dim1")

In [7]:
## Read the basic drinking-water services CSV into a dataframe and tidy up its columns
drink_water_csv = os.path.join(os.getcwd(), "Data", "basicDrinkingWaterServices.csv")
## Give the value column a descriptive name and discard the "Indicator" column
drink_water_df = (
    pd.read_csv(drink_water_csv)
    .rename(columns={"First Tooltip": "Population using at least basic drinking-water services (%)"})
    .drop(columns="Indicator")
)

In [8]:
## Read the basic sanitization services CSV into a dataframe and tidy up its columns
sanitize_csv = os.path.join(os.getcwd(), "Data", "atLeastBasicSanitizationServices.csv")
sanitize_df = (
    pd.read_csv(sanitize_csv)
    .rename(columns={"First Tooltip": "Population using at least basic sanitization services (%)"})
    .drop(columns="Indicator")
)
## Keep only the rows whose "Dim1" label mentions neither "Urban" nor "Rural".
## The explicit comparison with False is deliberate: a missing "Dim1" value does not compare
## equal to False, so such rows are filtered out as well.
sanitize_dim1 = sanitize_df["Dim1"].str
sanitize_keep = sanitize_dim1.contains("Urban").eq(False) & sanitize_dim1.contains("Rural").eq(False)
## "Dim1" has served its purpose once the rows are filtered, so it is removed
sanitize_df = sanitize_df.loc[sanitize_keep].drop(columns="Dim1")

In [9]:
# Merge the data: outer-join pharmacists with medical doctors on the shared
# Location/Period keys, so a row present in only one of the two tables is still kept
merge_df1 = pharm_df.merge(med_doctors_df, how="outer", on=["Location", "Period"])
merge_df1

Unnamed: 0,Location,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000"
0,Afghanistan,2016,0.47,2.78
1,Afghanistan,2015,0.50,2.85
2,Afghanistan,2014,0.51,2.98
3,Afghanistan,2012,0.27,2.41
4,Afghanistan,2011,0.28,2.52
...,...,...,...,...
2620,Zimbabwe,2011,,0.82
2621,Zimbabwe,2008,,0.60
2622,Zimbabwe,2007,,0.54
2623,Zimbabwe,2005,,1.26


In [10]:
# Outer-join the life expectancy table onto the previous merge using the same
# Location/Period keys
merge_df2 = merge_df1.merge(life_expect_df, how="outer", on=["Location", "Period"])
merge_df2

Unnamed: 0,Location,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000",Life expectancy
0,Afghanistan,2016,0.47,2.78,
1,Afghanistan,2015,0.50,2.85,
2,Afghanistan,2014,0.51,2.98,
3,Afghanistan,2012,0.27,2.41,
4,Afghanistan,2011,0.28,2.52,
...,...,...,...,...,...
5637,United States,2012,,,78.940002
5638,United States,2013,,,78.959999
5639,United States,2014,,,78.940002
5640,United States,2015,,,78.870003


In [11]:
# Outer-join the handwashing, drinking-water and sanitization tables onto the running
# merge, one table at a time, always on the Location/Period keys
merge_keys = ["Location", "Period"]
merge_df3 = merge_df2.merge(hand_wash_df, how="outer", on=merge_keys)
merge_df4 = merge_df3.merge(drink_water_df, how="outer", on=merge_keys)
final_merge_df = merge_df4.merge(sanitize_df, how="outer", on=merge_keys)
final_merge_df

Unnamed: 0,Location,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000",Life expectancy,Population with basic handwashing facilities at home (%),Population using at least basic drinking-water services (%),Population using at least basic sanitization services (%)
0,Afghanistan,2016,0.47,2.78,,37.67,54.84,42.05
1,Afghanistan,2015,0.50,2.85,,37.59,52.39,40.71
2,Afghanistan,2014,0.51,2.98,,37.52,49.96,39.37
3,Afghanistan,2012,0.27,2.41,,37.37,45.19,36.75
4,Afghanistan,2011,0.28,2.52,,37.30,42.84,35.46
...,...,...,...,...,...,...,...,...
7079,Zimbabwe,2003,,,,,58.59,46.11
7080,Zimbabwe,2002,,,,,59.23,46.15
7081,Zimbabwe,2001,,,,,59.88,46.05
7082,Republic of Korea,2001,,,,,,100.00


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of the
# merged table. "Period" is numeric, so it is summarised alongside the indicator columns.
final_merge_df.describe()

Unnamed: 0,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000",Life expectancy,Population with basic handwashing facilities at home (%),Population using at least basic drinking-water services (%),Population using at least basic sanitization services (%)
count,7084.0,1795.0,2506.0,3253.0,921.0,3455.0,3439.0
mean,1961.520045,4.124118,20.685012,48.68038,47.501911,77.847103,71.211966
std,65.208711,3.62461,14.299267,17.965669,32.774406,23.98757,30.342367
min,1800.0,0.002,0.13,8.108836,1.03,10.13,3.4
25%,1918.0,0.79,7.7825,32.0,13.98,59.895,45.85
50%,2000.0,3.53,21.28,41.880001,44.52,87.65,85.26
75%,2009.0,6.39,31.66,66.82,81.74,98.46,97.63
max,2018.0,26.3,84.22,83.940002,100.0,100.0,100.0


In [13]:
# Count the distinct values in the "Location" column, i.e. how many countries
# appear in the merged table
country = final_merge_df["Location"].nunique()
country

198

In [15]:
# Mean drinking-water coverage for each Location across all of its rows
(
    final_merge_df
    .groupby("Location")["Population using at least basic drinking-water services (%)"]
    .mean()
)


Location
Afghanistan                            37.755000
Albania                                84.061667
Algeria                                86.305000
Andorra                               100.000000
Angola                                 24.313889
                                         ...    
Venezuela (Bolivarian Republic of)     96.187778
Viet Nam                               83.970556
Yemen                                  40.171667
Zambia                                 36.503889
Zimbabwe                               55.077778
Name: Population using at least basic drinking-water services (%), Length: 198, dtype: float64

In [16]:
# Median drinking-water coverage for each Location across all of its rows
(
    final_merge_df
    .groupby("Location")["Population using at least basic drinking-water services (%)"]
    .median()
)

Location
Afghanistan                            37.100
Albania                                82.215
Algeria                                86.365
Andorra                               100.000
Angola                                 24.325
                                       ...   
Venezuela (Bolivarian Republic of)     96.190
Viet Nam                               83.860
Yemen                                  40.170
Zambia                                 36.590
Zimbabwe                               55.085
Name: Population using at least basic drinking-water services (%), Length: 198, dtype: float64

In [17]:
 # Variance of drinking-water coverage for each Location across all of its rows
 (
     final_merge_df
     .groupby("Location")["Population using at least basic drinking-water services (%)"]
     .var()
 )

Location
Afghanistan                           136.584379
Albania                                11.207215
Algeria                                 3.315509
Andorra                                 0.000000
Angola                                  3.901755
                                         ...    
Venezuela (Bolivarian Republic of)      0.084948
Viet Nam                               28.983217
Yemen                                 116.555591
Zambia                                 11.750002
Zimbabwe                               10.950689
Name: Population using at least basic drinking-water services (%), Length: 198, dtype: float64

In [18]:
 # Standard deviation of drinking-water coverage for each Location across all of its rows
 (
     final_merge_df
     .groupby("Location")["Population using at least basic drinking-water services (%)"]
     .std()
 )

Location
Afghanistan                           11.686932
Albania                                3.347718
Algeria                                1.820854
Andorra                                0.000000
Angola                                 1.975286
                                        ...    
Venezuela (Bolivarian Republic of)     0.291458
Viet Nam                               5.383606
Yemen                                 10.796091
Zambia                                 3.427828
Zimbabwe                               3.309183
Name: Population using at least basic drinking-water services (%), Length: 198, dtype: float64

In [19]:
 # Standard error of the mean of drinking-water coverage for each Location
 (
     final_merge_df
     .groupby("Location")["Population using at least basic drinking-water services (%)"]
     .sem()
 )

Location
Afghanistan                           2.754636
Albania                               0.789065
Algeria                               0.429179
Andorra                               0.000000
Angola                                0.465579
                                        ...   
Venezuela (Bolivarian Republic of)    0.068697
Viet Nam                              1.268928
Yemen                                 2.544663
Zambia                                0.807947
Zimbabwe                              0.779982
Name: Population using at least basic drinking-water services (%), Length: 198, dtype: float64

In [37]:
# ! pip install pycountry-convert

In [28]:
# import pycountry_convert
# import pycountry

In [32]:
# from pycountry_convert import country_name_to_country_alpha2

In [38]:
# df = country_name_to_country_alpha2(final_merge_df["Location"],cn_name_format="default")