In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
from scipy.stats import linregress

In [3]:
import pycountry
import pycountry_convert as pc
from pycountry_convert import country_name_to_country_alpha2

In [4]:
# Load the pharmacists CSV from ./Data and tidy its columns in one pass.
pharm_csv = os.path.join(os.getcwd(), "Data", "pharmacists.csv")
pharm_df = (
    pd.read_csv(pharm_csv)
    # Give the generic "First Tooltip" value column a descriptive header.
    # NOTE(review): "Pharmicists" is misspelled; kept as-is because the
    # header is a runtime string that other cells/outputs may rely on.
    .rename(columns={"First Tooltip": "Pharmicists Per 10,000"})
    # The "Indicator" column is not carried into the merged data.
    .drop(columns=["Indicator"])
)

In [5]:
# Load the medical-doctors CSV from ./Data and tidy its columns in one pass.
med_doctors_csv = os.path.join(os.getcwd(), "Data", "medicalDoctors.csv")
med_doctors_df = (
    pd.read_csv(med_doctors_csv)
    # Give the generic "First Tooltip" value column a descriptive header.
    .rename(columns={"First Tooltip": "Medical Doctors Per 10,000"})
    # The "Indicator" column is not carried into the merged data.
    .drop(columns=["Indicator"])
)

In [6]:
# Load the world life-expectancy CSV and reshape it from wide to long form:
# after dropping "Code", every column other than Location is melted so that
# its header becomes the row's Period and its cell the Life Expectancy value.
life_expect_csv = os.path.join(os.getcwd(), "Data", "LifeExpectancy_world.csv")
life_expect_df = (
    pd.read_csv(life_expect_csv)
    .drop(columns=["Code"])
    # Use the same key name ("Location") as the other data sets.
    .rename(columns={"Entity": "Location"})
    .melt(id_vars=["Location"], var_name="Period", value_name="Life Expectancy")
    # Melted headers are cast to int so Period can be used as a merge key
    # alongside the other frames.
    .astype({"Period": int})
)
life_expect_df.head()

Unnamed: 0,Location,Period,Life Expectancy
0,Afghanistan,1950,27.638
1,Africa,1950,36.45
2,Albania,1950,54.191
3,Algeria,1950,42.087
4,American Samoa,1950,63.775


In [7]:
# Load the basic hand-washing CSV, rename its value column, and keep only the
# rows whose "Dim1" label mentions neither "Urban" nor "Rural".
# (Presumably this leaves the country-wide totals -- verify against the CSV.)
hand_wash_csv = os.path.join(os.getcwd(), "Data", "basicHandWashing.csv")
hand_wash_df = (
    pd.read_csv(hand_wash_csv)
    .rename(columns={"First Tooltip": "Population with basic handwashing facilities at home (%)"})
    .drop(columns=["Indicator"])
    # A row survives only when both substring checks come back exactly False.
    .loc[
        lambda frame: frame["Dim1"].str.contains("Urban").eq(False)
        & frame["Dim1"].str.contains("Rural").eq(False)
    ]
    # Dim1 has served its purpose as a filter and is not kept.
    .drop(columns=["Dim1"])
)

In [8]:
# Load the basic drinking-water CSV from ./Data and tidy its columns in one pass.
drink_water_csv = os.path.join(os.getcwd(), "Data", "basicDrinkingWaterServices.csv")
drink_water_df = (
    pd.read_csv(drink_water_csv)
    # Give the generic "First Tooltip" value column a descriptive header.
    .rename(columns={"First Tooltip": "Population using at least basic drinking-water services (%)"})
    # The "Indicator" column is not carried into the merged data.
    .drop(columns=["Indicator"])
)

In [9]:
# Load the basic sanitation CSV, rename its value column, and keep only the
# rows whose "Dim1" label mentions neither "Urban" nor "Rural".
# (Presumably this leaves the country-wide totals -- verify against the CSV.)
sanitize_csv = os.path.join(os.getcwd(), "Data", "atLeastBasicSanitizationServices.csv")
sanitize_df = (
    pd.read_csv(sanitize_csv)
    .rename(columns={"First Tooltip": "Population using at least basic sanitization services (%)"})
    .drop(columns=["Indicator"])
    # A row survives only when both substring checks come back exactly False.
    .loc[
        lambda frame: frame["Dim1"].str.contains("Urban").eq(False)
        & frame["Dim1"].str.contains("Rural").eq(False)
    ]
    # Dim1 has served its purpose as a filter and is not kept.
    .drop(columns=["Dim1"])
)

In [10]:
# Combine pharmacists and medical doctors on (Location, Period).
# An outer join keeps rows that exist in only one of the two sources.
merge_df1 = pharm_df.merge(
    med_doctors_df,
    how="outer",
    on=["Location", "Period"],
)
merge_df1

Unnamed: 0,Location,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000"
0,Afghanistan,2016,0.47,2.78
1,Afghanistan,2015,0.50,2.85
2,Afghanistan,2014,0.51,2.98
3,Afghanistan,2012,0.27,2.41
4,Afghanistan,2011,0.28,2.52
...,...,...,...,...
2620,Zimbabwe,2011,,0.82
2621,Zimbabwe,2008,,0.60
2622,Zimbabwe,2007,,0.54
2623,Zimbabwe,2005,,1.26


In [11]:
# Add life expectancy to the workforce data on (Location, Period).
# The outer join keeps Location/Period pairs found in only one of the frames.
merge_df2 = merge_df1.merge(
    life_expect_df,
    how="outer",
    on=["Location", "Period"],
)
merge_df2

Unnamed: 0,Location,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000",Life Expectancy
0,Afghanistan,2016,0.47,2.78,63.763
1,Afghanistan,2015,0.50,2.85,63.377
2,Afghanistan,2014,0.51,2.98,62.966
3,Afghanistan,2012,0.27,2.41,62.054
4,Afghanistan,2011,0.28,2.52,61.553
...,...,...,...,...,...
17253,Western Sahara,2019,,,70.263
17254,World,2019,,,72.584
17255,Yemen,2019,,,66.125
17256,Zambia,2019,,,63.886


In [12]:
# Fold the three hygiene data sets into the running merge, one at a time.
# Every step is an outer join on (Location, Period), so no row is discarded.
merge_keys = ["Location", "Period"]
merge_df3 = merge_df2.merge(hand_wash_df, how="outer", on=merge_keys)
merge_df4 = merge_df3.merge(drink_water_df, how="outer", on=merge_keys)
final_merge_df = merge_df4.merge(sanitize_df, how="outer", on=merge_keys)
final_merge_df

Unnamed: 0,Location,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000",Life Expectancy,Population with basic handwashing facilities at home (%),Population using at least basic drinking-water services (%),Population using at least basic sanitization services (%)
0,Afghanistan,2016,0.47,2.78,63.763,37.67,54.84,42.05
1,Afghanistan,2015,0.50,2.85,63.377,37.59,52.39,40.71
2,Afghanistan,2014,0.51,2.98,62.966,37.52,49.96,39.37
3,Afghanistan,2012,0.27,2.41,62.054,37.37,45.19,36.75
4,Afghanistan,2011,0.28,2.52,61.553,37.30,42.84,35.46
...,...,...,...,...,...,...,...,...
17421,Viet Nam,2004,,,,,79.45,59.79
17422,Viet Nam,2003,,,,,78.47,57.95
17423,Viet Nam,2000,,,,,75.54,52.37
17424,Republic of Korea,2001,,,,,,100.00


In [12]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of the fully merged frame; the differing counts show how sparsely
# each indicator is populated.
final_merge_df.describe()

Unnamed: 0,Period,"Pharmicists Per 10,000","Medical Doctors Per 10,000",Life Expectancy,Population with basic handwashing facilities at home (%),Population using at least basic drinking-water services (%),Population using at least basic sanitization services (%)
count,17426.0,1795.0,2506.0,17010.0,921.0,3455.0,3439.0
mean,1985.038334,4.124118,20.685012,63.579816,47.501911,77.847103,71.211966
std,20.283229,3.62461,14.299267,12.073683,32.774406,23.98757,30.342367
min,1950.0,0.002,0.13,18.907,1.03,10.13,3.4
25%,1967.0,0.79,7.7825,55.67275,13.98,59.895,45.85
50%,1985.0,3.53,21.28,66.5575,44.52,87.65,85.26
75%,2003.0,6.39,31.66,72.747,81.74,98.46,97.63
max,2019.0,26.3,84.22,86.751,100.0,100.0,100.0


In [None]:
# TODO: add a "Continent" column to final_merge_df, derived from each row's Location.

In [None]:
# Fill a "Continents" column with the continent code of each row's Location.
def _continent_code_for(location):
    """Return the continent code for a country name, or None if unresolved.

    The name is first converted to an ISO alpha-2 code and then to a
    continent code, both via pycountry_convert (``pc``).

    Parameters
    ----------
    location : str
        Country name as it appears in the "Location" column.

    Returns
    -------
    str or None
        The continent code, or None when pycountry_convert does not recognise
        the name or has no continent for its alpha-2 code. Non-country
        Locations such as "Africa" or "World" (both appear in the data shown
        above) fall into this case instead of aborting the whole cell.
    """
    try:
        alpha2 = pc.country_name_to_country_alpha2(location)
        return pc.country_alpha2_to_continent_code(alpha2)
    except KeyError:
        # pycountry_convert signals an unknown name/code with KeyError.
        return None


# NOTE(review): `d3` is not defined in any cell visible above this one, so on
# a fresh kernel this cell raises NameError -- confirm which frame it should be
# (final_merge_df?) and define it before this cell.
# Resolve each distinct Location once, then broadcast the result to all rows,
# instead of repeating the lookup and a .loc write for every row.
continent_by_location = {
    location: _continent_code_for(location)
    for location in d3["Location"].dropna().unique()
}
d3["Continents"] = d3["Location"].map(continent_by_location)
d3

264

In [14]:
# Generate a bar plot of basic drinking-water service coverage by location using Pandas.
# drinking_water_df = pd.DataFrame(final_merge_df.groupby(["Location","Population using at least basic drinking-water services (%)"]).count()).reset_index() 

# #Alter the dataframe down to two columns 
# drinking_water_df = drinking_water_df[["Location", ("Population using at least basic drinking-water services (%)")]] 
# drinking_water_df = drinking_water_df.set_index("Location")
# drinking_water_df.head()

In [15]:
# Create the Bar Chart
# drinking_water_df.plot(kind="bar",figsize=(25,5), color="b",fontsize = 14)                           
# plt.title("Average Basic Drinking Water by Year",fontsize = 20)
# plt.xlabel("Year",fontsize = 14)
# plt.ylabel("% of Pop Using Basic Drinking Water",fontsize = 14)
# plt.tight_layout()
# plt.show()

In [17]:
# drinking_water_mean = final_merge_df.groupby(["Location","Period"])["Population using at least basic drinking-water services (%)"].mean()
# drinking_water_median = final_merge_df.groupby(["Location","Period"])["Population using at least basic drinking-water services (%)"].median()
# drinking_water_var = final_merge_df.groupby(["Location","Period"])["Population using at least basic drinking-water services (%)"].var()
# drinking_water_std = final_merge_df.groupby(["Location","Period"])["Population using at least basic drinking-water services (%)"].std()
# drinking_water_sem = final_merge_df.groupby(["Location","Period"])["Population using at least basic drinking-water services (%)"].sem()

In [16]:
# drinking_water_table = pd.DataFrame({"Mean": drinking_water_mean, "Median":drinking_water_median, "Variance":drinking_water_var, "Standard Deviation": drinking_water_std, "SEM": drinking_water_sem})
# drinking_water_table.head()

In [17]:
# # Generate a line plot of Basic Drinking Water vs. Location
# water_df = final_merge_df.loc[final_merge_df["Location"] == "Population using at least basic drinking-water services (%)",:]
# water_df.head()

In [None]:
# x_axis = water_df[""]

# fig1, ax1 = plt.subplots(figsize=(15, 10))
# plt.title("Percent of Basic Drinking Water by Location",fontsize =25)
# plt.plot(x_axis, linewidth=2, markersize=15,marker="o",color="blue", label="Fahreneit")
# plt.xlabel("Population using at least basic drinking-water services (%)",fontsize =14)
# plt.ylabel("Location",fontsize =14)

# plt.show()

In [14]:
# asia_continent = ["China","India","Indonesia","Pakistan","Bangladesh","Japan","Philippines","Vietnam","Turkey",
# "Iran","Thailand","Myanmar","South Korea","Iraq","Afghanistan","Saudi Arabia","Uzbekistan","Malaysia","Yemen",
# "Nepal","North Korea","Taiwan","Sri Lanka","Kazakhstan","Syria","Cambodia","Jordan","Azerbaijan",
# "United Arab Emirates","Tajikistan","Israel","Hong Kong","Laos","Lebanon","Kyrgyzstan","Turkmenistan",
# "Singapore","Oman","State of Palestine","Kuwait","Georgia","Mongolia","Armenia","Qatar","Bahrain","Timor-Leste",
# "Cyprus","Bhutan","Macao","Maldives","Brunei Darussalam"]

In [15]:
# africa_continent = ["Nigeria","Ethiopia","Egypt","Democratic Republic of the Congo","Tanzania","South Africa",
# "Kenya","Uganda","Algeria","Sudan","Morocco","Angola","Mozambique","Ghana","Madagascar","Cameroon","Côte d'Ivoire",
# "Niger","Burkina Faso","Mali","Malawi","Zambia","Senegal","Chad","Somalia","Zimbabwe","Guinea","Rwanda","Benin",
# "Burundi","Tunisia","South Sudan","Togo","Sierra Leone","Libya","Congo","Liberia","Central African Republic",
# "Mauritania","Eritrea","Namibia","Gambia","Botswana","Gabon","Lesotho","Guinea-Bissau","Equatorial Guinea",
# "Mauritius","Eswatini","Djibouti","Réunion","Comoros","Western Sahara","Cabo Verde","Mayotte","Sao Tome and Principe",
# "Seychelles","Saint Helena"]

In [16]:
# europe_continent = ["Russia","Germany","United Kingdom","France","Italy","Spain","Ukraine","Poland","Romania",
# "Netherlands","Belgium","Czechia","Greece","Portugal","Sweden","Hungary","Belarus","Austria","Serbia","Switzerland",
# "Bulgaria","Denmark","Finland","Slovakia","Norway","Ireland","Croatia","Moldova","Bosnia and Herzegovina","Albania",
# "Lithuania","North Macedonia","Slovenia","Latvia","Estonia","Montenegro","Luxembourg","Malta","Iceland","Channel Islands",
# "Isle of Man","Andorra","Faeroe Islands","Monaco","Liechtenstein","San Marino","Gibraltar","Holy See"]

In [17]:
# north_america_continent = ["United States","Mexico","Canada","Guatemala","Haiti","Cuba","Dominican Republic","Honduras",
# "Nicaragua","El Salvador","Costa Rica","Panama","Jamaica","Puerto Rico","Trinidad and Tobago","Guadeloupe","Belize",
# "Bahamas","Martinique","Barbados","Saint Lucia","Curaçao","Grenada","Saint Vincent and the Grenadines","Aruba",
# "United States Virgin Islands","Antigua and Barbuda","Dominica","Cayman Islands","Bermuda","Greenland","Saint Kitts and Nevis",
# "Sint Maarten","Turks and Caicos Islands","Saint Martin","British Virgin Islands","Caribbean Netherlands","Anguilla",
# "Saint Barthélemy","Saint Pierre and Miquelon","Montserrat"]

In [21]:
# south_america_continent = ["Brazil","Colombia","Argentina","Peru","Venezuela","Chile","Ecuador","Bolivia",
# "Paraguay","Uruguay","Guyana","Suriname","French Guiana","Falkland Islands"]

TypeError: 'type' object is not subscriptable

In [19]:
# australia_oceania_continent = ["Australia","Papua New Guinea","New Zealand","Fiji","Solomon Islands",
# "Micronesia","Vanuatu","New Caledonia","French Polynesia","Samoa","Guam","Kiribati","Tonga",
# "Marshall Islands","Northern Mariana Islands","American Samoa","Palau","Cook Islands","Tuvalu",
# "Wallis and Futuna Islands","Nauru","Niue","Tokelau"]

In [18]:
# as_df = pd.DataFrame({"Continent": asia_continent,})
# af_df = pd.DataFrame({"Continent": africa_continent})
# eu_df = pd.DataFrame({"Continent":europe_continent })
# na_df = pd.DataFrame({"Continent": north_america_continent})
# sa_df = pd.DataFrame({"Continent": south_america_continent})
# oc_df = pd.DataFrame({"Continent": australia_oceania_continent })
# oc_df.head()

In [19]:
# continent_df = pd.merge(as_df, af_df, on = ["Continent"], how = "left")
# continent_df.head()

In [29]:
# continent_df1 = pd.merge(continent_df, eu_df, on = ["Continent"], how = "left")
# continent_df1.head()

Unnamed: 0,Continent
0,China
1,India
2,Indonesia
3,Pakistan
4,Bangladesh


In [30]:
# continent_df2 = pd.merge(continent_df1, na_df, on = ["Continent"], how = "left")
# continent_df2.head()

Unnamed: 0,Continent
0,China
1,India
2,Indonesia
3,Pakistan
4,Bangladesh


In [None]:
# continent_df3 = pd.merge(continent_df2, sa_df, on = ["Continent"], how = "left")

In [20]:
# final_continent_df = pd.merge(continent_df2, oc_df, on = ["Continent"], how = "left")
# final_continent_df.count()