UNICEF Water vs. GDP vs. Cholera Mortality Rate

In [3]:
#importing dependencies
import pandas as pd 
from sqlalchemy import create_engine

UNICEF Dataset - clean & convert to dataframe

In [4]:
# Read the UNICEF drinking-water table from CSV, decoding the file as ISO-8859-1.
# NOTE(review): the preview shows "-" in several measurement columns; presumably a
# missing-value marker -- confirm, and consider na_values="-" if numeric dtypes are needed.
csv_file = "../Project-2/drink_water.csv"
unicef_df = pd.read_csv(filepath_or_buffer=csv_file, encoding="ISO-8859-1")
# Show the first five rows as a sanity check.
unicef_df.head(n=5)

Unnamed: 0,Country_Code,Country,Years,Total improved,Piped on premises,Other improved,Other unimproved,Surface water
0,AFG,Afghanistan,1990.0,-,1,-,-,-
1,AFG,Afghanistan,2015.0,55,12,43,39,6
2,ALB,Albania,1990.0,-,-,-,-,-
3,ALB,Albania,2015.0,95,82,13,5,0
4,DZA,Algeria,1990.0,91,67,24,8,1


In [5]:
# Keep only the rows whose "Years" value is 2015.
# .copy() makes the result an independent frame (the same way gdp2015_df is built),
# so later column assignments on it cannot raise SettingWithCopyWarning or
# write through to unicef_df.
unicef2015_df = unicef_df.loc[unicef_df["Years"] == 2015.0, :].copy()

In [6]:
# Render floats without a fractional part so "Years" displays as 2015 instead of 2015.0.
# This is a pandas-wide display option: it changes how every float is printed from
# this point on and does not alter the stored values or dtypes.
pd.set_option("display.float_format", '{:.0f}'.format)
unicef2015_df.head(n=5)

Unnamed: 0,Country_Code,Country,Years,Total improved,Piped on premises,Other improved,Other unimproved,Surface water
1,AFG,Afghanistan,2015,55,12,43,39,6
3,ALB,Albania,2015,95,82,13,5,0
5,DZA,Algeria,2015,84,77,7,16,0
7,ASM,American Samoa,2015,100,92,8,0,0
9,AND,Andorra,2015,100,100,0,0,0


GDP Dataset - transform & convert to dataframe

In [7]:
# Read the per-country GDP table (one column per year, as the preview shows),
# decoding the file as ISO-8859-1.
csv_file = "../Project-2/Country GDP by year in USD 1960 to 2015.csv"
gdp_df = pd.read_csv(filepath_or_buffer=csv_file, encoding="ISO-8859-1")
# Show the first five rows as a sanity check.
gdp_df.head(n=5)

Unnamed: 0,Country Name,Country Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2006,2007,2008,2009,2010,2011,2012,2013,2014,2015
0,Aruba,ABW,,,,,,,,,...,2421474860,2623726257,2791960894,2498932961,2467703911,2584463687,,,,
1,Andorra,AND,,,,,,,,,...,3536451646,4010785102,4001349340,3649863493,3346317329,3427235709,3146177741.0,3249100667.0,,
2,Afghanistan,AFG,537777812.0,548888895.0,546666678.0,751111191.0,800000046.0,1006666638.0,1399999966.0,1673333419.0,...,7057598407,9843842455,10190529882,12486943506,15936800636,17930239400,20536542737.0,20046334304.0,20050189882.0,19199437989.0
3,Angola,AGO,,,,,,,,,...,41789478661,60448921272,84178032716,75492384801,82470913121,104116000000,115398000000.0,124912000000.0,126775000000.0,102643000000.0
4,Albania,ALB,,,,,,,,,...,8992642349,10701011897,12881352688,12044212904,11926953259,12890867539,12319784787.0,12781029644.0,13277963807.0,11455595709.0


In [16]:
# Narrow the GDP table to the two identifier columns plus the 2015 figures.
# .copy() yields an independent frame rather than a view onto gdp_df.
gdp2015_df = gdp_df.loc[
    :, ['Country Name', 'Country Code', '2015']
].copy()
gdp2015_df.head(n=5)

Unnamed: 0,Country Name,Country Code,2015
0,Aruba,ABW,
1,Andorra,AND,
2,Afghanistan,AFG,19199437989.0
3,Angola,AGO,102643000000.0
4,Albania,ALB,11455595709.0


In [17]:
# Rename the identifier columns so the country-code column matches the UNICEF
# frame's "Country_Code" and can be used as the merge key.
# Only columns are renamed: the earlier index=str argument also converted the
# integer row labels to strings, which none of the visible cells rely on.
gdp2015_df = gdp2015_df.rename(
    columns={"Country Name": "country", "Country Code": "Country_Code"}
)
gdp2015_df.head()

Unnamed: 0,country,Country_Code,2015
0,Aruba,ABW,
1,Andorra,AND,
2,Afghanistan,AFG,19199437989.0
3,Angola,AGO,102643000000.0
4,Albania,ALB,11455595709.0


Merge UNICEF & GDP dataframes

In [18]:
# Join the 2015 water-access rows with the 2015 GDP rows on the shared
# "Country_Code" column. how="inner" is pandas' default and is spelled out here:
# only codes present in both frames are kept.
country_df = pd.merge(
    left=unicef2015_df,
    right=gdp2015_df,
    how="inner",
    on="Country_Code",
)

Unnamed: 0,Country_Code,Country,Years,Total improved,Piped on premises,Other improved,Other unimproved,Surface water,country,2015
0,AFG,Afghanistan,2015,55,12,43,39,6,Afghanistan,19199437989.0
1,ALB,Albania,2015,95,82,13,5,0,Albania,11455595709.0
2,DZA,Algeria,2015,84,77,7,16,0,Algeria,166839000000.0
3,ASM,American Samoa,2015,100,92,8,0,0,American Samoa,
4,AND,Andorra,2015,100,100,0,0,0,Andorra,


In [24]:
# Drop the duplicate country-name column that came from the GDP frame and give the
# GDP column a self-explanatory name.
# - errors="ignore" keeps the cell re-runnable: country_df is reassigned here, so on
#   a second run "country" is already gone and a plain drop would raise KeyError.
# - columns= already selects the axis, so the redundant axis=1 is omitted.
# - rename() no longer passes index=str, so the row labels stay integers.
country_df = (
    country_df
    .drop(columns=["country"], errors="ignore")
    .rename(columns={"2015": "2015 GDP"})
)
# Preview the first ten rows instead of rendering the entire frame.
country_df.head(10)

Unnamed: 0,Country_Code,Country,Years,Total improved,Piped on premises,Other improved,Other unimproved,Surface water,2015 GDP
0,AFG,Afghanistan,2015,55,12,43,39,6,19199437989
1,ALB,Albania,2015,95,82,13,5,0,11455595709
2,DZA,Algeria,2015,84,77,7,16,0,166839000000
3,ASM,American Samoa,2015,100,92,8,0,0,
4,AND,Andorra,2015,100,100,0,0,0,
5,AGO,Angola,2015,49,15,34,20,31,102643000000
6,ATG,Antigua and Barbuda,2015,98,-,-,2,-,1297285370
7,ARG,Argentina,2015,99,98,1,1,0,
8,ARM,Armenia,2015,100,99,1,0,0,10561401185
9,ABW,Aruba,2015,98,94,5,2,0,


Cholera Dataset - load JSON & convert to dataframe

In [None]:
import json
import os

In [None]:
json_file = "../Project-2/cholera_data.json"

# Open the file directly by its path. The earlier os.chdir() call was given the
# path of the JSON file itself; chdir() requires a directory, so it raised
# NotADirectoryError, and changing the working directory would also invalidate
# the other relative paths used in this notebook.
# Parse the JSON document into plain Python objects.
with open(json_file) as json_data:
    data = json.load(json_data)

In [None]:
# Load the cholera JSON into a DataFrame. Use the relative path stored in json_file
# (assigned in the previous cell) instead of a bare file name, so the read does not
# depend on what the current working directory happens to be.
df = pd.read_json(json_file)