# Starter Notebook (loads the following datasets):
## WHO Mortality and Population data
## World Bank GDP data
## ICD-10 Codes

In [1]:
import pandas as pd

In [12]:
# WHO Mortality data (distributed as two CSV parts)
# Part 1 - https://www.who.int/healthinfo/statistics/Morticd10_part1.zip?ua=1
# Part 2 - https://www.who.int/healthinfo/statistics/Morticd10_part2.zip?ua=1

# low_memory=False parses each file in a single pass so column dtypes are
# inferred from the whole column rather than chunk by chunk.
mortality_part1 = pd.read_csv('Morticd10_part1.csv', low_memory = False)
mortality_part2 = pd.read_csv('Morticd10_part2.csv', low_memory = False)

# ignore_index=True gives the combined frame a fresh 0..n-1 index. Without it
# each part keeps its own 0-based labels, so the index holds duplicate labels
# and any label-based lookup (.loc, index alignment) becomes ambiguous.
mortality = pd.concat([mortality_part1, mortality_part2], ignore_index = True)

# WHO Country Codes
# https://www.who.int/healthinfo/statistics/country_codes.zip?ua=1

country_codes = pd.read_csv('country_codes.csv', low_memory = False)

# WHO Population data
# https://www.who.int/healthinfo/Pop.zip?ua=1

population = pd.read_csv('pop.csv', low_memory = False)

In [13]:
# World Bank GDP data (per capita, by country)
# Source: http://api.worldbank.org/v2/en/indicator/NY.GDP.PCAP.CD?downloadformat=csv

# Load the local copy of the download with pandas' default parsing options.
gdp = pd.read_csv(filepath_or_buffer='GDP.csv')

In [14]:
# ICD Codes for year 2011
# ftp://ftp.cdc.gov/pub/Health_Statistics/NCHS/Publications/ICD10CM/2011/

# The file is parsed as fixed-width text with no header row; since no column
# specification is passed, pandas works out the column boundaries itself.
# NOTE(review): read_fwf infers those boundaries from a sample of leading rows
# (infer_nrows, 100 by default). Codes or descriptions that are wider further
# down the file than in the sampled rows may be cut off - confirm, or pass
# explicit colspecs/widths.
icd10_codes = pd.read_fwf('icd10_codes_2011.txt', header = None)
# Label the two parsed columns; this assignment raises if the parse did not
# produce exactly two columns.
icd10_codes.columns = ["Code", "Description"]

In [17]:
# First mortality record, transposed so each column is shown on its own row

mortality.iloc[:1].transpose()

Unnamed: 0,0
Country,1400.0
Admin1,
SubDiv,
Year,2001.0
List,101.0
Cause,1000.0
Sex,1.0
Frmat,7.0
IM_Frmat,8.0
Deaths1,332.0


In [18]:
# First row of the WHO country-code lookup

country_codes.iloc[:1]

Unnamed: 0,country,name
0,1010,Algeria


In [19]:
# First population record, transposed so each column is shown on its own row

population.iloc[:1].transpose()

Unnamed: 0,0
Country,1060.0
Admin1,
SubDiv,
Year,1980.0
Sex,1.0
Frmat,7.0
Pop1,137100.0
Pop2,3400.0
Pop3,15800.0
Pop4,


In [20]:
# First row of the 2011 ICD-10 code table

icd10_codes.iloc[:1]

Unnamed: 0,Code,Description
0,A000,"Cholera due to Vibrio cholerae 01, biovar chol..."


In [21]:
# First GDP record, transposed so each column is shown on its own row

gdp.iloc[:1].transpose()

Unnamed: 0,0
Country Name,Aruba
Country Code,ABW
Indicator Name,GDP per capita (current US$)
Indicator Code,NY.GDP.PCAP.CD
1960,
1961,
1962,
1963,
1964,
1965,


In [70]:
# Total Deaths2 for every (Country, Year, Cause) combination; the three keys
# become the index of a one-column frame.
Deaths2_group = mortality.groupby(['Country', 'Year', 'Cause'])['Deaths2'].sum().to_frame()

# Combinations whose Deaths2 total is non-zero.
# NOTE(review): this frame is not used again in this cell, so zero-total rows
# are still present in Deaths2_group and Result - confirm whether the filter
# was meant to feed the steps below.
Deaths2_non_null = Deaths2_group.loc[Deaths2_group["Deaths2"].ne(0)]

# Order by Year, then Deaths2 (both descending), and turn the index levels
# back into ordinary columns.
Deaths2_group = (
    Deaths2_group
    .sort_values(by=["Year", "Deaths2"], ascending=False)
    .reset_index()
)

# Given the ordering above, the first four rows per country are the largest
# Deaths2 totals from that country's most recent year(s).
Result = Deaths2_group.groupby("Country").head(4)
Result.to_csv("Deaths2 Count by Cause and Year.csv", index=False)

In [71]:
# Read the per-country extract back from disk; column dtypes are re-inferred
# by the CSV parser.
Deaths2_by_cause_and_year = pd.read_csv(filepath_or_buffer='Deaths2 Count by Cause and Year.csv')

In [82]:
# Keep every row whose Cause is not the literal code 'AAA'.
# NOTE(review): the cause-description table shown elsewhere in this notebook
# lists code 1000 as "All causes"; that code is not removed here - confirm.
dropped_AAA_totals = Deaths2_by_cause_and_year.loc[Deaths2_by_cause_and_year['Cause'].ne('AAA')]

In [83]:
# Restrict to rows recorded for the year 2016.
Deaths2_2016 = dropped_AAA_totals.loc[dropped_AAA_totals['Year'].eq(2016)]

In [84]:
# Sum Deaths2 per Cause across the 2016 rows; Cause becomes the index.
Deaths2_cause_totals_2016 = Deaths2_2016.groupby('Cause')['Deaths2'].sum().to_frame()

In [85]:
# Largest 2016 cause totals first.
Deaths2_cause_totals_2016_sorted = Deaths2_cause_totals_2016.sort_values(by=['Deaths2'], ascending=False)

In [86]:
# Show the six largest 2016 cause totals.
Deaths2_cause_totals_2016_sorted.iloc[:6]

Unnamed: 0_level_0,Deaths2
Cause,Unnamed: 1_level_1
P220,9033.0
P369,7523.0
Q249,4976.0
P072,4117.0
R959,1673.0
R99,1378.0


In [87]:
# Sum Deaths2 per (Country, Cause) pair across the 2016 rows; both keys form the index.
Deaths2_cause_totals_2016_by_country = Deaths2_2016.groupby(['Country', 'Cause'])['Deaths2'].sum().to_frame()

In [88]:
# Rank the 2016 (Country, Cause) totals from largest to smallest and display
# only the ten largest instead of rendering the whole frame.
Deaths2_cause_totals_2016_by_country.sort_values('Deaths2', ascending = False).head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,Deaths2
Country,Cause,Unnamed: 2_level_1
2450,P072,3044.0
2070,P369,2895.0
2310,P220,2685.0
2310,P369,2558.0
2070,P220,2046.0
2310,Q249,1990.0
3400,P220,1891.0
2450,R959,1500.0
2070,Q249,1452.0
2450,R99,1247.0


In [94]:
# Sum Deaths2 per Cause across every year present in dropped_AAA_totals.
# NOTE(review): dropped_AAA_totals derives from the CSV extract, which keeps at
# most four rows per country, so these totals cover that subset rather than
# the full mortality table - confirm this is intended.
Deaths2_cause_totals_all_time = dropped_AAA_totals.groupby('Cause')['Deaths2'].sum().to_frame()

In [95]:
# Largest all-years cause totals first.
Deaths2_cause_totals_all_time_sorted = Deaths2_cause_totals_all_time.sort_values(by=['Deaths2'], ascending=False)

In [96]:
# Display only the ten largest all-years cause totals instead of rendering
# the whole frame.
Deaths2_cause_totals_all_time_sorted.head(10)

Unnamed: 0_level_0,Deaths2
Cause,Unnamed: 1_level_1
1000,22937.0
P220,15842.0
P369,15245.0
1092,11916.0
P07,7669.0
P22,6882.0
P072,6649.0
J18,6454.0
Q249,6237.0
R99,5576.0


In [98]:
# Distinct years present in the mortality data, in order of first appearance.
mortality['Year'].unique()

array([2001, 2002, 2003, 2004, 1996, 1997, 1998, 1999, 2000, 1995, 1994,
       1988, 1989, 1990, 1991, 1992, 1993, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017], dtype=int64)

In [101]:
# Preview the cause-code description table: show the first ten rows instead
# of rendering the whole file. The frame is only displayed, not bound to a name.
pd.read_csv('Mortality Cause Code Descriptions.csv').head(10)

Unnamed: 0,Cause,ICD-10 Code(s),Cause Description
0,1000,,All causes
1,1001,A00-B99,Certain infectious and parasitic diseases
2,1002,A00,Cholera
3,1003,A09,Diarrhoea and gastroenteritis of presumed infe...
4,1004,A01-A08,Other intestinal infectious diseases
5,1005,A15-A16,Respiratory tuberculosis
6,1006,A17-A19,Other tuberculosis
7,1007,A20,Plague
8,1008,A33-A35,Tetanus
9,1009,A36,Diphtheria
