In [1]:
import os
import sqlite3

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine


In [2]:
# Pieces of the WHO GHO data request; the indicator code is placed
# between the base address and the query string.
url = "http://apps.who.int/gho/athena/data/GHO/"
url_after = "?format=html&filter=COUNTRY:*"

# Indicator codes, one per disease pulled in this notebook.
malaria_code, yel_fev_code, leprosy_code = "WHS3_48", "WHS3_50", "WHS3_45"

In [3]:
# Full request URL per disease: base address + indicator code + query string.
malaria_url = "".join((url, malaria_code, url_after))
yel_fev_url = "".join((url, yel_fev_code, url_after))
leprosy_url = "".join((url, leprosy_code, url_after))

In [4]:
# read_html yields a list of tables parsed from the page; keep the first.
mal_raw = pd.read_html(io=malaria_url)[0]

In [5]:
# read_html yields a list of tables parsed from the page; keep the first.
yel_raw = pd.read_html(io=yel_fev_url)[0]

In [6]:
# read_html yields a list of tables parsed from the page; keep the first.
lep_raw = pd.read_html(io=leprosy_url)[0]

In [7]:
# display data to observe
display(mal_raw.head())
display(yel_raw.head())
display(lep_raw.head())

Unnamed: 0,GHO,PUBLISHSTATE,YEAR,REGION,COUNTRY,DISPLAY VALUE,NUMERIC VALUE,LOW RANGE,HIGH RANGE,Comment
0,Malaria - number of reported confirmed cases,Published,2013,Eastern Mediterranean,Afghanistan,39263,39263.0,,,
1,Malaria - number of reported confirmed cases,Published,2015,Eastern Mediterranean,Afghanistan,86895,86895.0,,,
2,Malaria - number of reported confirmed cases,Published,2012,Africa,Algeria,55,55.0,,,
3,Malaria - number of reported confirmed cases,Published,2014,Africa,Algeria,0,0.0,,,
4,Malaria - number of reported confirmed cases,Published,2016,Africa,Angola,3794253,3794253.0,,,


Unnamed: 0,GHO,PUBLISHSTATE,YEAR,REGION,WORLDBANKINCOMEGROUP,COUNTRY,DISPLAY VALUE,NUMERIC VALUE,LOW RANGE,HIGH RANGE,Comment
0,Yellow fever - number of reported cases,Published,1997,Africa,Low-income,Chad,0,0.0,,,
1,Yellow fever - number of reported cases,Published,2009,Americas,Upper-middle-income,Suriname,0,0.0,,,
2,Yellow fever - number of reported cases,Published,2012,Africa,High-income,Seychelles,0,0.0,,,
3,Yellow fever - number of reported cases,Published,2009,Eastern Mediterranean,High-income,Bahrain,0,0.0,,,
4,Yellow fever - number of reported cases,Published,1993,Africa,Low-income,Senegal,0,0.0,,,


Unnamed: 0,GHO,PUBLISHSTATE,YEAR,REGION,COUNTRY,DISPLAY VALUE,NUMERIC VALUE,LOW RANGE,HIGH RANGE,Comment
0,Number of new leprosy cases,Published,2005,,Algeria,0,0.0,,,
1,Number of new leprosy cases,Published,2005,,Bahrain,0,0.0,,,
2,Number of new leprosy cases,Published,2005,,Cook Islands,0,0.0,,,
3,Number of new leprosy cases,Published,2005,,Democratic People's Republic of Korea,0,0.0,,,
4,Number of new leprosy cases,Published,2005,,Djibouti,0,0.0,,,


In [8]:
# Keep only year, country and the numeric value, then tag every row with its
# disease. `.assign` returns a new frame, so `mal_raw` is left untouched and
# no explicit copy is needed (the old `.copy('deep')` passed a string where
# a boolean `deep` flag is expected).
mal_cols = (
    mal_raw
    .loc[:, ['YEAR', 'COUNTRY', 'NUMERIC VALUE']]
    .assign(DISEASE='Malaria')
)

In [9]:
# Keep only year, country and the numeric value, then tag every row with its
# disease. `.assign` returns a new frame, so `yel_raw` is left untouched and
# no explicit copy is needed (the old `.copy('deep')` passed a string where
# a boolean `deep` flag is expected).
yel_cols = (
    yel_raw
    .loc[:, ['YEAR', 'COUNTRY', 'NUMERIC VALUE']]
    .assign(DISEASE='Yellow Fever')
)

In [10]:
# Keep only year, country and the numeric value, then tag every row with its
# disease. `.assign` returns a new frame, so `lep_raw` is left untouched and
# no explicit copy is needed (the old `.copy('deep')` passed a string where
# a boolean `deep` flag is expected).
lep_cols = (
    lep_raw
    .loc[:, ['YEAR', 'COUNTRY', 'NUMERIC VALUE']]
    .assign(DISEASE='Leprosy')
)

In [11]:
# observe changes to data
display(mal_cols.head())
display(yel_cols.head())
display(lep_cols.head())

Unnamed: 0,YEAR,COUNTRY,NUMERIC VALUE,DISEASE
0,2013,Afghanistan,39263.0,Malaria
1,2015,Afghanistan,86895.0,Malaria
2,2012,Algeria,55.0,Malaria
3,2014,Algeria,0.0,Malaria
4,2016,Angola,3794253.0,Malaria


Unnamed: 0,YEAR,COUNTRY,NUMERIC VALUE,DISEASE
0,1997,Chad,0.0,Yellow Fever
1,2009,Suriname,0.0,Yellow Fever
2,2012,Seychelles,0.0,Yellow Fever
3,2009,Bahrain,0.0,Yellow Fever
4,1993,Senegal,0.0,Yellow Fever


Unnamed: 0,YEAR,COUNTRY,NUMERIC VALUE,DISEASE
0,2005,Algeria,0.0,Leprosy
1,2005,Bahrain,0.0,Leprosy
2,2005,Cook Islands,0.0,Leprosy
3,2005,Democratic People's Republic of Korea,0.0,Leprosy
4,2005,Djibouti,0.0,Leprosy


In [12]:
# Per-disease frames to be stacked into a single table.
dfs = [
    mal_cols,
    yel_cols,
    lep_cols,
]

In [13]:
# Stack the per-disease frames into one long table. Each input brings its own
# row labels starting at 0, so renumber the result to keep the index unique;
# otherwise label-based lookups hit several rows and the duplicated labels
# end up in any export that writes the index.
disease_df = pd.concat(dfs, ignore_index=True)

In [14]:
# Put the identifying columns first and the measured value last.
disease_df = disease_df.loc[:, ['YEAR', 'COUNTRY', 'DISEASE', 'NUMERIC VALUE']]
disease_df.head()

Unnamed: 0,YEAR,COUNTRY,DISEASE,NUMERIC VALUE
0,2013,Afghanistan,Malaria,39263.0
1,2015,Afghanistan,Malaria,86895.0
2,2012,Algeria,Malaria,55.0
3,2014,Algeria,Malaria,0.0
4,2016,Angola,Malaria,3794253.0


In [15]:
# Spot check: rows for a single country/disease pair, ordered by year.
disease_df[
    disease_df['COUNTRY'].eq('Congo') & disease_df['DISEASE'].eq('Malaria')
].sort_values(by='YEAR')

Unnamed: 0,YEAR,COUNTRY,DISEASE,NUMERIC VALUE
259,2000,Congo,Malaria,15751.0
260,2001,Congo,Malaria,11981.0
1113,2002,Congo,Malaria,7677.0
1114,2003,Congo,Malaria,1633.0
687,2004,Congo,Malaria,293.0
1328,2005,Congo,Malaria,67.0
474,2007,Congo,Malaria,103213.0
1329,2008,Congo,Malaria,117291.0
688,2009,Congo,Malaria,92855.0
47,2011,Congo,Malaria,37744.0


In [16]:
# Location of the SQLite database file. The default is the original
# machine-specific path; set the GLOBAL_DISEASES_SQLITE environment variable
# to point somewhere else without editing the notebook.
sqlite_file = os.environ.get(
    'GLOBAL_DISEASES_SQLITE',
    '/Users/Russ/Data Bootcamp/Global_diseases/global_diseases.sqlite',
)

In [17]:
# Open a connection to the SQLite database file.
conn = sqlite3.connect(database=sqlite_file)

In [18]:
# Write the combined table, replacing 'instances_table' if it already exists.
disease_df.to_sql(name='instances_table', con=conn, if_exists='replace')

  dtype=dtype, method=method)


In [19]:
# Release the database connection. close() returns None, so its result is
# not bound to a name (the former `cur` looked like a cursor but was None).
conn.close()