In [None]:
import sqlite3
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

## Extracting the longitude/latitude table

In [None]:
# Scrape the country latitude/longitude table published in Google's
# public-data docs; read_html returns a list of tables and the first one
# is kept.
long_lat = pd.read_html(
    "https://developers.google.com/public-data/docs/canonical/countries_csv",
    skiprows=1,
)[0]

# Give the four columns explicit names.
long_lat.columns = ["country_code", "latitude", "longitude", "name"]

# Preview the first rows.
long_lat.head()

In [None]:
# Scrape a country-code lookup table from worldatlas.com, name its five
# columns, and discard the two columns ('x', 'y') that are not used.
converting_country_code = pd.read_html(
    "https://www.worldatlas.com/aatlas/ctycodes.htm",
    skiprows=1,
)[0]
converting_country_code.columns = ["name", "country_code_2", "country_code_3", "x", "y"]
converting_country_code = converting_country_code.drop(columns=["x", "y"])
converting_country_code.head()


In [None]:
# Inner-join the code lookup with the coordinates table, matching the
# lookup's 'country_code_2' column against the coordinates' 'country_code'.
merged_col = pd.merge(
    left=converting_country_code,
    right=long_lat,
    how="inner",
    left_on="country_code_2",
    right_on="country_code",
)
merged_col.head()



In [None]:
# Trim the merged frame: remove the columns that are no longer needed.
merged_col = merged_col.drop(columns=["name_y", "country_code", "country_code_2"])
merged_col.head()


In [None]:
# Assign the final column names.
merged_col.columns = [
    "country",
    "country_code",
    "latitude",
    "longitude",
]

In [None]:
# Preview the first five rows.
merged_col.head(n=5)


In [None]:
# Save the merged country table as CSV in the current user's
# ~/Desktop/project_2 folder (the folder must already exist).
# Path.home() replaces the hard-coded /Users/muhammadwaliji prefix, so the
# cell writes to the same place on the original machine and also runs
# unchanged for other users.
merged_col.to_csv(Path.home() / "Desktop" / "project_2" / "country_code.csv")


In [None]:
# Query the WHO API's COUNTRY dimension as JSON to see how WHO spells its
# country codes. The codes are later merged with merged_col to make sure the
# data we intend to parse from WHO aligns with our own table.
url = "http://apps.who.int/gho/athena/api/COUNTRY?format=json"

# A timeout stops the cell from hanging forever on an unresponsive server, and
# raise_for_status() surfaces an HTTP error here instead of as a confusing
# JSON-decoding or KeyError failure in a later cell.
response = requests.get(url, timeout=30)
response.raise_for_status()
country_r = response.json()
country_r


In [None]:
# Probe the response: drill down to the "label" of the first entry in the
# first dimension's "code" list, to see which path yields a country code.
country_r["dimension"][0]["code"][0]["label"]


In [None]:
# Collect the "label" of every entry in the first dimension's "code" list of
# the WHO response; per the probe above these labels are the WHO country codes.
country_code = country_r['dimension'][0]['code']

# A comprehension builds the list in one step and does not leave a stray
# loop variable behind in the notebook namespace.
who_country_list = [entry["label"] for entry in country_code]

In [None]:
# Turn the WHO code list into a one-column DataFrame so it can be merged with
# the table built earlier, to check that the WHO countries are present in
# merged_col.
# Naming the column in the constructor (instead of assigning .columns
# afterwards) also works when the list is empty; the two-step form raises a
# length-mismatch error in that case.
who_df = pd.DataFrame(who_country_list, columns=["who_country"])
who_df.head()


In [None]:
# Inner-join our country table with the WHO codes on the country code.
# The original author reported 226 matching countries, judged sufficient
# for this data set.
who_and_others = pd.merge(
    left=merged_col,
    right=who_df,
    how="inner",
    left_on="country_code",
    right_on="who_country",
)
who_and_others.head()


In [None]:
# 'who_country' was only the join key on the WHO side; remove it.
who_and_others = who_and_others.drop(columns=["who_country"])


In [None]:
# Show a single row as a quick check (use who_and_others.shape for the size).
who_and_others.head(n=1)

In [None]:
# Save the WHO-aligned country table as CSV in the current user's
# ~/Desktop/project_2 folder (the folder must already exist).
# Path.home() replaces the hard-coded /Users/muhammadwaliji prefix, so the
# cell writes to the same place on the original machine and also runs
# unchanged for other users.
who_and_others.to_csv(Path.home() / "Desktop" / "project_2" / "who_inc_country_code.csv")


## Extracting the disease data using the WHO API

In [None]:
# Building blocks for the WHO disease-data requests: the fixed URL prefix,
# the query-string suffix (HTML output, filtered on COUNTRY), and one
# indicator code per disease, as named by the variables.
url = "http://apps.who.int/gho/athena/data/GHO/"
url_after = "?format=html&filter=COUNTRY:*"
malaria_code, yel_fev_code, leprosy_code = "WHS3_48", "WHS3_50", "WHS3_45"

In [None]:
# Assemble one request URL per disease: prefix + indicator code + suffix.
malaria_url = f"{url}{malaria_code}{url_after}"
yel_fev_url = f"{url}{yel_fev_code}{url_after}"
leprosy_url = f"{url}{leprosy_code}{url_after}"

In [None]:
# Download the malaria page and keep the first HTML table found on it.
mal_raw = pd.read_html(io=malaria_url)[0]

In [None]:
# Download the yellow fever page and keep the first HTML table found on it.
yel_raw = pd.read_html(io=yel_fev_url)[0]

In [None]:
# Download the leprosy page and keep the first HTML table found on it.
lep_raw = pd.read_html(io=leprosy_url)[0]

In [None]:
# Preview the first rows of each raw disease table, one after another.
display(mal_raw.head(), yel_raw.head(), lep_raw.head())

In [None]:
# Keep only the year, country and value columns of the raw malaria table and
# tag every row with the disease name.
# DataFrame.copy() takes a boolean `deep` flag (default True); the previous
# .copy('deep') passed a string, which worked only because it is truthy.
mal_cols = mal_raw.loc[:, ['YEAR', 'COUNTRY', 'NUMERIC VALUE']].copy()
mal_cols['DISEASE'] = 'Malaria'

In [None]:
# Keep only the year, country and value columns of the raw yellow fever table
# and tag every row with the disease name.
# DataFrame.copy() takes a boolean `deep` flag (default True); the previous
# .copy('deep') passed a string, which worked only because it is truthy.
yel_cols = yel_raw.loc[:, ['YEAR', 'COUNTRY', 'NUMERIC VALUE']].copy()
yel_cols['DISEASE'] = 'Yellow Fever'

In [None]:
# Keep only the year, country and value columns of the raw leprosy table and
# tag every row with the disease name.
# DataFrame.copy() takes a boolean `deep` flag (default True); the previous
# .copy('deep') passed a string, which worked only because it is truthy.
lep_cols = lep_raw.loc[:, ['YEAR', 'COUNTRY', 'NUMERIC VALUE']].copy()
lep_cols['DISEASE'] = 'Leprosy'

In [None]:
# Preview the trimmed, disease-tagged tables to confirm the changes.
display(mal_cols.head(), yel_cols.head(), lep_cols.head())

In [None]:
# The per-disease frames, in the order they will be stacked.
dfs = [
    mal_cols,
    yel_cols,
    lep_cols,
]

In [None]:
# Stack the per-disease frames into one table. ignore_index=True gives the
# result a fresh 0..n-1 index; without it each frame keeps its own row
# labels, so labels repeat across diseases and the duplicates are carried
# into the "index" column when the frame is later written with to_sql.
disease_df = pd.concat(dfs, ignore_index=True)

In [None]:
# Put the columns in their final order and preview the result.
disease_df = disease_df.loc[:, ["YEAR", "COUNTRY", "DISEASE", "NUMERIC VALUE"]]
disease_df.head()

In [None]:
# Spot check: the malaria rows for 'Congo', ordered by year.
disease_df[
    (disease_df["DISEASE"] == "Malaria") & (disease_df["COUNTRY"] == "Congo")
].sort_values(by="YEAR")

## Option 1: Use pd.to_sql

In [None]:
# SQLite file name and location.
#
# The path is built from the current user's home directory instead of a
# hard-coded /Users/<name> prefix, so it no longer has to be edited on each
# machine; the ~/Desktop/project_2 folder must already exist.
# Kept as a plain string so any code that treats it as text keeps working.
sqlite_filename = str(Path.home() / 'Desktop' / 'project_2' / 'global_diseases.sqlite')


In [None]:
# Open a connection to the SQLite file named by sqlite_filename and create a
# cursor on it. Both stay open across the following cells and are closed in
# the final cell.
conn = sqlite3.connect(sqlite_filename)
cur = conn.cursor()

In [None]:
# Write the country/coordinates table to SQLite as "country_long_lat",
# replacing the table if it already exists.
who_and_others.to_sql(name="country_long_lat", con=conn, if_exists="replace")


In [None]:
# Write the combined disease table to SQLite as "instances_table",
# replacing the table if it already exists.
disease_df.to_sql(name='instances_table', con=conn, if_exists='replace')

In [None]:
# At this point we are essentially done adding things to SQLite.
# If this is the end, run the following lines:
# cur.close()
# conn.close()

# If you want to run sql queries now, do as follows 
# |
# |
# |
# V

In [None]:
# Read the country table straight back from SQLite into a DataFrame.
df_from_sqlite = pd.read_sql_query(
    sql="""
SELECT * FROM country_long_lat
""",
    con=conn,
)

In [None]:
# Preview the first five rows read back from the database.
df_from_sqlite.head(n=5)

In [None]:
# Close the cursor first, then the connection it belongs to.
cur.close()
conn.close()