# "Malaria Data Visualizations"
> "Three plots summarizing information on incidence of malaria, mortality, and bednet usage."

- toc: false
- branch: master
- badges: true
- comments: true
- categories: [fastpages, jupyter]
- image: images/some_folder/your_image.png
- hide: false
- search_exclude: true
- metadata_key1: metadata_value1
- metadata_key2: metadata_value2
- use_plotly: true

Create 3 informative visualizations about malaria using Python in a Jupyter notebook, starting with the data sets at https://github.com/rfordatascience/tidytuesday/tree/master/data/2018/2018-11-13. Where appropriate, make the visualizations interactive.

Note There are many libraries you can use for each task. Choose one library and explain why you chose it in your blog.

incidence-of-malaria.csv

children-sleeping-under-treated-bednet.csv

In [393]:
import plotly.express as px


fig = px.violin(
    top6,
    x="Entity",
    y="Inc",
    color="Entity",
)
fig.update_layout(showlegend=False)

In [398]:
px.scatter(
    merged,
    x="Inc",
    y="Nets",
    animation_frame="Year",
    animation_group="Entity",
    size="Inc",
    color="Entity",
    hover_name="Entity",
)


In [305]:
import pandas as pd
import seaborn as sns

In [306]:
incidence = pd.read_csv('incidence-of-malaria.csv')
bednet = pd.read_csv('children-sleeping-under-treated-bednet.csv')

In [307]:
incidence = incidence.rename(columns = {"Incidence of malaria (per 1,000 population at risk)":"Inc"})
incidence = incidence[['Entity', 'Year', 'Inc']]
print(incidence.head(10))


        Entity  Year         Inc
0  Afghanistan  2000   95.542797
1  Afghanistan  2001   91.885775
2  Afghanistan  2002  104.012541
3  Afghanistan  2003   68.409123
4  Afghanistan  2004   37.637712
5  Afghanistan  2005   27.079106
6  Afghanistan  2006   20.526101
7  Afghanistan  2007   21.548933
8  Afghanistan  2008   17.836232
9  Afghanistan  2009   14.771710


"Six countries accounted for more than half of all malaria cases worldwide: Nigeria (25%), the Democratic Republic of the Congo (12%), Uganda (5%), and Côte d’Ivoire, Mozambique and Niger (4% each)" 
https://www.who.int/news-room/feature-stories/detail/world-malaria-report-2019


In [308]:
top6 = (
    pd.concat(
        [incidence[incidence.Entity == "Nigeria"], 
        incidence[incidence.Entity == "Democratic Republic of Congo"],
        incidence[incidence.Entity == "Uganda"],
        incidence[incidence.Entity == "Cote d'Ivoire"],
        incidence[incidence.Entity == "Mozambique"],
        incidence[incidence.Entity == "Niger"]]
    )
)

In [309]:
import numpy as np
from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import HoverTool

In [310]:
output_notebook()

In [311]:
hover = HoverTool(tooltips = [("Country", "@Entity"), ("Year","@Year"), ("Incidence", "@Inc")])


fig1 = figure(title = "Malaria Incidence", x_axis_label = 'Year', y_axis_label = 'Incidence per 1,000 at risk', tools = [hover])
fig1.line(source = top6[top6.Entity == "Nigeria"], x = 'Year', y = 'Inc' , color = 'red', legend_label = 'Nigeria')
fig1.scatter(source = top6[top6.Entity == "Nigeria"], x = 'Year', y = 'Inc' , color = 'red', legend_label = 'Nigeria')

fig1.line(source = top6[top6.Entity == "Democratic Republic of Congo"], x = 'Year', y = 'Inc' , color = 'black', legend_label = 'Democratic Republic of Congo')
fig1.scatter(source = top6[top6.Entity == "Democratic Republic of Congo"], x = 'Year', y = 'Inc' , color = 'black', legend_label = 'Democratic Republic of Congo')

fig1.line(source = top6[top6.Entity == "Uganda"], x = 'Year', y = 'Inc' , color = 'blue', legend_label = 'Uganda')
fig1.scatter(source = top6[top6.Entity == "Uganda"], x = 'Year', y = 'Inc' , color = 'blue', legend_label = 'Uganda')

fig1.line(source = top6[top6.Entity == "Cote d'Ivoire"], x = 'Year', y = 'Inc' , color = 'green', legend_label = "Cote d'Ivoire")
fig1.scatter(source = top6[top6.Entity == "Cote d'Ivoire"], x = 'Year', y = 'Inc' , color = 'green', legend_label = "Cote d'Ivoire")

fig1.line(source = top6[top6.Entity == "Mozambique"], x = 'Year', y = 'Inc' , color = 'orange', legend_label = 'Mozambique')
fig1.scatter(source = top6[top6.Entity == "Mozambique"], x = 'Year', y = 'Inc' , color = 'orange', legend_label = 'Mozambique')

fig1.line(source = top6[top6.Entity == "Niger"], x = 'Year', y = 'Inc' , color = 'darkgrey', legend_label = 'Niger')
fig1.scatter(source = top6[top6.Entity == "Niger"], x = 'Year', y = 'Inc' , color = 'darkgrey', legend_label = 'Niger')

fig1.legend.location = "bottom_left"

In [312]:
show(fig1)

In [313]:
from bokeh.models import CustomJS, Dropdown
entities = list(incidence["Entity"].unique())

drop = Dropdown(label = "Geographic Location", menu = entities)
show(drop)

In [314]:
from bokeh.models.widgets import CheckboxGroup

boxes = CheckboxGroup(labels = entities, active = [0,1])
#show(boxes)

In [315]:
bednet = bednet.rename(columns = {"Use of insecticide-treated bed nets (% of under-5 population)":"Nets"})


In [316]:
top6_nets = (
    pd.concat(
        [bednet[bednet.Entity == "Nigeria"], 
        bednet[bednet.Entity == "Democratic Republic of Congo"],
        bednet[bednet.Entity == "Uganda"],
        bednet[bednet.Entity == "Cote d'Ivoire"],
        bednet[bednet.Entity == "Mozambique"],
        bednet[bednet.Entity == "Niger"]]
    )
)

top6_nets = top6_nets[top6_nets.Year < 2019]

In [317]:
hover_net = HoverTool(tooltips = [("Country", "@Entity"), ("Year","@Year"), ("Net Percent", "@Nets")])


fig2 = figure(title = "Use of Insecticide-Treated Bed Nets", 
              x_axis_label = 'Year', 
              y_axis_label = 'Percent of Under-5 Population', 
              tools = [hover_net])
fig2.line(source = top6_nets[top6_nets.Entity == "Nigeria"], x = 'Year', y = 'Nets' , color = 'red', legend_label = 'Nigeria')
fig2.scatter(source = top6_nets[top6_nets.Entity == "Nigeria"], x = 'Year', y = 'Nets' , color = 'red', legend_label = 'Nigeria')

fig2.line(source = top6_nets[top6_nets.Entity == "Democratic Republic of Congo"], x = 'Year', y = 'Nets' , color = 'black', legend_label = 'Democratic Republic of Congo')
fig2.scatter(source = top6_nets[top6_nets.Entity == "Democratic Republic of Congo"], x = 'Year', y = 'Nets' , color = 'black', legend_label = 'Democratic Republic of Congo')

fig2.line(source = top6_nets[top6_nets.Entity == "Uganda"], x = 'Year', y = 'Nets' , color = 'blue', legend_label = 'Uganda')
fig2.scatter(source = top6_nets[top6_nets.Entity == "Uganda"], x = 'Year', y = 'Nets' , color = 'blue', legend_label = 'Uganda')

fig2.line(source = top6_nets[top6_nets.Entity == "Cote d'Ivoire"], x = 'Year', y = 'Nets' , color = 'green', legend_label = "Cote d'Ivoire")
fig2.scatter(source = top6_nets[top6_nets.Entity == "Cote d'Ivoire"], x = 'Year', y = 'Nets' , color = 'green', legend_label = "Cote d'Ivoire")

fig2.line(source = top6_nets[top6_nets.Entity == "Mozambique"], x = 'Year', y = 'Nets' , color = 'orange', legend_label = 'Mozambique')
fig2.scatter(source = top6_nets[top6_nets.Entity == "Mozambique"], x = 'Year', y = 'Nets' , color = 'orange', legend_label = 'Mozambique')

fig2.line(source = top6_nets[top6_nets.Entity == "Niger"], x = 'Year', y = 'Nets' , color = 'darkgrey', legend_label = 'Niger')
fig2.scatter(source = top6_nets[top6_nets.Entity == "Niger"], x = 'Year', y = 'Nets' , color = 'darkgrey', legend_label = 'Niger')

fig2.legend.location = "top_left"

In [318]:
show(fig2)

In [319]:
merged = pd.merge(top6, top6_nets, how = 'inner', on = ['Entity', 'Year'])
merged

Unnamed: 0,Entity,Year,Inc,Code,Nets
0,Nigeria,2003,409.157078,NGA,1.2
1,Nigeria,2008,424.655344,NGA,5.5
2,Nigeria,2010,398.90262,NGA,28.9
3,Nigeria,2011,372.557183,NGA,16.4
4,Nigeria,2013,328.654579,NGA,16.6
5,Nigeria,2014,314.404862,NGA,25.4
6,Nigeria,2015,296.0814,NGA,43.6
7,Nigeria,2017,283.064074,NGA,49.1
8,Nigeria,2018,291.942514,NGA,52.2
9,Democratic Republic of Congo,2001,473.607811,COD,1.0


In [320]:
hover3 = HoverTool(tooltips = [("Country", "@Entity"), ("Year","@Year"), ("Net Percent", "@Nets"), ("Incidence", "@Inc")])


fig3 = figure(title = "Ratio", 
              x_axis_label = 'Percent of Under-5 Population Using Insecticide-Treated Bed Nets', 
              y_axis_label = 'Malaria Incidence per 1,000 at risk', 
              tools = [hover3])
fig3.scatter(source = merged[merged.Entity == "Nigeria"], x = 'Nets', y = 'Inc' , color = 'red', legend_label = 'Nigeria', size = 15)
fig3.scatter(source = merged[merged.Entity == "Democratic Republic of Congo"], x = 'Nets', y = 'Inc' , color = 'black', legend_label = 'Democratic Republic of Congo', size = 15)
fig3.scatter(source = merged[merged.Entity == "Uganda"], x = 'Nets', y = 'Inc' , color = 'blue', legend_label = 'Uganda', size = 15)
fig3.scatter(source = merged[merged.Entity == "Cote d'Ivoire"], x = 'Nets', y = 'Inc' , color = 'green', legend_label = "Cote d'Ivoire", size = 15)
fig3.scatter(source = merged[merged.Entity == "Mozambique"], x = 'Nets', y = 'Inc' , color = 'orange', legend_label = 'Mozambique', size = 15)
fig3.scatter(source = merged[merged.Entity == "Niger"], x = 'Nets', y = 'Inc' , color = 'darkgrey', legend_label = 'Niger', size = 15)


import pylab
import numpy as np

Poly = np.polynomial.Polynomial
#nmin, nmax = min(merged['Nets']), max(merged['Nets'])
nigeria = merged[merged.Entity == "Nigeria"]
fit, info = Poly.fit(nigeria['Nets'], nigeria['Inc'], 1)

fig3.line(nigeria['Nets'], fit)

In [321]:
show(fig3)

In [322]:
deaths = pd.read_csv('malaria-deaths-by-region.csv')
deaths = deaths.rename(columns = {"Deaths - Malaria - Sex: Both - Age: All Ages (Number)": "Deaths"})
deaths.sample(5)

Unnamed: 0,Entity,Code,Year,Deaths
5111,Slovenia,SVN,2005,0.0
3001,Kenya,KEN,1995,15386.951014
3221,Lesotho,LSO,1991,0.0
5238,South Korea,KOR,1992,11.153381
2640,Hungary,HUN,1998,0.0


In [323]:
from geopy.geocoders import Nominatim
import time
from pprint import pprint

app = Nominatim(user_agent="tutorial")
# get location raw data
location = app.geocode("Afghanistan").raw
# print raw data
pprint(location)

{'boundingbox': ['29.3772', '38.4910682', '60.5176034', '74.889862'],
 'class': 'boundary',
 'display_name': 'افغانستان',
 'icon': 'https://nominatim.openstreetmap.org/ui/mapicons//poi_boundary_administrative.p.20.png',
 'importance': 0.7470274828373141,
 'lat': '33.7680065',
 'licence': 'Data © OpenStreetMap contributors, ODbL 1.0. '
            'https://osm.org/copyright',
 'lon': '66.2385139',
 'osm_id': 303427,
 'osm_type': 'relation',
 'place_id': 258408076,
 'type': 'administrative'}


In [324]:
location['lat']

'33.7680065'

In [326]:
deaths.sort_values("Year", ascending = False)
deaths17 = deaths[deaths.Year == 2017]
deaths17.iloc[i]['Entity']

deaths17.sample(2)

Unnamed: 0,Entity,Code,Year,Deaths
4955,Senegal,SEN,2017,2145.934721
587,Belize,BLZ,2017,0.231767


In [327]:
coor = pd.DataFrame()

for i in range(len(deaths17)):
    
    if type(deaths17.iloc[i]['Code']) == str:
        location = app.geocode(deaths17.iloc[i]['Entity']).raw
        coor = pd.concat(
            [coor, 
             pd.DataFrame(
                 {"latitude" :location['lat'],
                  "longitude" :location['lon'],
                  "Entity" : deaths17.iloc[i]['Entity']
                 },
                 index = [i]
             )
         
            ]
        )

In [328]:
type(deaths17.iloc[3]['Code'])


str

In [329]:
coor.head(5)

Unnamed: 0,latitude,longitude,Entity
0,33.7680065,66.2385139,Afghanistan
1,41.000028,19.9999619,Albania
2,28.0000272,2.9999825,Algeria
3,-14.289304,-170.692511,American Samoa
5,42.5407167,1.5732033,Andorra


In [331]:
deaths17 = pd.merge(deaths17, coor, how = 'inner', on = "Entity")

In [370]:
data=dict(
    x=list(deaths17['longitude']),
    y=list(deaths17['latitude']),
    name=list(deaths17['Entity']),
    rate=list(deaths17['Deaths']),
)


In [371]:
from bokeh.models import LinearColorMapper
from bokeh.palettes import YlOrRd5 


In [387]:
from bokeh.plotting import figure, output_file, show
from bokeh.tile_providers import CARTODBPOSITRON, get_provider

output_file("tile.html")

tile_provider = get_provider(CARTODBPOSITRON)
mapper = LinearColorMapper(palette=list(reversed(YlOrRd5)), low=0, high=620000)


p = figure(
    title="Malaria Mortality in 2017",
    x_range=(-1, 10), y_range=(5, 100),
    x_axis_type="mercator", y_axis_type="mercator")
p.add_tile(tile_provider)
p.circle(x = 'latitude',y = 'longitude', source = deaths17, 
          fill_color = {'field' : 'Deaths', 'transform' : mapper})


show(p)

In [362]:
min(deaths17["Deaths"])

0.0

In [363]:
max(deaths17["Deaths"])

619826.6346603156

In [379]:
from bokeh.io import show
from bokeh.models import LogColorMapper
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
from bokeh.sampledata.unemployment import data as unemployment
from bokeh.sampledata.us_counties import data as counties

palette = tuple(reversed(palette))

counties = {
    code: county for code, county in counties.items() if county["state"] == "tx"
}

county_xs = [county["lons"] for county in counties.values()]
county_ys = [county["lats"] for county in counties.values()]

county_names = [county['name'] for county in counties.values()]
county_rates = [unemployment[county_id] for county_id in counties]
color_mapper = LogColorMapper(palette=palette)

data=dict(
    x=county_xs,
    y=county_ys,
    name=county_names,
    rate=county_rates,
)

TOOLS = "pan,wheel_zoom,reset,hover,save"

p = figure(
    title="Texas Unemployment, 2009", tools=TOOLS,
    x_axis_location=None, y_axis_location=None,
    tooltips=[
        ("Name", "@name"), ("Unemployment rate", "@rate%"), ("(Long, Lat)", "($x, $y)")
    ])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

p.patches('x', 'y', source=data,
          fill_color={'field': 'rate', 'transform': color_mapper},
          fill_alpha=0.7, line_color="white", line_width=0.5)

show(p)

RuntimeError: bokeh sample data directory does not exist, please execute bokeh.sampledata.download()

In [381]:
min(deaths17['latitude'])

'-0.7264327'

In [382]:
min(deaths17['longitude'])

'-1.0800271'

In [383]:
max(deaths17['latitude'])

'9.6000359'

In [384]:
max(deaths17['longitude'])

'97.7453061'

In [391]:
from bokeh.io import output_file, show
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap

output_file("gmap.html")

map_options = GMapOptions(lat=30.2861, lng=-97.7394, map_type="roadmap", zoom=11)

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# Replace the value below with your personal API key:
p = gmap("AIzaSyChgxQ_lN1PjzbVZ64l94Q3uy99jYuNOfk", map_options, title="Austin")

source = ColumnDataSource(
    data=dict(lat=[ 30.29,  30.20,  30.29],
              lon=[-97.70, -97.74, -97.78])
)

p.circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, source=source)

show(p)