In [217]:
from bokeh.io import output_notebook, show, push_notebook
from bokeh.plotting import figure
from bokeh.charts import Histogram, HeatMap
from bokeh.layouts import row, column, gridplot, layout, widgetbox
from bokeh.models import CustomJS, ColumnDataSource, Slider, HoverTool, LinearAxis
from bokeh.models.widgets import Select, Panel, Tabs, Div
import pandas as pd 
import math 
import numpy as np 
import csv
from collections import defaultdict
from ipywidgets import interact
from bokeh.core.properties import field

# Switch Bokeh to notebook output before any figure is shown.
output_notebook()

# Country -> region lookup, filled from the 'Country' and 'Group' columns of
# the regions file. A defaultdict(str) is used so that a country missing from
# the file looks up as the empty string instead of raising KeyError.
region_dict = defaultdict(str)
# 'rb': the Python 2 csv module expects the file opened in binary mode.
with open("gapminder_regions.csv", 'rb') as regions:
    region_dict.update(
        (row['Country'], row['Group']) for row in csv.DictReader(regions)
    )
        

# Preprocess data for the line graph: total number of people living with HIV
# per region, one value per year column.
data = pd.read_csv("ppl-living-with-hiv-all-ages.csv")
# Every column label after the first is treated as a year
# (the first column is assumed to hold the country name -- TODO confirm).
line_years = data.columns.values[1:]

# Regions that get their own series in the line graph.
line_regions = ["Sub-Saharan Africa", "South Asia", "Middle East & North Africa",
                "America", "Europe & Central Asia", "East Asia & Pacific"]

# Row positions (for .iloc) of the countries belonging to each region.
# Countries whose region is unknown or not listed above are left out.
region_rows = {name: [] for name in line_regions}
for pos, country in enumerate(data["People living with HIV"]):
    region = region_dict[country]
    if region in region_rows:
        region_rows[region].append(pos)

# Per-year totals for each region. Only the year columns are summed, so the
# result never depends on how pandas treats the country-name column, and a
# missing total (NaN) is replaced by 0 so the series is always plottable.
line_source = {}
for name in line_regions:
    totals = data.iloc[region_rows[name], 1:].sum(axis=0)
    line_source[name] = totals.fillna(0).tolist()

# Original per-region names, kept so any other cell that refers to them
# keeps working.
i_subSaharanAfrica = region_rows["Sub-Saharan Africa"]
i_southAsia = region_rows["South Asia"]
i_middleEastNorthAfrica = region_rows["Middle East & North Africa"]
i_america = region_rows["America"]
i_europeCentralAsia = region_rows["Europe & Central Asia"]
i_eastAsiaPacific = region_rows["East Asia & Pacific"]

sum_subSaharanAfrica = line_source["Sub-Saharan Africa"]
sum_southAsia = line_source["South Asia"]
sum_middleEastNorthAfrica = line_source["Middle East & North Africa"]
sum_america = line_source["America"]
sum_europeCentralAsia = line_source["Europe & Central Asia"]
sum_eastAsiaPacific = line_source["East Asia & Pacific"]


In [226]:
line_graph = figure(title="People living with HIV (all ages)", plot_height=400, plot_width=600)
# The graph starts on the "America" series; update_line swaps in other regions.
line_r = line_graph.line(line_years, line_source["America"], color="#2222aa", line_width=3)
hover_line = HoverTool(
    tooltips=[
        ("year", "$x"),
        ("number", "$y"),
    ])

line_graph.add_tools(hover_line)

def update_line(region):
    """Show the yearly totals of `region` in the line graph.

    region -- a key of line_source (a region name).

    Replaces the 'y' column of the line glyph's data source and pushes the
    change to the notebook output of this graph.
    """
    line_r.data_source.data['y'] = line_source[region]
    # Pass the handle explicitly: without it push_notebook() targets the most
    # recently shown plot, which is no longer this one once another cell has
    # called show(..., notebook_handle=True).
    push_notebook(handle=line_handle)

# Keep the handle returned by show() so update_line can address this plot.
line_handle = show(line_graph, notebook_handle=True)

In [227]:
# Region selector: interact calls update_line with the chosen region name.
region_choices = [
    "America",
    "Sub-Saharan Africa",
    "South Asia",
    "Middle East & North Africa",
    "Europe & Central Asia",
    "East Asia & Pacific",
]
interact(update_line, region=region_choices)

<function __main__.update_line>

In [220]:
# Load the two tables used by the scatter plot: people living with HIV and
# annual AIDS deaths.
num_ppl, annual_death = map(
    pd.read_csv,
    ("ppl-living-with-hiv-all-ages.csv", "annual-number-of-AIDS-deaths.csv"),
)

In [221]:
# Keep only the rows of the deaths table whose country has a non-empty region
# in region_dict (countries missing from the lookup come back as "").
death_countries = annual_death["Annual number of AIDS deaths"]
i_need = [pos for pos, country in enumerate(death_countries) if region_dict[country]]

annual_death = annual_death.iloc[i_need]

In [222]:
def get_data(year):
    """Collect the scatter-plot columns for one year.

    year -- column label of the year in both tables (a string such as '1990').

    Rows of annual_death are kept only when their value for `year` is below
    100 times that year's median; rows with a missing value fail the
    comparison and are dropped as well.

    Returns [x, y, colors, scatter_legends], four lists of equal length with
    one entry per kept country, in annual_death row order:
      x               -- people living with HIV (NaN if the country is absent
                         from num_ppl)
      y               -- annual number of AIDS deaths
      colors          -- fill colour chosen by the country's region
      scatter_legends -- region name of the country
    """
    deaths = annual_death[year]
    # Computed once: drops extreme outliers and rows without a value.
    keep = deaths < deaths.median() * 100
    kept_rows = annual_death[keep]

    y = kept_rows[year].tolist()
    annual_death_ctys = kept_rows["Annual number of AIDS deaths"].tolist()

    # Pair x with y by country name rather than by row position, so every
    # point combines the two figures of the same country and x always has
    # exactly as many entries as y.
    ppl_by_country = dict(zip(num_ppl["People living with HIV"], num_ppl[year]))
    x = [ppl_by_country.get(c, float("nan")) for c in annual_death_ctys]

    region_colors = {
        "Sub-Saharan Africa": "navy",
        "South Asia": "azure",  # was "azur", which is not a valid colour name
        "Middle East & North Africa": "green",
        "America": "yellow",
        "Europe & Central Asia": "orange",
        "East Asia & Pacific": "red",
    }

    colors = []
    scatter_legends = []
    for c in annual_death_ctys:
        region = region_dict[c]
        if region in region_colors:
            colors.append(region_colors[region])
            scatter_legends.append(region)
        else:
            # A region outside the six above still gets an entry; skipping it
            # would shift the colours of every following point.
            colors.append("gray")
            scatter_legends.append("Other")
    return [x, y, colors, scatter_legends]
 

In [223]:
# Year labels available for the scatter plot: every column label of the
# deaths table after the first one.
scatter_years = annual_death.columns[1:].values
print(scatter_years)

['1990' '1991' '1992' '1993' '1994' '1995' '1996' '1997' '1998' '1999'
 '2000' '2001' '2002' '2003' '2004' '2005' '2006' '2007' '2008' '2009'
 '2010' '2011']


In [224]:
init_data = get_data('1990')
# Sanity check: x, y, colors and legends should all have the same length.
for column in init_data:
    print(len(column))
scatter_source = {'x': init_data[0], 'y': init_data[1]}
scatter_graph = figure(title="People living with HIV (all ages) vs. Annual death of AIDS",
                       plot_height=500, plot_width=700, x_axis_label="People living with HIV (all ages)",
                       y_axis_label="Annual death of AIDS")
scatter_r = scatter_graph.circle(init_data[0], init_data[1], size=10, fill_color=init_data[2],
                                 fill_alpha=0.6, line_color=None)

hover_scatter = HoverTool(
    tooltips=[
        ("People living with HIV", "$x"),
        ("Annual death of AIDS", "$y"),
    ])


scatter_graph.add_tools(hover_scatter)

def update_scatter(year=1990):
    """Redraw the scatter plot with the data of `year`.

    year -- the year to show; converted to a string to select the column.

    Fetches the columns with get_data, replaces the glyph's data source
    contents and pushes the change to the notebook output of this plot.
    """
    result = get_data(str(year))
    # Replace all columns in one assignment: the number of points differs
    # from year to year, so assigning the columns one by one leaves the
    # source with columns of different lengths in between.
    scatter_r.data_source.data = {
        'x': list(result[0]),
        'y': list(result[1]),
        'fill_color': list(result[2]),
        # Not referenced by the glyph; kept because the column was provided before.
        'legend': list(result[3]),
    }
    # Address this plot explicitly instead of relying on it being the most
    # recently shown one.
    push_notebook(handle=scatter_handle)

# Keep the handle returned by show() so update_scatter can address this plot.
scatter_handle = show(scatter_graph, notebook_handle=True)


133
133
133
133


In [225]:
# Year selector: interact calls update_scatter with the chosen integer year.
year_bounds = (1990, 2011)
interact(update_scatter, year=year_bounds)

<function __main__.update_scatter>