In [None]:
# libraries
import pandas as pd
import numpy as np
from IPython.display import HTML
import base64, io, IPython
from IPython.display import Image
from IPython import display
from IPython.display import HTML
import altair as alt

In [None]:
# Read the tidied dataset from disk, then slice off the first CSV column
# (positional slice: everything from column 1 onward is kept).
raw_amazon = pd.read_csv('data/amazon_tidy.csv')
amazon_tidy = raw_amazon.iloc[:, 1:]

# show the loaded frame as the cell output
amazon_tidy

In [None]:
# Restrict the working frame to the leading 28 columns (positions 0 through 27).
# NOTE(review): this cell was previously described as keeping the "first 30
# columns", but the slice takes 28 -- confirm which count is intended.
n_leading_cols = 28

amazon_tidy1 = amazon_tidy.iloc[:, :n_leading_cols]
amazon_tidy1.head()

In [None]:
# Count the missing entries in each column: build the boolean "is missing"
# mask first, then total it column by column.
missing_mask = amazon_tidy1.isnull()
missing_mask.sum()

In [None]:
# Pairwise correlation matrix of the numeric variables.
# amazon_tidy1 also carries non-numeric columns (e.g. the string-valued Country
# column used for filtering later on). Since pandas 2.0, DataFrame.corr() no
# longer drops such columns silently and raises instead, so the numeric (and
# boolean) columns are selected explicitly. This matches what older pandas
# versions computed by default and works regardless of the installed version.
corr_mx = amazon_tidy1.select_dtypes(include = ['number', 'bool']).corr()
corr_mx

In [None]:
# Long-format copy of the correlation matrix: one record per (row, col) pair,
# with the coefficient stored in 'Correlation'.
corr_mx_indexed = corr_mx.reset_index().rename(columns = {'index': 'row'})
corr_mx_long = corr_mx_indexed.melt(
    id_vars = 'row',
    var_name = 'col',
    value_name = 'Correlation'
)

# Both axes are ordered by the Correlation field, ascending.
axis_sort = {'field': 'Correlation', 'order': 'ascending'}

# Diverging colour scale pinned to [-1, 1] so that zero sits at the neutral
# midpoint of the scheme (ensure white = 0).
corr_colour = alt.Color(
    'Correlation',
    scale = alt.Scale(scheme = 'blueorange', domain = (-1, 1), type = 'sqrt'),
    legend = alt.Legend(tickCount = 5)
)

# heat map: one rectangle per variable pair
corr_heatmap = alt.Chart(corr_mx_long).mark_rect().encode(
    x = alt.X('col', title = '', sort = dict(axis_sort)),
    y = alt.Y('row', title = '', sort = dict(axis_sort)),
    color = corr_colour
)
corr_heatmap.properties(width = 300, height = 300)

In [None]:
# Pivot from wide (one column per disease) to long format: one row per original
# record and disease, with the disease name in 'Disease' and its value in 'Rate'.
# The disease columns are listed once so that id_vars and value_vars cannot
# drift apart when the list is edited.
disease_cols = [
    'Chikungunya', 'Cutaneous.Leishmaniasis', 'Dengue', 'Malaria',
    'Mucosal.Leishmaniasis', 'Visceral.Leishmaniasis', 'Yellow.Fever', 'Zika'
]

amazon_disease = amazon_tidy1.melt(
    # every column that is not a disease column identifies the record
    id_vars = amazon_tidy1.columns.drop(disease_cols).tolist(),
    value_vars = disease_cols,
    var_name = 'Disease',
    value_name = 'Rate'
)

# keep only the rows where a rate was actually recorded
amazon_disease1 = amazon_disease.dropna(subset = ['Rate'])
amazon_disease1

In [None]:
# Single-value selection on Year, bound to a range slider (2010-2019, step 1)
# and initialised to 2010.
year_slider = alt.binding_range(min=2010, max=2019, step=1)
select_year = alt.selection_single(
    name='select',
    fields=['Year'],
    init={'Year': 2010},
    bind=year_slider
)

# use the default data transformer with its row limit switched off
alt.data_transformers.enable('default', max_rows=None)

# base plot: Colombia rows of the long-format disease table
is_colombia = amazon_disease1.Country == 'Colombia'
base_colombia = alt.Chart(amazon_disease1[is_colombia])

# kernel density estimate of Rate, computed separately per (Year, Disease)
colombia_kde = base_colombia.transform_density(
    density = 'Rate',
    groupby = ['Year', 'Disease'],
    as_ = ['Rate', 'Estimated density'],
    bandwidth = 100,
    steps = 1000
)

# one density curve per disease: x = rate, color = disease
colombia_curves = colombia_kde.mark_line().encode(
    x = 'Rate:Q',
    y = 'Estimated density:Q',
    color = 'Disease'
)

# attach the slider and keep only the curves of the selected year
# NOTE(review): despite the `precip_` prefix, the density is taken over Rate.
precip_density_colombia = colombia_curves.add_selection(select_year).transform_filter(select_year)

precip_density_colombia

# Export is currently disabled; the chart can be written out with e.g.
# precip_density_colombia.save('precip_density.html') or
# precip_density_colombia.save('precip_density.json').

In [None]:
# Peru: density curves of Rate per disease, filtered by the Year selection.
peru_rows = amazon_disease1.loc[amazon_disease1['Country'] == 'Peru']
base_peru = alt.Chart(peru_rows)

# kernel density estimate settings: Rate density per (Year, Disease) group
peru_kde_options = dict(
    density = 'Rate',
    groupby = ['Year', 'Disease'],
    as_ = ['Rate', 'Estimated density'],
    bandwidth = 100,
    steps = 1000
)

# channel mapping for the density curves
peru_line_encoding = dict(
    x = 'Rate:Q',
    y = 'Estimated density:Q',
    color = 'Disease'
)

# density -> lines -> attach the Year selection -> keep only the selected year
precip_density_peru = (
    base_peru
    .transform_density(**peru_kde_options)
    .mark_line()
    .encode(**peru_line_encoding)
    .add_selection(select_year)
    .transform_filter(select_year)
)

precip_density_peru

In [None]:
# # Peru

# base_peru = alt.Chart(amazon_disease[amazon_disease.Country == 'Peru']).transform_filter(
#     alt.FieldOneOfPredicate(field = 'Year',
#                             oneOf = [2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019]
#                            )
# )

# precip_density_peru = base_peru.transform_density(
#     density = 'Precip',
#     groupby = ['Year'],
#     as_ = ['Precipitation', 'Estimated density'],
#     bandwidth = 100,
#     steps = 1000
# ).mark_line().encode(
#     x = 'Precipitation:Q',
#     y = 'Estimated density:Q'
# ).add_selection(select_year).transform_filter(select_year)

# precip_density_peru

In [None]:
# Brazil: density curves of Rate per disease, filtered by the Year selection.
# Unlike the other country charts, rows with a Rate of exactly 0 are excluded.
brazil_mask = amazon_disease1.Country.eq('Brazil') & amazon_disease1.Rate.ne(0)
base_brazil = alt.Chart(amazon_disease1[brazil_mask])

# kernel density estimate of Rate within each (Year, Disease) group
brazil_chart = base_brazil.transform_density(
    'Rate',
    groupby = ['Year', 'Disease'],
    as_ = ['Rate', 'Estimated density'],
    bandwidth = 100,
    steps = 1000
)

# draw the estimate as one line per disease
brazil_chart = brazil_chart.mark_line()
brazil_chart = brazil_chart.encode(
    x = 'Rate:Q',
    y = 'Estimated density:Q',
    color = 'Disease'
)

# attach the Year selection and show only the selected year
brazil_chart = brazil_chart.add_selection(select_year)
precip_density_brazil = brazil_chart.transform_filter(select_year)

precip_density_brazil

In [None]:
# Model visualisation: fitted response against precipitation, one line per Rate
# quantile, with +/- 2 standard-error bands.
# (Original goal noted for this cell: faceted plot of x: precipitation,
#  y: rates, color: disease, years nondependent -- still TO FIX.)
#
# NOTE(review): `mlr_full` and `vhat_full` are not defined in the visible part of
# the notebook. Presumably `mlr_full` is a fitted model whose design matrix is
# [intercept, precip, rate] and `vhat_full` is the matching coefficient
# covariance estimate -- confirm before relying on this cell.
# NOTE(review): the response column keeps the name 'mean_summer_temp' from the
# code this cell was adapted from; rename it once the modelled response is settled.

# make univariate grids: 100 evenly spaced precipitation values, 3 Rate quantiles
precip_grid = np.linspace(amazon_disease.Precip.min(), amazon_disease.Precip.max(), 100)
rate_grid = amazon_disease.Rate.quantile([0.1, 0.5, 0.9]).values

# make mesh grid -- all combinations of univariate grid values
tx, ix = np.meshgrid(precip_grid, rate_grid)

# The mesh holds len(rate_grid) * len(precip_grid) points, so the row count is
# read from the mesh itself. The previously hard-coded 1200 did not match this
# 3 x 100 mesh and made reshape() raise.
n_grid = tx.size
x_grid_mx = np.column_stack([np.ones(n_grid), tx.ravel(), ix.ravel()])
grid_df = pd.DataFrame(x_grid_mx, columns = ['intercept', 'precip', 'rate'])

# plain float design matrix; taken before prediction columns are appended
grid_mx = grid_df.astype('float64').values

# add predictions and standard errors
grid_df['mean_summer_temp'] = mlr_full.predict(grid_mx)
# the diagonal of X V X' holds the variance of each fitted value
grid_df['fit_se'] = np.sqrt(grid_mx.dot(vhat_full).dot(grid_mx.transpose()).diagonal())

# base layer -- only columns that exist in grid_df are encoded
# (the former tree_cover / mean_income / pop_density fields are not part of it).
# Rate takes just three grid values, so it is shown as an ordered category.
base = alt.Chart(grid_df).encode(
    x = alt.X('precip:Q', scale = alt.Scale(type = 'pow', exponent = 1/2, zero = False)),
    color = alt.Color('rate:O', scale = alt.Scale(scheme = 'orangered', reverse = True))
)

# regression lines
lines = base.mark_line().encode(
    y = alt.Y('mean_summer_temp:Q', scale = alt.Scale(zero = False))
)

# uncertainty bands: fitted value +/- 2 standard errors
bands = base.transform_calculate(
    upr = 'datum.mean_summer_temp + 2*datum.fit_se',
    lwr = 'datum.mean_summer_temp - 2*datum.fit_se'
).mark_errorband(opacity = 0.3).encode(
    y = alt.Y('lwr:Q', title = 'mean_summer_temp'),
    y2 = 'upr:Q'
)

# model visualization
fig3 = bands + lines

# Faceting is left out: the grid has no column to facet by yet.
fig3.properties(
    width = 300,
    height = 200
)