# Introduction to Altair For Interactive Visualization

This Jupyter notebook steps through some examples of interactive plots that can be made using Altair, a Python package built on Vega and Vega-Lite, which allow high-level visualization using a JSON syntax. Execute the cells below to view some interactive Lake Problem visualizations. Code is derived from: 

https://github.com/julianneq/Lake_Problem_DPS

Altair can be installed using: 

*pip install altair vega_datasets*

OR

*conda install -c conda-forge altair vega_datasets*

*import libraries*

In [2]:
import altair as alt
import pandas as pd
import numpy as np

DPS formulation of the Lake Problem

In [3]:
def DPSpolicy(lake_state, vars):
    """Evaluate a cubic radial-basis-function (RBF) release policy.

    Parameters
    ----------
    lake_state : float
        Lake state at which the policy is evaluated.
    vars : sequence of float
        Flattened RBF parameters laid out as repeating
        (center, radius, weight) triples.

    Returns
    -------
    float
        Release decision Y, clipped to the interval [0.01, 0.1].
    """
    # De-interleave the (center, radius, weight) triples
    C = np.asarray(vars[0::3], dtype=float)
    B = np.asarray(vars[1::3], dtype=float)
    W = np.asarray(vars[2::3], dtype=float)

    # Normalize weights to sum to 1; if they sum to zero, fall back to
    # equal weights (previously this branch referenced an undefined `n`).
    total = W.sum()
    if total != 0.0:
        newW = W / total
    elif len(W) > 0:
        newW = np.full(len(W), 1.0 / len(W))
    else:
        newW = np.zeros(0)

    # Determine pollution emission decision, Y, using the *normalized*
    # weights (previously the raw weights were used and newW was ignored).
    Y = 0.0
    for center, radius, weight in zip(C, B, newW):
        # An RBF with zero radius is skipped to avoid dividing by zero
        if radius != 0:
            Y = Y + weight * ((np.absolute(lake_state - center) / radius) ** 3)

    # Keep the release within its allowed bounds
    Y = min(0.1, max(Y, 0.01))

    return Y

Selection Detail Example: This type of figure allows you to link data across two figures, so when you click a point, the corresponding line is shown in the second figure. I used this figure to highlight the discharge policy that is associated with each point in the DPS Pareto front.

In [5]:
# Load the DPS solutions (decision variables) and their reference-set objectives
DPS_resultfile = np.loadtxt('DPS.resultfile')
DPS_reference = np.loadtxt('DPS.reference')

# Lake states at which every policy is evaluated
lake_state = np.arange(0,2.5,0.2)

# Scatter-plot coordinates: column 1 is plotted on x as-is ('P Concentration'),
# column 0 is sign-flipped for y ('Economic Benefit')
x = DPS_reference[:,1]
y = -1*DPS_reference[:,0]

n_objects = len(x)
n_times = len(lake_state)

# Each policy becomes one column that is labelled with a point id below,
# so the two input files must describe the same number of solutions.
if len(DPS_resultfile) != n_objects:
    raise ValueError(
        'DPS.resultfile and DPS.reference must contain the same number of rows'
    )

# Evaluate every policy at every lake state. The first six entries of a
# solution row are passed to DPSpolicy, i.e. two (center, radius, weight) triples.
Y_new = np.zeros([n_times, len(DPS_resultfile)])
for z in range(len(DPS_resultfile)):
    for i in range(n_times):
        Y_new[i,z] = DPSpolicy(lake_state[i], DPS_resultfile[z,0:6])

# Create one (x, y) pair of metadata per object
locations = pd.DataFrame({
    'id': range(n_objects),
    'x': x,
    'y': y
})

# Axis labels are derived from lake_state ('0', '0.2', ..., '2.4') so they
# cannot drift out of sync with the grid the policies were evaluated on.
time_labels = ['{:g}'.format(state) for state in lake_state]

# Wide form: one row per lake state, one column per object id
policy_wide = pd.DataFrame(
    Y_new,
    columns=locations['id'],
    index=pd.Index(time_labels, name='time')
)

# Melt the wide-form table into a long-form view (time, id, value)
policy_long = policy_wide.reset_index().melt('time', var_name='id')

# Merge the (x, y) metadata into the long-form view
policy_long['id'] = policy_long['id'].astype(int)  # make merge not complain
data = pd.merge(policy_long, locations, on='id')

# Data is prepared, now make a chart

# Clicking a point selects its id; with nothing selected, everything is shown
selector = alt.selection_single(empty='all', fields=['id'])

base = alt.Chart(data).properties(
    width=250,
    height=250
).add_selection(selector)

# Left panel: one point per solution
points = base.mark_point(filled=True, size=200).encode(
    x=alt.X('x', axis=alt.Axis(title='P Concentration')),
    y=alt.Y('y', axis=alt.Axis(title='Economic Benefit')),
    color=alt.condition(selector, 'id:O', alt.value('lightgray'), legend=None),
)

# Right panel: release policy of the selected solution(s)
timeseries = base.mark_line().encode(
    x=alt.X('time', axis=alt.Axis(title='Lake P Concentration')),
    y=alt.Y('value', axis=alt.Axis(title='Anthropogenic Release'),scale=alt.Scale(domain=(0,0.15))),
    color=alt.Color('id:O', legend=None)
).transform_filter(
    selector
)
points | timeseries
# To export the linked chart as a standalone page:
#chart=(points | timeseries)
#chart.save('policy.html')



Retrieving robustness solutions and defining success and failure states of the world

In [7]:
# Satisficing thresholds applied to objective columns 0 and 3
# (economic benefit and reliability, per the criteria described in this notebook)
MIN_ECONOMIC_BENEFIT = 0.2
MIN_RELIABILITY = 0.95


def _split_by_criteria(samples, objectives):
    """Split sampled states of the world (SOWs) into successes and failures.

    Parameters
    ----------
    samples : ndarray
        One row per SOW.
    objectives : ndarray
        Objective values of a single solution, one row per SOW; column 0 is
        compared against MIN_ECONOMIC_BENEFIT and column 3 against
        MIN_RELIABILITY.

    Returns
    -------
    (ndarray, ndarray)
        Rows of `samples` where both criteria are strictly exceeded, and rows
        where at least one criterion is not exceeded.
    """
    n_sows = np.shape(samples)[0]
    benefit = objectives[:n_sows, 0]
    reliability = objectives[:n_sows, 3]

    # The two masks are computed independently (rather than one being the
    # negation of the other) so NaN rows land in neither group.
    succeeded = (benefit > MIN_ECONOMIC_BENEFIT) & (reliability > MIN_RELIABILITY)
    failed = (benefit <= MIN_ECONOMIC_BENEFIT) | (reliability <= MIN_RELIABILITY)
    return samples[succeeded, :], samples[failed, :]


LHsamples = np.loadtxt('LHsamples.txt')

# read in satisficing measure of IT and DPS solutions
IT = np.loadtxt('ITrobustness.txt',delimiter=' ')
DPS = np.loadtxt('DPSrobustness.txt',delimiter=' ')

# find most robust IT and DPS solutions (largest value in column 2)
ITbest = np.argmax(IT[:,2])
DPSbest = np.argmax(DPS[:,2])

# read in objective values of most robust IT and DPS solutions in alternative SOWs
mostRobustIT = np.loadtxt('./Intertemporal/output/ITobjs_' + str(int(ITbest)) + '.txt', delimiter=' ')
mostRobustDPS = np.loadtxt('./DPS/output/DPSobjs_' + str(int(DPSbest)) + '.txt', delimiter=' ')

# determine in which SOWs the most robust IT and DPS solutions succeed or fail
IT_success, IT_fail = _split_by_criteria(LHsamples, mostRobustIT)
DPS_success, DPS_fail = _split_by_criteria(LHsamples, mostRobustDPS)

Selection Histogram: Using this type of figure allows the user to highlight sections of the scatter plot which contain points from different categories and to see a histogram of the highlighted points. I used this type of plot to recreate the first panel of Figure 10, which shows the parameter combinations that lead to success and failure in different states of the world for the criteria of economic benefit > 0.2 and reliability > 0.95. For a rectangular cross section you can see the breakdown of success and failure in the histogram. 

In [15]:
# Number of states of the world in each outcome group
n_objects_success = len(DPS_success)
n_objects_fail = len(DPS_fail)

# One frame per outcome: a running id, the first two sample columns as (x, y),
# and a label naming the outcome
frames = [
    pd.DataFrame({
        'id': range(len(sows)),
        'x': sows[:,0],
        'y': sows[:,1],
        'state': outcome
    })
    for outcome, sows in (('Success', DPS_success), ('Failure', DPS_fail))
]
data_success, data_failure = frames
data = pd.concat(frames)

# Rectangular (interval) brush drawn on the scatter plot
brush = alt.selection(type='interval')

# Scatter encodings: fixed axis ranges, colour by outcome inside the brush only
x_encoding = alt.X('x:Q', axis=alt.Axis(title='b'),scale=alt.Scale(domain=(0.1,0.45)))
y_encoding = alt.Y('y:Q', axis=alt.Axis(title='q'),scale=alt.Scale(domain=(2,4.5)))
brushed_color = alt.condition(brush, 'state:N', alt.value('lightgray'))

points = (
    alt.Chart(data)
    .mark_point()
    .encode(x=x_encoding, y=y_encoding, color=brushed_color)
    .add_selection(brush)
)

# Bar chart counting the outcomes of the brushed points
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(x='count(state):Q', y='state:N', color='state:N')
    .transform_filter(brush)
)

points & bars

# To export the stacked chart as a standalone page:
#chart=(points & bars)
#chart.save('scenario_discovery.html')


Multi-panel scatter plot with linked brushing: By brushing on one plot, you see the corresponding points on the other. I thought this might be useful to view 2D subproblems side by side. 

In [16]:

def _objectives_frame(reference, series):
    """Build a labelled objectives table from a reference-set array.

    Columns 0, 2 and 3 are sign-flipped and column 1 is kept as-is; `series`
    is stored in every row to identify the formulation.
    """
    return pd.DataFrame({
        'Economic Benefit': -1*reference[:,0],
        'P Concentration': reference[:,1],
        'Inertia': -1*reference[:,2],
        'Reliability': -1*reference[:,3],
        'series': series
    })


IT_reference = np.loadtxt('Intertemporal.reference')
DPS_reference = np.loadtxt('DPS.reference')

data_DPS = _objectives_frame(DPS_reference, 'DPS')
data_IT = _objectives_frame(IT_reference, 'IT')

# Stack both formulations into a single table for plotting
frames = [data_DPS, data_IT]
data = pd.concat(frames)

# A single interval brush shared by both panels
brush = alt.selection(type='interval', resolve='global')

# Shared panel definition: y axis and brush-dependent colouring
series_color = alt.condition(brush, 'series', alt.ColorValue('gray'))
base = (
    alt.Chart(data)
    .mark_point()
    .encode(y='Economic Benefit', color=series_color)
    .add_selection(brush)
    .properties(width=250, height=250)
)

# The two panels differ only in the x variable
concentration_panel = base.encode(x='P Concentration')
reliability_panel = base.encode(x='Reliability')

concentration_panel | reliability_panel

# To export the linked chart as a standalone page:
#chart=(base.encode(x='P Concentration') | base.encode(x='Reliability'))
#chart.save('linked_scatter.html')

Linked Legend: This type of plot allows you to isolate parts of a plot by clicking on the legend. Here we have histograms of the Pareto fronts for the Intertemporal and DPS formulations. By clicking on the legend, the respective histogram will be kept while the other becomes transparent. 

In [14]:
import pandas as pd
import altair as alt
import numpy as np


# Number of leading reference-set rows taken from each formulation. Both
# columns of a source frame must have the same length, so the same slice is
# applied to both arrays.
# NOTE(review): 77 is presumably the size of the smaller reference set - confirm.
N_SOLUTIONS = 77


def _objective_source(column, sign):
    """Return a two-column (DPS, IT) frame for one objective.

    `column` selects the objective in both reference arrays and `sign`
    (-1 or 1) is multiplied into the values.
    """
    return pd.DataFrame({
        'DPS': sign*DPS_reference[0:N_SOLUTIONS, column],
        'IT': sign*IT_reference[0:N_SOLUTIONS, column]
    })


def _formulation_histogram(source, objective, selection):
    """Return overlaid step histograms of `objective` for DPS and IT.

    The wide `source` frame is folded into (Formulation, objective) pairs;
    formulations not chosen through `selection` are drawn at 0.2 opacity.
    """
    return alt.Chart(source).transform_fold(
        ['DPS', 'IT'],
        as_=['Formulation', objective]
    ).mark_area(
        opacity=1,
        interpolate='step'
    ).encode(
        alt.X(objective + ':Q', bin=alt.Bin(maxbins=10)),
        alt.Y('count()', stack=None),
        alt.Color('Formulation:N'),
        opacity=alt.condition(selection, alt.value(1), alt.value(0.2))
    ).add_selection(
        selection
    )


# Generating Data: objectives in columns 0, 2 and 3 are sign-flipped,
# column 1 is used as-is
source_1 = _objective_source(0, -1)
source_2 = _objective_source(1, 1)
source_3 = _objective_source(2, -1)
source_4 = _objective_source(3, -1)

# Clicking a legend entry selects that formulation in every panel
selection = alt.selection_multi(fields=['Formulation'], bind='legend')

hist_1 = _formulation_histogram(source_1, 'Economic Benefit', selection)
hist_2 = _formulation_histogram(source_2, 'P Concentration', selection)
hist_3 = _formulation_histogram(source_3, 'Inertia', selection)
hist_4 = _formulation_histogram(source_4, 'Reliability', selection)

hist_1|hist_2|hist_3|hist_4
# To export the linked chart as a standalone page:
#chart=(hist_1|hist_2|hist_3|hist_4)
#chart.save('linked_legend.html')

