## Interactive Visualization

We can create interactive charts (multi-line charts, heatmaps, animated bubble charts, and so on) that support interaction techniques such as brushing, filtering, zooming, and hovering.

## Bokeh, Plot.ly, or Altair 

Packages for interactive data visualization (for the web) using Python

https://dash.plot.ly/getting-started

In [1]:
#!pip install bokeh

In [15]:
from bokeh.io import show, output_notebook, curdoc
from bokeh.plotting import figure
from ipywidgets import interact
from bokeh.layouts import row, column, widgetbox, gridplot
from bokeh.models import ColumnDataSource
from bokeh.models.widgets import Slider, TextInput, Button, Paragraph, Select

output_notebook()


In [3]:
def modify_doc(doc):
    """Assemble a small greeting app and attach it to the document ``doc``.

    The app stacks a button, a text input and a paragraph vertically.
    Clicking the button writes "Hello, " followed by the current content
    of the text input into the paragraph.
    """
    greet_button = Button(label="Say HI")
    name_input = TextInput(value="UT")
    greeting = Paragraph()

    def say_hello():
        # The input is read at click time, so the latest text is used.
        name = name_input.value
        greeting.text = "Hello, " + name

    greet_button.on_click(say_hello)

    # Stack the widgets in a column and register it as a root of the document.
    doc.add_root(column(greet_button, name_input, greeting))


# In a notebook, the function that defines the app is handed directly to show().
# A different notebook_url may be needed depending on where the notebook is
# served, e.g. notebook_url="http://localhost:8889".
show(modify_doc, notebook_url="http://localhost:8888")

In [4]:
# Sample data: the integers -20..20 with their absolute values and squares.
x = list(range(-20, 21))
y0 = [abs(elm) for elm in x]
y1 = [elm**2 for elm in x]

# Both plots read from the same ColumnDataSource; sharing one source is what
# lets a selection made in one plot carry over to the other.
source_1 = ColumnDataSource(data=dict(x=x, y0=y0, y1=y1))

TOOLS = "box_select,lasso_select, save, help"

# Left plot: x against |x|.
left = figure(tools=TOOLS, plot_width=300, plot_height=300, title="Absolutes")
left.circle('x', 'y0', source=source_1)

# Right plot: x against x squared.
right = figure(tools=TOOLS, plot_width=300, plot_height=300, title="Squares")
right.circle('x', 'y1', source=source_1)

# Lay the two plots out side by side in a single grid row.
p = gridplot([[left, right]])

show(p)

In [5]:
import pandas as pd
# Load the Fortune 500 table and replace its header with short column names.
df = pd.read_csv('fortune500.csv')
df.columns = ['year', 'rank', 'company', 'revenue', 'profit']

# Flag rows whose profit text contains anything other than digits, '.' or '-',
# i.e. values that cannot be parsed as a number.
non_numberic_profits = df.profit.str.contains('[^0-9.-]')

# Take an explicit copy of the filtered rows so the column assignment below
# writes to an independent frame instead of a possible view of the original.
df = df.loc[~non_numberic_profits].copy()
df['profit'] = pd.to_numeric(df.profit)

group_by_year = df.loc[:, ['year', 'company', 'revenue', 'profit']].groupby('year')

# Aggregate only the numeric columns: 'company' holds strings, and recent
# pandas versions raise instead of silently dropping non-numeric columns.
numeric_by_year = group_by_year[['revenue', 'profit']]
avgs = numeric_by_year.mean()
stds = numeric_by_year.std()


In [6]:
# The index of the per-year means (the years) is the shared x axis;
# the profit and revenue columns become the two y series.
x = avgs.index
avgP, avgR = avgs["profit"], avgs["revenue"]


In [7]:
# Per-year standard deviations of profit and revenue.
stdP, stdR = stds["profit"], stds["revenue"]

In [8]:
def modify_doc(doc):
    """Build the profit/revenue dashboard and attach it to the document ``doc``.

    Two scatter-plus-dashed-line plots (profit and revenue) share a single
    ColumnDataSource. Two selectors decide what that source holds: which
    statistic to show (mean or standard deviation) and how many leading
    points to keep. Both selectors are applied together on every change, so
    switching the statistic keeps the chosen number of points.

    The initial render shows the full mean series; the point limit is
    applied from the first selector change onwards.

    Reads the module-level ``x``, ``avgP``, ``avgR``, ``stdP`` and ``stdR``.
    """
    source_avg = ColumnDataSource(data=dict(x=x, y1=avgP, y2=avgR))

    TOOLS = 'tap'

    def build_plot(title, y_column):
        # One small figure: green markers joined by a dashed gray line, both
        # reading ``y_column`` from the shared source.
        fig = figure(plot_width=300, plot_height=200, tools=TOOLS,
                     toolbar_location=None, title=title)
        fig.circle(x='x', y=y_column, size=5, color='green', source=source_avg,
                   hover_fill_color="black",
                   fill_alpha=0.7, hover_alpha=1,
                   line_color=None, hover_line_color="white")
        fig.line(x='x', y=y_column, source=source_avg,
                 line_dash="4 4", line_width=1, color='gray')
        return fig

    plot = build_plot('Profit', 'y1')
    plot2 = build_plot('Revenue', 'y2')

    # Set up widgets for input
    t = Paragraph()
    text = TextInput(title="Title", value='Profit')
    stat_select = Select(title="Statistics", value="AVG", options=["AVG", "STD"])
    point_select = Select(title="How many points", value="50",
                          options=["10", "20", "30", "40", "50"])

    # Set up callbacks
    def update_title(attrname, old, new):
        t.text = text.value

    # NOTE: ``text`` is not placed in the layout below, so this callback can
    # only fire if ``text.value`` is changed programmatically.
    text.on_change('value', update_title)

    def refresh_source(attrname, old, new):
        # Apply BOTH selectors on every change so that the plotted data
        # always matches what the two widgets currently display.
        if stat_select.value == 'AVG':
            t.text = "Mean"
            profit, revenue = avgP, avgR
        else:
            t.text = "Standard Deviation"
            profit, revenue = stdP, stdR

        count = int(point_select.value)
        source_avg.data = dict(x=x[:count],
                               y1=profit[:count],
                               y2=revenue[:count])

    stat_select.on_change('value', refresh_source)
    point_select.on_change('value', refresh_source)

    # Set up layouts and add to document
    inputs = widgetbox(stat_select, point_select)
    t.text = "Mean"
    layout = column(t, row(inputs, plot, plot2, width=700))
    doc.add_root(layout)


show(modify_doc)

In [9]:
#!pip install altair

In [10]:
#!pip install vega_datasets

In [17]:
import altair as alt
import pandas as pd

# Small demo table: letter labels in column 'a', numeric values in column 'b'.
source = pd.DataFrame({
    'a': list('ABCDEFGHI'),
    'b': [28, 55, 43, 91, 81, 53, 19, 87, 52],
})

# Bar mark with 'a' on the x channel and 'b' on the y channel; the chart is
# the cell's final expression so the notebook displays it.
(
    alt.Chart(source)
    .mark_bar()
    .encode(x='a', y='b')
)

In [18]:
from vega_datasets import data

# Cars table provided by vega_datasets.
source = data.cars()

# Circle marks colored by origin, with a tooltip listing the fields below;
# .interactive() is applied to the finished chart.
(
    alt.Chart(source)
    .mark_circle(size=60)
    .encode(
        x='Horsepower',
        y='Miles_per_Gallon',
        color='Origin',
        tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'],
    )
    .interactive()
)

## Encoding Data Types

| Data Type    | Shorthand Code | Description                       |
|--------------|----------------|-----------------------------------|
| quantitative | Q              | a continuous real-valued quantity |
| ordinal      | O              | a discrete ordered quantity       |
| nominal      | N              | a discrete unordered category     |
| temporal     | T              | a time or date value              |
| geojson      | G              | a geographic shape                |

In [None]:
# Optional export of the per-year means: avgs.to_json('avgs.json')

# Label-based slice of the cleaned table; not used by the chart below.
df_sub = df.loc[1:50, ['year', 'profit']]

# One row per year holding the mean profit and mean revenue.
source = pd.DataFrame({'year': x, 'profit': avgP, 'revenue': avgR})

# Year is encoded as nominal (:N) and profit as quantitative (:Q);
# revenue drives the color channel.
(
    alt.Chart(source)
    .mark_circle(size=20)
    .encode(
        x='year:N',
        y='profit:Q',
        color='revenue',
        tooltip=['year', 'profit', 'revenue'],
    )
    .interactive()
)