#### NYC Flights Database

In [1]:
import datetime
import numpy as np
import pandas as pd

import plotly.graph_objects as go
from ipywidgets import widgets
from causality_simulation import *
import plotly.express as px
%matplotlib inline

In [2]:
# Two groups of 250 samples each: a control group with no interventions and a
# group whose 'Number of Bees' and 'Supplement' entries are marked
# ['fixed', <value>].
# NOTE(review): the meaning of 'fixed' is defined by causality_simulation — confirm there.
config_control = dict(name='Control', N=250, intervene={})
config_intervene = dict(
    name='Intervention (no bees)',
    N=250,
    intervene={
        'Number of Bees': ['fixed', 0],
        'Supplement': ['fixed', 'Kombucha'],
    },
)
config = [config_control, config_intervene]

# Build the experiment on `fruit_node` and hand it both group configurations.
fruit_experiment = Experiment(fruit_node)
fruit_experiment.fixedSetting(config=config)

['Water', 'Kombucha', 'Milk', 'Tea']


VBox(children=(HBox(children=(Label(value='Name the Group', layout=Layout(width='150px')), Text(value='Control…

['Water', 'Kombucha', 'Milk', 'Tea']


VBox(children=(HBox(children=(Label(value='Name the Group', layout=Layout(width='150px')), Text(value='Interve…

In [None]:
# Short alias for the experiment's data; later cells index it by group name
# (e.g. data['Control']).
data = fruit_experiment.data

In [None]:
import random

# Add a column 'h' of randomly chosen greetings to every group's frame.
# The number of draws follows each frame's length instead of a hard-coded 250,
# and the loop covers whatever groups exist instead of two hard-coded names,
# so the cell keeps working if the group size or the group names change.
for group_name in data.keys():
    group_frame = data[group_name]
    group_frame['h'] = np.array(
        random.choices(['hi', 'hello', 'how are you'], k=len(group_frame))
    )

We'll be making an application to take a look at delays from all flights out of NYC in the year 2013.

Let's assign the widgets that we're going to be using in our app. In general all these widgets will be used to filter the data set, and thus what we visualize.

In [34]:
# Every key of the experiment's network is offered as a plottable variable.
options = [node_name for node_name in fruit_experiment.node.network.keys()]

# x-axis selector: any variable, starting on the first one.
textbox1 = widgets.Dropdown(description='x-Axis Variable: ', value=options[0], options=options)

# y-axis selector: the same variables plus an extra entry that is handled
# separately from the real variables by the update callback.
textbox2 = widgets.Dropdown(
    description='y-Axis Variable: ',
    value=options[0],
    options=[*options, 'None (Distributions Only)'],
)

# Assign an empty figure widget with two traces

def choose_trace(x, y, self=fruit_experiment):
    """Pick the trace type for plotting variable `x` against variable `y`.

    The decision is based on the `vartype` of the two nodes looked up in
    `self.node.nodeDict()`:

    * neither categorical      -> 'scatter'
    * only `x` categorical     -> 'bar'
    * only `y` categorical     -> 'barh'
    * both categorical         -> None (no trace type is defined for this case)
    """
    nodes = self.node.nodeDict()
    x_is_categorical = nodes[x].vartype == 'categorical'
    y_is_categorical = nodes[y].vartype == 'categorical'

    if x_is_categorical and y_is_categorical:
        return None
    if x_is_categorical:
        return 'bar'
    if y_is_categorical:
        return 'barh'
    return 'scatter'
        

def construct_trace(x, y, traceType, self=fruit_experiment):
    """Return a factory that builds one plotly trace for a single group.

    Parameters
    ----------
    x, y : str
        Names of the x-/y-axis variables. They are no longer needed because
        the factory works on the data passed to it; kept so existing calls
        stay valid.
    traceType : str or None
        'scatter', 'bar' or 'barh'.
    self : experiment object
        Unused; kept for backward compatibility.

    Returns
    -------
    callable or None
        ``factory(x=None, y=None, name=None)`` where `x` and `y` are the
        group's values for the two variables and `name` is the trace name.
        For 'bar' / 'barh' the factory plots the per-category mean with the
        per-category standard deviation as error bars. None is returned for
        any other `traceType`, as before.

    Fixes relative to the previous version: the bar branches called
    ``.groupby`` on ``self.data`` (which is indexed by group name elsewhere in
    the notebook), the factory arguments shadowed the column names used inside
    it, the standard deviation was indexed by column twice, and the horizontal
    bars attached their error bars to the wrong axis.
    """
    def group_stats(keys, values):
        # Per-category mean and standard deviation of `values`; keys and
        # values are paired by position.
        if keys is None or values is None:
            return [], [], []
        table = pd.DataFrame({'key': list(keys), 'value': list(values)})
        stats = table.groupby('key')['value'].agg(['mean', 'std'])
        return list(stats.index), stats['mean'], stats['std']

    if traceType == 'scatter':
        def make_scatter(x=None, y=None, name=None):
            return go.Scatter(x=x, y=y, mode='markers', opacity=0.75, name=name)
        return make_scatter

    if traceType == 'bar':
        def make_bar(x=None, y=None, name=None):
            # Categories run along x, so the spread is a vertical error bar.
            categories, means, stds = group_stats(x, y)
            return go.Bar(x=categories, y=means, name=name,
                          error_y=dict(type='data', array=stds))
        return make_bar

    if traceType == 'barh':
        def make_barh(x=None, y=None, name=None):
            # Categories run along y, so the spread is a horizontal error bar.
            categories, means, stds = group_stats(y, x)
            return go.Bar(x=means, y=categories, name=name,
                          error_x=dict(type='data', array=stds), orientation='h')
        return make_barh

    return None

    
# Start with one trace per group, plotting the first variable against itself.
traces = []
for group in fruit_experiment.group_names:
    make_trace = construct_trace(options[0], options[0], choose_trace(options[0], options[0]))
    traces.append(
        make_trace(
            x=fruit_experiment.data[group][options[0]],
            y=fruit_experiment.data[group][options[0]],
            name=group,
        )
    )

# Figure widget that the dropdown callback updates in place.
figure_layout = go.Layout(
    title={'text': options[0] + " vs. " + options[0]},
    barmode='overlay',
    height=500,
    width=800,
    xaxis={'title': options[0]},
    yaxis={'title': options[0]},
)
g = go.FigureWidget(data=traces, layout=figure_layout)

Let's now write a function that will handle the input from the widgets and alter the state of the graph.

In [35]:
def validate():
    """Return True when both dropdowns hold a known choice.

    The x selector must be one of `options`; the y selector may additionally
    be the 'None (Distributions Only)' entry.
    """
    if textbox1.value not in options:
        return False
    return textbox2.value in [*options, 'None (Distributions Only)']


def response(change):
    """Redraw figure `g` for the variables currently selected in the dropdowns.

    Registered as an observer on both dropdowns. `change` is the notification
    passed by the widget and is not used; the current values are read from
    `textbox1` / `textbox2` directly. Nothing happens if `validate()` fails.

    * y is a real variable: the trace type comes from `choose_trace`.
      'scatter' plots the raw values; 'bar' / 'barh' plot the per-category
      mean with the per-category standard deviation as error bars. When
      `choose_trace` returns None (both variables categorical) the traces are
      left untouched and only the titles change.
    * y is 'None (Distributions Only)': every group is drawn as a histogram
      of the x variable.

    The mean and standard deviation are computed for the plotted column only.
    Aggregating the whole frame, as before, can fail on non-numeric columns
    (pandas >= 2.0 no longer drops them silently) and did the grouping twice.
    """
    if not validate():
        return

    x_var, y_var = textbox1.value, textbox2.value
    group_names = fruit_experiment.group_names

    if y_var in options:
        traceType = choose_trace(x_var, y_var)
        with g.batch_update():
            if traceType == 'scatter':
                for i, group in enumerate(group_names):
                    frame = fruit_experiment.data[group]
                    trace = g.data[i]
                    trace.x = frame[x_var]
                    trace.y = frame[y_var]
                    trace.error_y = {'visible': False}
                    trace.error_x = {'visible': False}
                    trace.orientation = None
                g.plotly_restyle({'type': 'scatter', 'opacity': 0.75})
            elif traceType == 'bar':
                g.plotly_restyle({'type': 'bar', 'opacity': 1})
                for i, group in enumerate(group_names):
                    # Categories on x; mean/std of the y variable per category.
                    stats = fruit_experiment.data[group].groupby(x_var)[y_var].agg(['mean', 'std'])
                    trace = g.data[i]
                    trace.x = list(stats.index)
                    trace.y = stats['mean']
                    trace.error_y = dict(type='data', array=stats['std'], visible=True)
                    trace.error_x = {'visible': False}
                    trace.orientation = None
            elif traceType == 'barh':
                g.plotly_restyle({'type': 'bar', 'opacity': 1})
                for i, group in enumerate(group_names):
                    # Categories on y; mean/std of the x variable per category.
                    stats = fruit_experiment.data[group].groupby(y_var)[x_var].agg(['mean', 'std'])
                    trace = g.data[i]
                    trace.x = stats['mean']
                    trace.y = list(stats.index)
                    trace.error_x = dict(type='data', array=stats['std'], visible=True)
                    trace.orientation = 'h'
                    trace.error_y = {'visible': False}
            g.layout.xaxis.title = x_var
            g.layout.yaxis.title = y_var
            g.layout.title = x_var + " vs. " + y_var
    else:
        with g.batch_update():
            # Categorical histograms are drawn opaque, numeric ones translucent.
            if fruit_experiment.node.nodeDict()[x_var].vartype == "categorical":
                g.plotly_restyle({'opacity': 1})
            else:
                g.plotly_restyle({'opacity': 0.75})
            for i, group in enumerate(group_names):
                trace = g.data[i]
                trace.x = fruit_experiment.data[group][x_var]
                trace.y = None
                trace.error_x = {'visible': False}
                trace.error_y = {'visible': False}
                trace.orientation = None
            g.layout.xaxis.title = x_var
            g.layout.yaxis.title = "Count"
            g.layout.title = x_var
            g.plotly_restyle({'type': 'histogram'})

# Call `response` whenever the selected value of either dropdown changes.
for dropdown in (textbox1, textbox2):
    dropdown.observe(response, names="value")

Time to try the app out!!

In [36]:
# Both dropdowns in one row, with the figure widget stacked underneath.
container = widgets.HBox(children=[textbox1, textbox2])
widgets.VBox(children=[container, g])

VBox(children=(HBox(children=(Dropdown(description='x-Axis Variable: ', options=('Soil Quality', 'Number of Fr…

In [None]:
#table with radio buttons

In [None]:
# Scratch cell: the 10x10 figure is built but not kept or displayed, because
# only the last expression of a cell (`df`) is shown.
# NOTE(review): `df` is not assigned anywhere in the visible notebook — confirm
# where it comes from, or this cell fails on a fresh kernel.
go.Figure().update_layout({'height':10, 'width':10})
df

In [None]:
# Distinct values of the 'Supplement' column.
# NOTE(review): `df` is not assigned anywhere in the visible notebook — confirm its origin.
df['Supplement'].unique()

In [None]:

# Radio-button selector with one entry per key of `data` (the group names).
widgets.RadioButtons(
    options=list(data.keys()),
    description='Group',
    disabled=False,
    layout={'width': 'max-content'},  # keeps long item names readable
)

In [None]:
# Stack both groups, count the non-null 'x' values for every
# ('Supplement', 'h') pair, and lay the counts out with one row per supplement
# and one column per 'h' value.
# `pivot` is called with keywords: its positional form is no longer accepted
# by pandas 2.x.
(
    pd.concat([data['Control'], data['Intervention (no bees)']])
    .groupby(['Supplement', 'h'])
    .agg('count')
    .reset_index()
    .pivot(index='Supplement', columns='h', values='x')
)

In [None]:
#vertical bar chart

In [None]:
# Mean and standard deviation of column 'x' in the control group.
# NOTE(review): np.std defaults to ddof=0, whereas the pandas 'std' aggregations
# used elsewhere in this notebook default to ddof=1 — confirm which is intended.
control_x = fruit_experiment.data['Control']['x']
y_avg = np.mean(control_x)
y_std = np.std(control_x)

In [None]:
# One vertical bar per supplement: height from `avg`, error bar from `std`.
# NOTE(review): `avg` and `std` are not assigned anywhere in the visible
# notebook — confirm where they come from.
trace = go.Bar(
    x=avg['Supplement'],
    y=avg['Number of Bees'],
    error_y=dict(type='data', array=std['Number of Bees']),
)
go.Figure(trace)

In [None]:
# Horizontal bar plot: supplements on the y-axis, bar length and error bars along x.
# NOTE(review): `avg` and `std` are not assigned anywhere in the visible
# notebook — confirm where they come from.
trace = go.Bar(
    y=avg['Supplement'],
    x=avg['Number of Bees'],
    error_x=dict(type='data', array=std['Number of Bees']),
    orientation='h',
)
# The same trace is added three times.
go.Figure([trace] * 3)

In [None]:
# One single-bar trace per supplement, so each supplement gets its own legend entry.
# NOTE(review): `avg` and `std` are not assigned anywhere in the visible notebook.
traces = []
for supplement in avg['Supplement']:
    mean_bees = avg.loc[avg['Supplement'] == supplement, 'Number of Bees']
    std_bees = std.loc[std['Supplement'] == supplement, 'Number of Bees']
    trace = go.Bar(x=[supplement], y=mean_bees, error_y=dict(type='data', array=std_bees))
    traces.append(trace)
go.Figure(traces)

In [None]:
# 'Number of Bees' rows of `avg` for `supplement`, which still holds the last
# value of the loop variable from the previous cell.
avg.loc[avg['Supplement'] == supplement, 'Number of Bees']

In [None]:
#bar plot of counts

In [None]:
# One bar trace per group: for each value of 'h', the count of non-null 'x' entries.
traces = []
for group in data.keys():
    h_counts = data[group].groupby('h').agg('count')['x']
    traces.append(go.Bar(x=h_counts.index, y=h_counts))
go.Figure(traces)

In [None]:
%%html
<img src = 'https://cloud.githubusercontent.com/assets/12302455/16637308/4e476280-43ac-11e6-9fd3-ada2c9506ee1.gif' >

#### Reference

In [None]:
# Print the built-in documentation for plotly's FigureWidget class.
help(go.FigureWidget)