#### NYC Flights Database

In [1]:
import datetime
import numpy as np
import pandas as pd

import plotly.graph_objects as go
from ipywidgets import widgets
from causality_simulation import *
import plotly.express as px
from IPython.display import display, update_display
%matplotlib inline

In [2]:
# Group definitions handed to the experiment: a control group with no
# interventions, and a group whose bee count and supplement are pinned to
# fixed values.
config_control = dict(name='Control', N=250, intervene={})
config_intervene = dict(
    name='Intervention (no bees)',
    N=250,
    intervene={
        'Number of Bees': ['fixed', 0],
        'Supplement': ['fixed', 'Kombucha'],
    },
)
config = [config_control, config_intervene]

# `Experiment` and `fruit_node` are presumably provided by the star import of
# causality_simulation — they are not defined in this notebook.
fruit_experiment = Experiment(fruit_node)
fruit_experiment.fixedSetting(config=config)

VBox(children=(HBox(children=(Label(value='Name the Group', layout=Layout(width='150px')), Text(value='Control…

VBox(children=(HBox(children=(Label(value='Name the Group', layout=Layout(width='150px')), Text(value='Interve…

In [None]:
# Shorthand for the experiment's data, which this notebook indexes by group name.
data = fruit_experiment.data

In [None]:
# Presumably draws the causal network attached to the experiment's node — confirm against causality_simulation.
fruit_experiment.node.drawNetwork()

In [None]:
import random

# Attach a synthetic text column 'h' to both groups. Values are drawn
# uniformly at random and unseeded, so they differ on every run.
# The sample count follows each frame's own length rather than a hard-coded
# 250, so this cell keeps working if 'N' in the group configs is changed.
_greetings = ['hi', 'hello', 'how are you']
for _group_name in ('Control', 'Intervention (no bees)'):
    _frame = fruit_experiment.data[_group_name]
    _frame['h'] = np.array(random.choices(_greetings, k=len(_frame)))

We'll be making an application to take a look at delays from all flights out of NYC in the year 2013.

Let's assign the widgets that we're going to be using in our app. In general all these widgets will be used to filter the data set, and thus what we visualize.

In [14]:
# Variables offered on the x axis are the keys of the experiment's network;
# the y axis additionally offers a histogram-only sentinel entry.
x_options = list(fruit_experiment.node.network.keys())
y_options = [*x_options, 'None (Distributions Only)']

# Both dropdowns start on the first variable.
textbox1 = widgets.Dropdown(
    options=x_options,
    value=x_options[0],
    description='x-Axis Variable: ',
)
textbox2 = widgets.Dropdown(
    options=y_options,
    value=y_options[0],
    description='y-Axis Variable: ',
)

# Group selector: one entry per key of the experiment data, plus 'All'.
button = widgets.RadioButtons(
    options=[*fruit_experiment.data.keys(), 'All'],
    description='Group',
    disabled=False,
    layout={'width': 'max-content'},
)

class Nothing:
    """Placeholder object whose text representation is the empty string.

    The notebook displays an instance to blank out the output slot that is
    addressed with ``display_id='1'``.
    """

    def __init__(self):
        pass

    def __repr__(self):
        return str()

def choose_trace(x, y, self=fruit_experiment):
    """Pick the plot kind for variables ``x`` and ``y`` from their ``vartype``.

    Returns 'scatter' when neither variable is categorical, 'bar' when only
    ``x`` is, 'barh' when only ``y`` is, and 'table' when both are.
    """
    x_is_categorical = self.node.nodeDict()[x].vartype == 'categorical'
    y_is_categorical = self.node.nodeDict()[y].vartype == 'categorical'
    kind_by_flags = {
        (False, False): 'scatter',
        (True, False): 'bar',
        (False, True): 'barh',
        (True, True): 'table',
    }
    return kind_by_flags[(x_is_categorical, y_is_categorical)]
        

def construct_trace(x, y, traceType, self=fruit_experiment):
    """Return a factory that builds the plotly trace for one experiment group.

    Parameters
    ----------
    x, y : str
        Names of the variables on each axis. Kept for interface
        compatibility; the values to plot are passed to the returned factory.
    traceType : str
        One of 'scatter', 'bar', 'barh' or 'table'.
    self : object
        Kept for interface compatibility; not read. The experiment's ``data``
        is indexed by group name elsewhere in this notebook, so it cannot be
        grouped directly — the per-group values handed to the factory are
        aggregated instead.

    Returns
    -------
    callable
        ``factory(x=None, y=None, name=None)`` returning a plotly trace:

        * 'scatter' – markers of ``x`` against ``y``;
        * 'bar'     – mean of ``y`` per distinct ``x`` with std error bars;
        * 'barh'    – mean of ``x`` per distinct ``y``, drawn horizontally
          with the error bars on the x axis;
        * 'table'   – an empty placeholder trace (the table itself is shown
          outside the figure).

    Raises
    ------
    ValueError
        If ``traceType`` is not one of the four supported kinds.
    """

    def _mean_and_std(labels, values):
        # Per-label mean and sample standard deviation of `values`.
        if labels is None or values is None or len(labels) == 0:
            return [], [], []
        stats = (
            pd.DataFrame({'label': list(labels), 'value': list(values)})
            .groupby('label')['value']
            .agg(['mean', 'std'])
        )
        return list(stats.index), list(stats['mean']), list(stats['std'])

    if traceType == 'scatter':
        def make_scatter(x=None, y=None, name=None):
            return go.Scatter(x=x, y=y, mode='markers', opacity=0.75, name=name)
        return make_scatter

    if traceType == 'bar':
        def make_bar(x=None, y=None, name=None):
            labels, means, stds = _mean_and_std(x, y)
            return go.Bar(x=labels, y=means, name=name,
                          error_y=dict(type='data', array=stds))
        return make_bar

    if traceType == 'barh':
        def make_barh(x=None, y=None, name=None):
            labels, means, stds = _mean_and_std(y, x)
            # Horizontal bars carry their spread along the x axis.
            return go.Bar(x=means, y=labels, name=name, orientation='h',
                          error_x=dict(type='data', array=stds))
        return make_barh

    if traceType == 'table':
        # Layout settings are not trace properties, so the placeholder is a
        # plain empty trace; the caller is responsible for shrinking the figure.
        def make_placeholder(x=None, y=None, name=None):
            return go.Scatter(name=name)
        return make_placeholder

    raise ValueError("Unknown traceType: {!r}".format(traceType))

def pivot_table():
    """Cross-tabulate the two selected variables for the selected group.

    Reads the current values of ``textbox1`` (rows), ``textbox2`` (columns)
    and ``button`` (group name, or 'All' for every group combined).

    Returns
    -------
    pandas.DataFrame or str
        A table of row counts per (row value, column value) combination, with
        0 for combinations that do not occur; or an explanatory message when
        both dropdowns select the same variable.
    """
    row_var, col_var = textbox1.value, textbox2.value
    if row_var == col_var:
        df = "Cannot create a pivot table with only one variable"
        return df

    if button.value == 'All':
        # Concatenate every group in one call; resetting an accumulator inside
        # the loop would keep only the last group.
        df = pd.concat([fruit_experiment.data[group] for group in fruit_experiment.group_names])
    else:
        df = fruit_experiment.data[button.value]

    # Count rows directly instead of pivoting on the count of a third column:
    # no extra column name is needed, and it avoids positional arguments to
    # DataFrame.pivot, which recent pandas versions reject.
    return df.groupby([row_var, col_var]).size().unstack(col_var, fill_value=0)

def update_table(change):
    """Observer callback: redraw the pivot table and reveal the group selector.

    ``change`` is the notification supplied by the widget and is not read.
    """
    table = pivot_table()
    update_display(table, display_id='1')
    button.layout.display = 'flex'
    
# Initial figure: one trace per experiment group, using the first entry of
# each option list (which is the same variable on both axes).
first_x, first_y = x_options[0], y_options[0]

traces = []
for group in fruit_experiment.group_names:
    make_trace = construct_trace(first_x, first_y, choose_trace(first_x, first_y))
    group_frame = fruit_experiment.data[group]
    traces.append(make_trace(x=group_frame[first_x], y=group_frame[first_y], name=group))

initial_layout = go.Layout(
    title=dict(text=first_x + " vs. " + first_y),
    barmode='overlay',
    height=500,
    width=800,
    xaxis=dict(title=first_x),
    yaxis=dict(title=first_y),
)
g = go.FigureWidget(data=traces, layout=initial_layout)

Let's now write a function that will handle the input from the widgets and alter the state of the graph.

In [11]:
def validate():
    """Report whether both dropdown selections are recognised options."""
    allowed_y = x_options + ['None (Distributions Only)']
    if textbox1.value not in x_options:
        return False
    return textbox2.value in allowed_y


def response(change):
    """Redraw the figure after either axis dropdown changes.

    Parameters
    ----------
    change : dict
        Notification supplied by the widget observer. It is only forwarded to
        ``update_table``; the current selections are read from ``textbox1``
        and ``textbox2``.

    Behaviour by selection
    ----------------------
    * y is 'None (Distributions Only)': histogram of x for every group.
    * neither variable categorical: scatter of x against y.
    * only x categorical: vertical bars of mean(y) per x value, std error bars.
    * only y categorical: horizontal bars of mean(x) per y value, std error bars.
    * both categorical: the figure is shrunk to 10x10 and the pivot table with
      its group selector is shown instead.

    Nothing happens when ``validate()`` rejects the current selections.
    """
    if not validate():
        return

    x_name, y_name = textbox1.value, textbox2.value
    frames = [fruit_experiment.data[name] for name in fruit_experiment.group_names]
    full_size = {'height': 500, 'width': 800}

    def _hide_table():
        # Blank the table output slot and hide the group selector.
        update_display(Nothing(), display_id='1')
        button.layout.display = 'none'

    if y_name not in x_options:
        # Distribution-only mode.
        is_categorical = fruit_experiment.node.nodeDict()[x_name].vartype == "categorical"
        with g.batch_update():
            g.plotly_restyle({'opacity': 1 if is_categorical else 0.75})
            for trace, frame in zip(g.data, frames):
                trace.x = frame[x_name]
                trace.y = None
                trace.error_x = {'visible': False}
                trace.error_y = {'visible': False}
                trace.orientation = None
            g.layout.xaxis.title = x_name
            g.layout.yaxis.title = "Count"
            g.layout.title = x_name
            # Restore the figure and hide the table controls in case the
            # previous selection was in table mode.
            g.update_layout(full_size)
            g.plotly_restyle({'type': 'histogram'})
            _hide_table()
        return

    traceType = choose_trace(x_name, y_name)

    if traceType == 'table':
        with g.batch_update():
            g.update_layout({'height': 10, 'width': 10})
            g.layout.xaxis.title = ""
            g.layout.yaxis.title = ""
            g.layout.title = ""
        # Render the table for the new selection right away (this also makes
        # the group selector visible) rather than waiting for a radio click.
        update_table(change)
        return

    with g.batch_update():
        if traceType == 'scatter':
            for trace, frame in zip(g.data, frames):
                trace.x = frame[x_name]
                trace.y = frame[y_name]
                trace.error_y = {'visible': False}
                trace.error_x = {'visible': False}
                trace.orientation = None
            g.plotly_restyle({'type': 'scatter', 'opacity': 0.75})
        elif traceType == 'bar':
            g.plotly_restyle({'type': 'bar', 'opacity': 1})
            for trace, frame in zip(g.data, frames):
                # Aggregate only the plotted column: a frame-wide mean/std
                # trips over non-numeric columns such as the added 'h' column.
                stats = frame.groupby(x_name)[y_name].agg(['mean', 'std'])
                trace.x = list(stats.index)
                trace.y = stats['mean']
                trace.error_y = {'type': 'data', 'array': stats['std'], 'visible': True}
                trace.error_x = {'visible': False}
                trace.orientation = None
        elif traceType == 'barh':
            g.plotly_restyle({'type': 'bar', 'opacity': 1})
            for trace, frame in zip(g.data, frames):
                stats = frame.groupby(y_name)[x_name].agg(['mean', 'std'])
                trace.x = stats['mean']
                trace.y = list(stats.index)
                trace.error_x = {'type': 'data', 'array': stats['std'], 'visible': True}
                trace.orientation = 'h'
                trace.error_y = {'visible': False}
        g.layout.xaxis.title = x_name
        g.layout.yaxis.title = y_name
        g.layout.title = x_name + " vs. " + y_name
        g.update_layout(full_size)
        _hide_table()

# A change to either axis dropdown redraws the figure; a change to the group
# radio selection refreshes the pivot table.
for _axis_dropdown in (textbox1, textbox2):
    _axis_dropdown.observe(response, names="value")
button.observe(update_table, names='value')

In [12]:
# Lay the two dropdowns out side by side above the figure.
container = widgets.HBox([textbox1, textbox2])
display(widgets.VBox([container,
              g]))
display(button)
# Reserve an initially blank output slot that update_display(..., display_id='1') overwrites later.
display(Nothing(), display_id='1')
# Start with the radio buttons hidden; `response` switches this to 'flex' in table mode.
button.layout.display = 'none'

VBox(children=(HBox(children=(Dropdown(description='x-Axis Variable: ', options=('Fertilizer', 'Number of Bees…

RadioButtons(description='Group', layout=Layout(width='max-content'), options=('Control', 'Intervention (no be…




elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison



In [None]:
# Scratch: builds a histogram x-bin spec (start 0, end 20, size 5); the result is only echoed, not applied to `g`.
go.histogram.XBins(start=0, end=20, size=5)

Time to try the app out!!

In [None]:
# Scratch: switches the x error bars of the first trace on.
g.data[0].error_x = {'visible':True}

In [None]:
# Scratch: echoes the whole 'x' column of the intervention group.
fruit_experiment.data['Intervention (no bees)']['x']

In [None]:
#table with radio buttons

In [None]:
# Scratch: a fresh, empty FigureWidget with a 10x10 layout — the same size `response` applies in table mode.
go.FigureWidget().update_layout({'height':10, 'width':10})

In [None]:
# NOTE(review): no module-level `df` is assigned above this cell in the visible notebook
# (it is only a local inside pivot_table), so this relies on leftover kernel state.
df['Supplement'].unique()

In [None]:
# Rebuild the group selector: one radio option per key of the experiment data, plus 'All'.
# NOTE(review): this rebinds the global `button`; functions defined earlier that
# read `button` will see this new widget from here on.
button = widgets.RadioButtons(
    options=[*fruit_experiment.data.keys(), 'All'],
    description='Group',
    disabled=False,
    layout={'width': 'max-content'},  # sized to content, for long item names
)

def pivot_table():
    """Cross-tabulate the two selected variables for the selected group.

    Reads the current values of ``textbox1`` (rows), ``textbox2`` (columns)
    and ``button`` (group name, or 'All' for every group combined).

    Returns
    -------
    pandas.DataFrame or str
        A table of row counts per (row value, column value) combination, with
        0 for combinations that do not occur; or an explanatory message when
        both dropdowns select the same variable.
    """
    row_var, col_var = textbox1.value, textbox2.value
    if row_var == col_var:
        # Grouping by the same column twice cannot produce a two-way table.
        return "Cannot create a pivot table with only one variable"

    if button.value == 'All':
        # Concatenate every group in one call; resetting an accumulator inside
        # the loop would keep only the last group.
        df = pd.concat([fruit_experiment.data[group] for group in fruit_experiment.group_names])
    else:
        df = fruit_experiment.data[button.value]

    # Count rows directly instead of pivoting on the count of a third column:
    # no extra column name is needed, and it avoids positional arguments to
    # DataFrame.pivot, which recent pandas versions reject.
    return df.groupby([row_var, col_var]).size().unstack(col_var, fill_value=0)

# Register the table callback on the widget currently bound to `button`.
button.observe(update_table, names='value')
# Show the radio buttons and the current pivot table, both under display id '1'.
display(button, pivot_table(), display_id='1');
button.layout.display = 'flex'

In [None]:
# Scratch widget for checking what an observer callback receives.
button1 = widgets.RadioButtons(
    options=[*fruit_experiment.data.keys(), 'All'],
    description='Group',
    disabled=False,
    layout={'width': 'max-content'},  # sized to content, for long item names
)


def test(change):
    """Print whether the newly selected value is 'Control'."""
    is_control = change['new'] == 'Control'
    print(is_control)


button1.observe(test, names='value')
button1

In [None]:
# Scratch: non-null counts of the remaining columns per (x, y) combination for the selected group;
# the reset_index/pivot step is commented out.
fruit_experiment.data[button.value].groupby([textbox1.value, textbox2.value]).agg('count')#.reset_index().pivot(textbox1.value, textbox2.value, options[0])



In [None]:
def pivot_table():
    """Cross-tabulate the two selected variables for the selected group.

    Reads the current values of ``textbox1`` (rows), ``textbox2`` (columns)
    and ``button`` (group name, or 'All' for every group combined).

    Returns
    -------
    pandas.DataFrame or str
        A table of row counts per (row value, column value) combination, with
        0 for combinations that do not occur; or an explanatory message when
        both dropdowns select the same variable.
    """
    row_var, col_var = textbox1.value, textbox2.value
    if row_var == col_var:
        # Grouping by the same column twice cannot produce a two-way table.
        return "Cannot create a pivot table with only one variable"

    if button.value == 'All':
        # Concatenate every group in one call; resetting an accumulator inside
        # the loop would keep only the last group.
        df = pd.concat([fruit_experiment.data[group] for group in fruit_experiment.group_names])
    else:
        df = fruit_experiment.data[button.value]

    # Count rows directly instead of pivoting on the count of a third column:
    # no extra column name is needed, and it avoids positional arguments to
    # DataFrame.pivot, which recent pandas versions reject.
    return df.groupby([row_var, col_var]).size().unstack(col_var, fill_value=0)

# Show the radio buttons and the current pivot table, both under display id '1'.
display(button, pivot_table(), display_id='1');
button.layout.display = 'flex'

In [None]:
# Duplicate of the import in the first cell.
from IPython.display import display, update_display
# NOTE(review): `df` is not assigned at module level in the visible notebook — relies on leftover kernel state.
display(button, df, display_id='1');
button.layout.display = 'flex'

In [None]:
# Scratch: blank the output slot with display id '1' and hide the radio buttons.
update_display(Nothing(), display_id='1')
button.layout.display = 'none'

In [None]:
# NOTE(review): `df` is not assigned at module level in the visible notebook — relies on leftover kernel state.
display(button, df, display_id='1');
button.layout.display = 'flex'

In [None]:
class Nothing:
    """Placeholder object whose text representation is the empty string.

    The notebook displays an instance to blank out the output slot that is
    addressed with ``display_id='1'``.
    """

    def __init__(self):
        pass

    def __repr__(self):
        return str()

In [None]:
# NOTE(review): echoes `df`, which is not assigned at module level in the visible notebook.
df

In [None]:
#vertical bar chart

In [None]:
# Mean and standard deviation of the control group's 'x' column.
y_avg = np.mean(fruit_experiment.data['Control']['x'])
y_std = np.std(fruit_experiment.data['Control']['x'])

In [None]:
# Vertical bars of 'Number of Bees' per 'Supplement' with error bars on y.
# NOTE(review): `avg` and `std` are not assigned at module level in the visible notebook — relies on leftover kernel state.
trace = go.Bar(x=avg['Supplement'], y=avg['Number of Bees'], error_y=dict(type='data', array=std['Number of Bees']))
go.Figure(trace)

In [None]:
#horizontal bar plot
# Same data as the vertical version with the axes swapped and error bars on x; the figure holds the same trace three times.
# NOTE(review): `avg` and `std` are not assigned at module level in the visible notebook.
trace = go.Bar(y=avg['Supplement'], x=avg['Number of Bees'], error_x=dict(type='data', array=std['Number of Bees']), orientation='h')
go.Figure([trace, trace, trace])

In [None]:
# One single-bar trace per value in avg['Supplement'].
traces = []
for supplement in avg['Supplement']:
    avg_rows = avg[avg['Supplement'] == supplement]
    std_rows = std[std['Supplement'] == supplement]
    trace = go.Bar(
        x=[supplement],
        y=avg_rows['Number of Bees'],
        error_y=dict(type='data', array=std_rows['Number of Bees']),
    )
    traces.append(trace)
go.Figure(traces)

In [None]:
# Scratch: uses `supplement` as left behind by the loop in the previous cell (its last value).
avg[avg['Supplement'] == supplement]['Number of Bees']

In [None]:
#bar plot of counts

In [None]:
# One bar trace per group: non-null count of 'x' for each value of 'h'.
traces = []
for group in list(data.keys()):
    h_counts = data[group].groupby('h').agg('count')['x']
    traces.append(go.Bar(x=h_counts.index, y=h_counts))
go.Figure(traces)

In [None]:
%%html
<!-- Embeds a remotely hosted GIF; it needs network access to render. -->
<img src = 'https://cloud.githubusercontent.com/assets/12302455/16637308/4e476280-43ac-11e6-9fd3-ada2c9506ee1.gif' >

#### Reference

In [None]:
# Prints the full built-in help text for plotly's FigureWidget class.
help(go.FigureWidget)