In [223]:
import sys
import pandas as pd
import numpy as np
import ipywidgets 
import seaborn as sns
import matplotlib.pyplot as plt 
%config InlineBackend.figure_format = 'retina'

## Dummy data

In [282]:
# Synthetic student data. Math and reading scores are drawn jointly from a
# bivariate normal (positive covariance in `cov`), the writing score is an
# independent normal, and every categorical column is sampled uniformly.
# A seeded generator keeps the frame identical across Restart & Run All.
SEED = 42
N_STUDENTS = 100
rng = np.random.default_rng(SEED)

cov = np.array([[6, 3], [3, 3.5]])
multinorm = rng.multivariate_normal([5, 10], cov, size=N_STUDENTS)
df = pd.DataFrame({
    'math score': multinorm[:, 0],
    'reading score': multinorm[:, 1],
    'writing score': rng.normal(2, 2, N_STUDENTS),
    'gender': rng.choice(['male', 'female'], size=N_STUDENTS),
    'race/ethnicity': rng.choice(['group A', 'group B', 'group C', 'group D', 'group E'], size=N_STUDENTS),
    'lunch': rng.choice(['free', 'standard'], size=N_STUDENTS),
    'parental education': rng.choice(['highschool', 'bachelor', 'master'], size=N_STUDENTS),
    'preparation course': rng.choice(['completed', 'none'], size=N_STUDENTS),
})
df.head()

Unnamed: 0,math score,reading score,writing score,gender,race/ethnicity,lunch,parental education,preparation course
0,6.5453,10.116302,1.248035,male,group E,free,master,none
1,7.582902,11.270267,4.720812,female,group A,standard,highschool,completed
2,9.403985,12.898436,1.315492,male,group A,free,master,none
3,7.829126,11.19781,2.157508,female,group E,free,master,completed
4,2.022531,7.941776,-0.11273,female,group A,standard,highschool,none


In [275]:
def scatter(x, y, hue):
    """Redraw a scatter plot of the notebook-level ``df``.

    Parameters
    ----------
    x, y : str
        Column names of ``df`` placed on the horizontal and vertical axes.
    hue : str
        Column name of ``df`` used to colour the points.
    """
    # Drop figures left over from earlier widget callbacks before drawing anew.
    plt.close('all')
    _, ax = plt.subplots(figsize=(8, 4))
    sns.scatterplot(x=x, y=y, hue=hue, data=df, ax=ax)
    ax.set_title('Scatterplot of ' + x + ' versus ' + y)
    plt.show()
    
# Column groups, computed once: the three score columns feed the Y menu,
# every remaining column of df feeds the X and Hue menus.
score_cols = ['math score','reading score', 'writing score']
category_cols = list(df.drop(score_cols, axis=1).columns)

drop_down_x = ipywidgets.Dropdown(
    options=category_cols,
    value=category_cols[0],
    description='X variable:',
    disabled=False,
)

drop_down_y = ipywidgets.Dropdown(
    options=score_cols,
    value=score_cols[0],
    description='Y variable:',
    disabled=False,
)

drop_down_hue = ipywidgets.Dropdown(
    options=category_cols,
    value=category_cols[0],
    description='Hue:',
    disabled=False,
)

# Menus side by side; the output area re-runs scatter() whenever one changes.
in3 = ipywidgets.HBox([drop_down_x, drop_down_y, drop_down_hue])
out3 = ipywidgets.interactive_output(
    scatter,
    dict(x=drop_down_x, y=drop_down_y, hue=drop_down_hue),
)
display(in3, out3)

HBox(children=(Dropdown(description='X variable:', options=('gender', 'race/ethnicity', 'lunch', 'parental edu…

Output()

In [276]:
def boxplot(selected_x, selected_y):
    """Redraw a box plot of the notebook-level ``df``.

    Parameters
    ----------
    selected_x : str
        Column of ``df`` used for the horizontal axis.
    selected_y : str
        Column of ``df`` used for the vertical axis.
    """
    # Drop figures left over from earlier widget callbacks before drawing anew.
    plt.close('all')
    _, ax = plt.subplots(figsize=(8, 4))
    x_values = df[selected_x]
    y_values = df[selected_y]
    sns.boxplot(x=x_values, y=y_values, ax=ax)
    ax.set_title(f'Boxplot of {selected_y} for {selected_x}')
    plt.show()
    
# Column groups, computed once: the three score columns feed the Y menu,
# every remaining column of df feeds the X menu.
score_cols = ['math score','reading score', 'writing score']
category_cols = list(df.drop(score_cols, axis=1).columns)

drop_down_x = ipywidgets.Dropdown(
    options=category_cols,
    value=category_cols[0],
    description='X variable:',
    disabled=False,
)

drop_down_y = ipywidgets.Dropdown(
    options=score_cols,
    value=score_cols[0],
    description='Y variable:',
    disabled=False,
)

# Menus side by side; the output area re-runs boxplot() whenever one changes.
in1 = ipywidgets.HBox([drop_down_x, drop_down_y])
out1 = ipywidgets.interactive_output(
    boxplot,
    dict(selected_x=drop_down_x, selected_y=drop_down_y),
)
display(in1, out1)

HBox(children=(Dropdown(description='X variable:', options=('gender', 'race/ethnicity', 'lunch', 'parental edu…

Output()

## Generic plot with menus

In [308]:
# Seaborn plotting functions offered by interactive_plot's "Plot:" menu.
# interactive_plot calls the chosen entry with x=, y=, optionally hue= and
# palette=, and ax=; the first entry is the fallback used when no plot menu
# is created.
graphics = [
    sns.boxplot, 
    sns.scatterplot, 
    sns.violinplot,
    sns.boxenplot, 
    sns.swarmplot,
    sns.stripplot,
    sns.kdeplot,
    sns.barplot,
]

def interactive_plot(df, defaults, graphics=graphics, strings_as_cats=False, palette="colorblind"):
    """Display drop-down menus that drive a seaborn plot of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot; its column names populate the X, Y and Hue menus.
    defaults : dict
        Initial menu selections. ``'x'`` and ``'y'`` (column names) are
        required. ``'hue'`` (column name) and ``'plot'`` (the ``__name__`` of
        one entry of ``graphics``) are optional; a menu is only created for
        the keys that are present.
    graphics : list of callable
        Plotting functions, each called as
        ``f(x=..., y=..., [hue=..., palette=...], ax=ax)``. Without a
        ``'plot'`` default the first entry is always used.
    strings_as_cats : bool
        When True, columns whose values are all ``str`` are removed from the
        Y menu and are the only choices in the Hue menu.
    palette
        Passed to the plotting function as ``palette`` when a hue is selected.

    Raises
    ------
    ValueError
        If ``defaults`` lacks ``'x'`` or ``'y'``, or names a plot that is not
        in ``graphics``.
    """
    # Without both an x and a y selection the plot callback can never be
    # called successfully, so fail here with a clear message.
    missing = [key for key in ('x', 'y') if key not in defaults]
    if missing:
        raise ValueError(f"defaults must provide {missing}: both an x and a y column are needed")

    def plot(selected_x, selected_y, selected_hue=None, selected_plot=graphics[0]):
        """Widget callback: draw ``selected_plot`` for the chosen columns."""
        plt.close('all')
        # The style must be set before the axes exist, otherwise the first
        # rendered figure does not pick it up.
        sns.set_style('ticks')
        fig, ax = plt.subplots(1, 1, figsize=(8, 4))
        kwargs = dict(x=df[selected_x], y=df[selected_y])
        if selected_hue is not None and selected_hue in df.columns:
            kwargs['hue'] = df[selected_hue]
            kwargs['palette'] = palette
        try:
            selected_plot(**kwargs, ax=ax)
        except TypeError as e:
            # Some plot types reject the selected column types; report the
            # problem on stderr instead of showing a traceback and empty axes.
            print(f"{str(e).split(',')[0]}. That is not compatible with {selected_plot.__name__}", file=sys.stderr)
            plt.close('all')
            return
        ax.set_title(f'{selected_plot.__name__.capitalize()} of {selected_y} for {selected_x}')
        _, labels = ax.get_legend_handles_labels()
        if labels:
            # Park the legend outside the axes, to the right.
            ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
        sns.despine(ax=ax)
        plt.show()

    all_col_names = df.columns.to_list()
    # Columns in which every value is a str; computed once and reused below.
    categorical_col_names = df.columns[(df.map(type) == str).all(0)].to_list()

    dropdowns = {}
    if 'plot' in defaults:
        plot_options = [(g.__name__, g) for g in graphics]
        names = [name for name, _ in plot_options]
        if defaults['plot'] not in names:
            raise ValueError(f"Unknown plot {defaults['plot']!r}; expected one of {names}")
        i = names.index(defaults['plot'])
        # Move the requested plot to the front so the menu starts on it.
        options = plot_options[i:i+1] + plot_options[:i] + plot_options[i+1:]
        dropdowns['selected_plot'] = ipywidgets.Dropdown(
            options=options, description='Plot:', disabled=False)

    dropdowns['selected_x'] = ipywidgets.Dropdown(
        options=all_col_names, value=defaults['x'], description='X variable:', disabled=False)

    if strings_as_cats:
        y_options = df.drop(categorical_col_names, axis=1).columns.to_list()
    else:
        y_options = all_col_names
    dropdowns['selected_y'] = ipywidgets.Dropdown(
        options=y_options, value=defaults['y'], description='Y variable:', disabled=False)

    if 'hue' in defaults:
        hue_options = categorical_col_names if strings_as_cats else all_col_names
        dropdowns['selected_hue'] = ipywidgets.Dropdown(
            options=hue_options, value=defaults['hue'], description='Hue:', disabled=False)

    # The dict keys match plot()'s parameter names, which is how
    # interactive_output maps each widget value onto the callback.
    display(ipywidgets.HBox(list(dropdowns.values())),
            ipywidgets.interactive_output(plot, dropdowns))

## Examples

In [309]:
# Box plot by gender; strings_as_cats=True keeps all-string columns out of
# the Y menu and makes them the only Hue choices.
defaults = dict(x='gender', y='math score', hue='race/ethnicity', plot='boxplot')
interactive_plot(df, defaults=defaults, strings_as_cats=True, palette="tab10")

HBox(children=(Dropdown(description='Plot:', options=(('boxplot', <function boxplot at 0x14fcf6340>), ('scatte…

Output()

In [304]:
# Scatter plot of two scores coloured by parental education; every column is
# selectable in each menu.
defaults = dict(x='reading score', y='math score', hue='parental education', plot='scatterplot')
interactive_plot(df, defaults=defaults)

HBox(children=(Dropdown(description='Plot:', options=(('scatterplot', <function scatterplot at 0x14fa37920>), …

Output()

In [305]:
# No 'hue' key, so no Hue menu is created.
defaults = dict(x='gender', y='math score', plot='swarmplot')
interactive_plot(df, defaults=defaults)

HBox(children=(Dropdown(description='Plot:', options=(('swarmplot', <function swarmplot at 0x14fcf65c0>), ('bo…

Output()

In [306]:
# Only X and Y menus: with no 'plot' key the first (here the only) entry of
# graphics is always used.
defaults = dict(x='reading score', y='writing score')
interactive_plot(df, defaults=defaults, graphics=[sns.scatterplot])

HBox(children=(Dropdown(description='X variable:', index=1, options=('math score', 'reading score', 'writing s…

Output()

In [307]:
# Fixed KDE plot of two scores with a Hue menu starting on gender.
defaults = dict(x='reading score', y='math score', hue='gender')
interactive_plot(df, defaults=defaults, graphics=[sns.kdeplot])

HBox(children=(Dropdown(description='X variable:', index=1, options=('math score', 'reading score', 'writing s…

Output()