In [None]:
import os

import numpy as np
import pandas as pd
from buckaroo.buckaroo_widget import BuckarooWidget

In [None]:
# Location of the Citi Bike trip CSV. The default is the original author's
# local path; set the CITIBIKE_CSV environment variable to point at your own
# copy so the notebook can run on another machine without editing this cell.
CITIBIKE_CSV = os.environ.get(
    'CITIBIKE_CSV',
    '/Users/paddy/code/citibike-play/2014-01 - Citi Bike trip data.csv')
df = pd.read_csv(CITIBIKE_CSV)
# Bare last expression: render the loaded frame as the cell output.
df

In [None]:
# Wrap the loaded DataFrame in a BuckarooWidget, passing showCommands=False.
# NOTE(review): showCommands=False presumably hides the low-code command UI — confirm.
w = BuckarooWidget(df, showCommands=False)
# Bare last expression: render the widget as the cell output.
w

# Adding a summary stat

In [None]:
from buckaroo.pluggable_analysis_framework import (ColAnalysis)
from scipy.stats import skew
class Skew(ColAnalysis):
    """Column analysis that contributes a single ``skew`` summary statistic.

    Integer and float columns get the scipy ``skew`` of the sampled series;
    every other dtype reports the placeholder string ``"NA"``.
    """

    # Names of the stats this analysis produces / needs from other analyses.
    provided_summary = ["skew"]
    requires_summary = []

    @staticmethod
    def summary(sampled_ser, summary_ser, ser):
        """Return ``{"skew": value}`` for one column.

        Only ``sampled_ser`` is read; ``summary_ser`` and ``ser`` are accepted
        but unused here.
        """
        dtype_checks = pd.api.types
        # Pick the concrete dtype to cast to before handing the data to scipy.
        if dtype_checks.is_integer_dtype(sampled_ser):
            cast_to = 'int64'
        elif dtype_checks.is_float_dtype(sampled_ser):
            cast_to = 'float64'
        else:
            # Non-numeric columns have no skew; report a placeholder instead.
            return {"skew": "NA"}
        return {"skew": skew(sampled_ser.astype(cast_to))}

    summary_stats_display = [
        'dtype', 'length', 'nan_count', 'distinct_count',
        'empty_count', 'empty_per', 'unique_per', 'nan_per',
        'is_numeric', 'is_integer', 'is_datetime',
        'mode', 'min', 'max', 'mean',
        # 'skew' has to be listed here as well, otherwise the new stat is
        # computed but never displayed.
        'skew',
    ]
# Register the Skew analysis class on the existing widget `w`.
w.add_analysis(Skew)

In [None]:
# Show the widget's `stats.presentation_sdf` attribute as the cell output.
# NOTE(review): presumably the summary-stats frame that now includes the `skew` row — confirm.
w.stats.presentation_sdf

# Making a new default dataframe display function

In [None]:
from buckaroo.widget_utils import disable
from IPython.core.getipython import get_ipython
from IPython.display import display
import warnings

# NOTE(review): presumably turns off buckaroo's built-in default DataFrame
# display so the custom hook defined below can replace it — confirm against
# buckaroo.widget_utils.
disable()
def my_display_as_buckaroo(df):
    """Display ``df`` as a BuckarooWidget that has the ``Skew`` analysis added.

    Args:
        df: the DataFrame to render.

    Returns:
        Whatever ``display`` returns for the widget.
    """
    widget = BuckarooWidget(df, showCommands=False)
    # The analysis we added throws warnings; muffle them only while it is being
    # registered. catch_warnings() restores the caller's warning filters on
    # exit, so this no longer overwrites the global filter configuration.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        widget.add_analysis(Skew)
    return display(widget)

def my_enable():
    """
    Register ``my_display_as_buckaroo`` as the notebook display function
    for every ``pd.DataFrame`` instance.

    When no IPython shell is available, a message is printed and nothing
    is registered.
    """
    shell = get_ipython()
    if shell is not None:
        # Hook into the formatter IPython consults for _ipython_display_-style output.
        display_hook_registry = shell.display_formatter.ipython_display_formatter
        display_hook_registry.for_type(pd.DataFrame, my_display_as_buckaroo)
        return
    print("must be running inside ipython to enable default display via enable()")
# Install the custom DataFrame display hook for this session.
my_enable()

In [None]:
# A bare DataFrame is now displayed through my_display_as_buckaroo,
# the function my_enable() registered for pd.DataFrame.
df

# Adding a Command to the Low Code UI

In [None]:
from buckaroo.all_transforms import Command
from buckaroo.lispy import s
#Here we start adding commands to the Buckaroo Widget.  Every call to add_command replaces a command with the same name
@w.add_command
class GroupBy2(Command):
    """Command that groups ``df`` by one column and aggregates other columns.

    ``transform`` performs the operation directly; ``transform_to_py`` emits
    the equivalent Python source lines. The two must stay in sync.
    """

    # NOTE(review): presumably the default invocation (command symbol, the df
    # symbol, a column placeholder, an empty column->aggregation map) — confirm
    # against buckaroo's Command documentation.
    command_default = [s("groupby2"), s('df'), 'col', {}]
    # NOTE(review): presumably describes argument 3 as a per-column choice
    # among the listed aggregation names — confirm.
    command_pattern = [[3, 'colMap', 'colEnum', ['null', 'sum', 'mean', 'median', 'count']]]

    @staticmethod
    def transform(df, col, col_spec):
        """Group ``df`` by ``col`` and aggregate each column named in ``col_spec``.

        Args:
            df: DataFrame to group.
            col: column name to group by.
            col_spec: mapping of column name -> aggregation name. Only
                'sum', 'mean', 'median' and 'count' are applied; any other
                value (e.g. 'null') is skipped.

        Returns:
            A new DataFrame with one column per applied aggregation.
        """
        supported = ('sum', 'mean', 'median', 'count')
        grps = df.groupby(col)
        df_contents = {}
        for k, v in col_spec.items():
            if v in supported:
                # Call the aggregation method of that name on each group.
                df_contents[k] = grps[k].apply(lambda x: getattr(x, v)())
        return pd.DataFrame(df_contents)

    @staticmethod
    def transform_to_py(df, col, col_spec):
        """Return Python source (as one string) equivalent to ``transform``.

        Each emitted line is indented by four spaces. Entries of ``col_spec``
        whose value is not a supported aggregation produce no line.
        """
        supported = ('sum', 'mean', 'median', 'count')
        commands = [
            "    grps = df.groupby('%s')" % col,
            "    df_contents = {}"
        ]
        for k, v in col_spec.items():
            if v in supported:
                # Every branch writes to `df_contents`; the 'sum' branch used
                # to emit an undefined `paddydf_contents`, which made the
                # generated code fail with NameError.
                commands.append(
                    "    df_contents['%s'] = grps['%s'].apply(lambda x: x.%s())" % (k, k, v))
        commands.append("    df = pd.DataFrame(df_contents)")
        return "\n".join(commands)


Note that `groupby2` has been added to the commands available in the widget.