In [None]:
import pandas as pd
import numpy as np
from buckaroo.dataflow.dataflow import StylingAnalysis
from buckaroo.buckaroo_widget import BuckarooWidget
from buckaroo.customizations.analysis import TypingStats
# Notebook-wide configuration.
SEED = 42   # fixed so that Restart Kernel -> Run All reproduces the same random demo data
N = 500     # row count of the categorical histogram frame in the "Utility Code" section
NA = pd.NA
ROWS = 200  # row count of typed_df
# Seed NumPy's global RNG: typed_df below, and the np.random.choice / np.random.shuffle
# calls in the utility helpers, all draw from it.
np.random.seed(SEED)
typed_df = pd.DataFrame({
    'int_col': np.random.randint(1, 50, ROWS),
    'float_col': np.random.randint(1, 30, ROWS) / .7,
    'str_col': ["foobar"] * ROWS})

This notebook generally follows the order of the type definitions in [DFWhole.ts](https://github.com/paddymul/buckaroo/blob/main/js/components/DFViewerParts/DFWhole.ts).

We start with the simple displayers:
```js
export interface ObjDisplayerA {
  displayer: 'obj';}

export interface BooleanDisplayerA {
  displayer: 'boolean';}

export interface StringDisplayerA {
  displayer: 'string';
  max_length?: number;} 
  
export interface FloatDisplayerA {
  displayer: 'float';
  min_fraction_digits: number;
  max_fraction_digits: number;}

export interface DatetimeDefaultDisplayerA {
  displayer: 'datetimeDefault';}
export interface IntegerDisplayerA {
  displayer: 'integer';
  min_digits: number;
  max_digits: number;}

export interface DatetimeLocaleDisplayerA {
  displayer: 'datetimeLocaleString';
  locale: 'en-US' | 'en-GB' | 'en-CA' | 'fr-FR' | 'es-ES' | 'de-DE' | 'ja-JP';
  // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/DateTimeFormat/DateTimeFormat
  args: Intl.DateTimeFormatOptions;}
```

In [None]:
#simpler Buckaroo that really shows the styling
def obj_(pkey):
    """Return a pinned-row spec for ``pkey`` that uses the 'obj' displayer."""
    displayer_args = dict(displayer='obj')
    return dict(primary_key_val=pkey, displayer_args=displayer_args)

class GalleryStyling(StylingAnalysis):
    """Styling analysis used by the gallery widgets: declares a single pinned ``dtype`` row.

    NOTE(review): how ``requires_summary`` and ``pinned_rows`` are consumed is defined by
    buckaroo's ``StylingAnalysis``, which is not visible here -- confirm against that class.
    """
    requires_summary = ['dtype']
    # one pinned row, keyed on 'dtype' and rendered with the generic 'obj' displayer
    pinned_rows = [obj_('dtype')]
class GalleryBuckaroo(BuckarooWidget):
    """BuckarooWidget subclass whose ``analysis_klasses`` is just GalleryStyling and TypingStats."""
    analysis_klasses = [GalleryStyling, TypingStats]

In [None]:
# Mixed-type demo frame: bools, ints, ISO-8601 timestamp strings, dicts and lists.
# Every column ends in None so the displayers are also exercised on missing values.
obj_df = pd.DataFrame(dict([
    ('bools', [True, True, False, False, True, None]),
    ('ints', [5, 20, 30, -10, 7772, None]),
    ('timestamp', ["2020-01-01 01:00Z", "2020-01-01 02:00Z", "2020-02-28 02:00Z", "2020-03-15 02:00Z"] + [None] * 2),
    # the original author tagged this column "polars" -- TODO confirm which polars issue is meant
    ('dicts', [{'a': 10, 'b': 20, 'c': 'some string'}] + [None] * 5),
    ('nested_dicts', [{'level_1': {'a': 10, 'b': 20, 'c': 'some string'}}] + [None] * 5),
    ('lists', [['a', 'b'], [1, 2]] + [None] * 4),
    ('lists-string', [['a', 'b'], ['foo', 'bar']] + [None] * 4),
    ('lists-int', [[10, 20], [100, 500], [8]] + [None] * 3)]))
GalleryBuckaroo(obj_df)

In [None]:
# Stock widget on the same frame, passing className and selectionBackground
# through component_config.
BuckarooWidget(
    obj_df,
    component_config=dict(className='asdf', selectionBackground='red'))

In [None]:
# Narrower variant of the object frame (rebinds obj_df): bools, ints and the two dict columns.
# A polars object-dtype column ('nested_dicts2', pl.Series(..., dtype=pl.Object)) was left
# disabled by the original author; polars is not imported in this notebook.
obj_df = pd.DataFrame(dict(
    bools=[True, True, False, False, True, None],
    ints=[5, 20, 30, -10, 7772, None],
    # the original author tagged this column "polars" -- TODO confirm which polars issue is meant
    dicts=[{'a': 10, 'b': 20, 'c': 'some string'}] + [None] * 5,
    nested_dicts=[{'level_1': {'a': 10, 'b': 20, 'c': 'some string'}}] + [None] * 5))
GalleryBuckaroo(obj_df)

In [None]:
bool_ser = pd.Series([True, True, False, False, True, None])
# the same series backs both columns so the two displayers can be compared side by side
bool_df = pd.DataFrame(dict(
    bools_obj_displayer=bool_ser,
    bools_boolean_displayer=bool_ser))
# FIXME: a 'bools_bool_checkbox_displayer' column using the 'boolean_checkbox' displayer
# is not demonstrated yet.
BuckarooWidget(
    bool_df,
    column_config_overrides=dict(
        bools_obj_displayer=dict(displayer_args=dict(displayer='obj')),
        bools_boolean_displayer=dict(displayer_args=dict(displayer='boolean'))))

In [None]:
string_ser = pd.Series(["asdf", "qwerty", "really long string, much  much longer", None,  "A"])
# the same series in every column; only the displayer configuration differs
string_df = pd.DataFrame(dict(
    strings_obj_displayer=string_ser,
    strings_string_displayer=string_ser,
    strings_string_displayer_max_len=string_ser))
BuckarooWidget(
    string_df,
    column_config_overrides=dict(
        strings_obj_displayer=dict(displayer_args=dict(displayer='obj')),
        strings_string_displayer=dict(displayer_args=dict(displayer='string')),
        strings_string_displayer_max_len=dict(
            displayer_args=dict(displayer='string', max_length=15))))

In [None]:
# integers, positive/negative fractions and a missing value, shared by every float column below
float_ser = pd.Series([5, -8, 13.23, -8.01, -999.345245234, None])

def float_(min_digits, max_digits):
    """Return a column-config entry selecting the 'float' displayer.

    ``min_digits`` and ``max_digits`` are emitted as ``min_fraction_digits`` and
    ``max_fraction_digits`` respectively.
    """
    displayer_args = dict(
        displayer='float',
        min_fraction_digits=min_digits,
        max_fraction_digits=max_digits)
    return dict(displayer_args=displayer_args)
# the same series in every column; the column name encodes the min/max fraction digits used
float_df = pd.DataFrame({
    column: float_ser
    for column in (
        'float_obj_displayer',
        'float_float_displayer_1__3',
        'float_float_displayer_0__3',
        'float_float_displayer_3__3',
        'float_float_displayer_3_13')})
BuckarooWidget(
    float_df,
    column_config_overrides=dict(
        float_obj_displayer=dict(displayer_args=dict(displayer='obj')),
        float_float_displayer_1__3=float_(1, 3),
        float_float_displayer_0__3=float_(0, 3),
        float_float_displayer_3__3=float_(3, 3),
        float_float_displayer_3_13=float_(3, 13)))

In [None]:
# four 'Z'-suffixed timestamp strings plus a missing value, parsed with pd.to_datetime
datetime_ser = pd.to_datetime(pd.Series(
    ["2020-01-01 01:00Z", "2020-01-01 02:00Z", "2020-02-28 02:00Z", "2020-03-15 02:00Z", None]))
# the same series in every column; only the displayer configuration differs
datetime_df = pd.DataFrame({
    column: datetime_ser
    for column in (
        'timestamp_obj_displayer',
        'timestamp_datetime_default_displayer',
        'timestamp_datetime_locale_en-US',
        'timestamp_datetime_locale_en-US-Long',
        'timestamp_datetime_locale_en-GB')})
def locale(locale, args=None):
    """Return a column-config entry selecting the 'datetimeLocaleString' displayer.

    Parameters
    ----------
    locale : str
        Locale tag emitted under the 'locale' key (e.g. 'en-US').
    args : dict, optional
        Formatting options emitted under the 'args' key. When omitted, a fresh
        empty dict is used for each call.
    """
    # A `{}` default would be created once and shared by every config built without
    # explicit args, so mutating one returned config would silently change the others.
    display_args = {} if args is None else args
    return {'displayer_args': {'displayer': 'datetimeLocaleString',
                               'locale': locale,
                               'args': display_args}}
# one override per column; the keys must match the datetime_df column names exactly
BuckarooWidget(
    datetime_df,
    column_config_overrides=dict([
        ('timestamp_obj_displayer', dict(displayer_args=dict(displayer='obj'))),
        ('timestamp_datetime_default_displayer', dict(displayer_args=dict(displayer='datetimeDefault'))),
        ('timestamp_datetime_locale_en-US', locale('en-US')),
        ('timestamp_datetime_locale_en-US-Long', locale('en-US', dict(weekday='long'))),
        ('timestamp_datetime_locale_en-GB', locale('en-GB'))]))

In [None]:
# the same two URLs in both columns; only 'linkify' gets the linkify displayer
link_df = pd.DataFrame({
    column: ['https://github.com/paddymul/buckaroo', 'https://github.com/pola-rs/polars']
    for column in ('raw', 'linkify')})
BuckarooWidget(
    link_df,
    column_config_overrides=dict(
        linkify=dict(displayer_args=dict(displayer='linkify'))))
# FIXME: the links get no underline or blue highlighting, but they are links

In [None]:
# Hand-written frame with one histogram per row. 'names' and 'histogram_props' are the same
# keys that format_histo() in the "Utility Code" section returns, so this literal was
# presumably captured from that helper's output (TODO confirm).
# The first row pairs 'index' with the plain string 'histogram' rather than a list of bars.
histo_df = pd.DataFrame({
    'names': ['index', 'all_NA', 'half_NA', 'longtail', 'longtail_unique'],
     'histogram_props': ['histogram',
          [{'name': 'NA', 'NA': 100.0}],
          [{'name': 1, 'cat_pop': 44.0}, {'name': 'NA', 'NA': 56.0}],
          [{'name': 'long_97', 'cat_pop': 0.0},
           {'name': 'long_139', 'cat_pop': 0.0},
           {'name': 'long_12', 'cat_pop': 0.0},
           {'name': 'long_134', 'cat_pop': 0.0},
           {'name': 'long_21', 'cat_pop': 0.0},
           {'name': 'long_44', 'cat_pop': 0.0},
           {'name': 'long_58', 'cat_pop': 0.0},
           {'name': 'longtail', 'longtail': 77.0},
           {'name': 'NA', 'NA': 20.0}],
          [{'name': 'long_113', 'cat_pop': 0.0},
           {'name': 'long_116', 'cat_pop': 0.0},
           {'name': 'long_33', 'cat_pop': 0.0},
           {'name': 'long_72', 'cat_pop': 0.0},
           {'name': 'long_122', 'cat_pop': 0.0},
           {'name': 'long_6', 'cat_pop': 0.0},
           {'name': 'long_83', 'cat_pop': 0.0},
           {'name': 'longtail', 'unique': 50.0, 'longtail': 47.0}]]})
from buckaroo.customizations.analysis import TypingStats
from buckaroo.buckaroo_widget import BuckarooWidget
class PdGalleryBuckaroo(BuckarooWidget):
    """BuckarooWidget subclass whose ``analysis_klasses`` is just GalleryStyling and TypingStats.

    NOTE(review): this sets the same ``analysis_klasses`` as ``GalleryBuckaroo`` defined
    earlier in the notebook -- consider reusing that class.
    """
    analysis_klasses = [GalleryStyling, TypingStats]
# render the 'histogram_props' column with the histogram displayer
PdGalleryBuckaroo(
    histo_df,
    column_config_overrides=dict(
        histogram_props=dict(displayer_args=dict(displayer='histogram'))))
# FIXME: this doesn't work with polars right now, probably related to the object dtype problem

In [None]:
# Base64-encoded PNG image data (no 'data:' URI prefix), used as the cell value in the
# image-displayer example that follows.
png_smiley = 'iVBORw0KGgoAAAANSUhEUgAAABgAAAAYCAYAAADgdz34AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAAApgAAAKYB3X3/OAAAABl0RVh0U29mdHdhcmUAd3d3Lmlua3NjYXBlLm9yZ5vuPBoAAANCSURBVEiJtZZPbBtFFMZ/M7ubXdtdb1xSFyeilBapySVU8h8OoFaooFSqiihIVIpQBKci6KEg9Q6H9kovIHoCIVQJJCKE1ENFjnAgcaSGC6rEnxBwA04Tx43t2FnvDAfjkNibxgHxnWb2e/u992bee7tCa00YFsffekFY+nUzFtjW0LrvjRXrCDIAaPLlW0nHL0SsZtVoaF98mLrx3pdhOqLtYPHChahZcYYO7KvPFxvRl5XPp1sN3adWiD1ZAqD6XYK1b/dvE5IWryTt2udLFedwc1+9kLp+vbbpoDh+6TklxBeAi9TL0taeWpdmZzQDry0AcO+jQ12RyohqqoYoo8RDwJrU+qXkjWtfi8Xxt58BdQuwQs9qC/afLwCw8tnQbqYAPsgxE1S6F3EAIXux2oQFKm0ihMsOF71dHYx+f3NND68ghCu1YIoePPQN1pGRABkJ6Bus96CutRZMydTl+TvuiRW1m3n0eDl0vRPcEysqdXn+jsQPsrHMquGeXEaY4Yk4wxWcY5V/9scqOMOVUFthatyTy8QyqwZ+kDURKoMWxNKr2EeqVKcTNOajqKoBgOE28U4tdQl5p5bwCw7BWquaZSzAPlwjlithJtp3pTImSqQRrb2Z8PHGigD4RZuNX6JYj6wj7O4TFLbCO/Mn/m8R+h6rYSUb3ekokRY6f/YukArN979jcW+V/S8g0eT/N3VN3kTqWbQ428m9/8k0P/1aIhF36PccEl6EhOcAUCrXKZXXWS3XKd2vc/TRBG9O5ELC17MmWubD2nKhUKZa26Ba2+D3P+4/MNCFwg59oWVeYhkzgN/JDR8deKBoD7Y+ljEjGZ0sosXVTvbc6RHirr2reNy1OXd6pJsQ+gqjk8VWFYmHrwBzW/n+uMPFiRwHB2I7ih8ciHFxIkd/3Omk5tCDV1t+2nNu5sxxpDFNx+huNhVT3/zMDz8usXC3ddaHBj1GHj/As08fwTS7Kt1HBTmyN29vdwAw+/wbwLVOJ3uAD1wi/dUH7Qei66PfyuRj4Ik9is+hglfbkbfR3cnZm7chlUWLdwmprtCohX4HUtlOcQjLYCu+fzGJH2QRKvP3UNz8bWk1qMxjGTOMThZ3kvgLI5AzFfo379UAAAAASUVORK5CYII=';

In [None]:
# the same base64 payload in both columns: 'raw' shows it as a truncated string,
# 'img_displayer' hands it to the Base64PNGImageDisplayer
img_df = pd.DataFrame(dict(
    raw=[png_smiley, None],
    img_displayer=[png_smiley, None]))
BuckarooWidget(
    img_df,
    column_config_overrides=dict(
        raw=dict(displayer_args=dict(displayer='string', max_length=40)),
        img_displayer=dict(
            displayer_args=dict(displayer='Base64PNGImageDisplayer'),
            ag_grid_specs=dict(width=150))))

# Tooltips

In [None]:

# 'simple' tooltip on str_col whose value column is int_col
bw = BuckarooWidget(
    typed_df,
    column_config_overrides=dict(
        str_col=dict(
            tooltip_config=dict(tooltip_type='simple', val_column='int_col'))))
bw

In [None]:
# Show the df_display_args attribute of the tooltip widget created in the previous cell.
bw.df_display_args

# Color_map_config
```ts
export type ColorMap = "BLUE_TO_YELLOW" | "DIVERGING_RED_WHITE_BLUE" | string[];

//ColorMapRules
export interface ColorMapRules {
    color_rule: "color_map";
    map_name: ColorMap;
    //optional, the column to base the ranges on.  the proper histogram_bins must still be sent in for that column
    val_column?: string;
}

//ColorCategorical rules
export interface ColorCategoricalRules {
    color_rule: "color_categorical";
    map_name: ColorMap;
    //optional, the column to base the ranges on.  the proper histogram_bins must still be sent in for that column
    val_column?: string;
}

//if exist_column is not null, set the cell style to conditional_color... used for highlighting changed values or errored rows
export interface ColorWhenNotNullRules {
    color_rule: "color_not_null";
    conditional_color: string | "red";
    exist_column: string;
}

export interface ColorFromColumn {
    color_rule: "color_from_column";
    col_name: string;
}
```

In [None]:
# color float_col with the BLUE_TO_YELLOW map, taking the ranges from int_col
BuckarooWidget(
    typed_df,
    column_config_overrides=dict(
        float_col=dict(
            color_map_config=dict(
                color_rule='color_map',
                map_name='BLUE_TO_YELLOW',
                val_column='int_col'))))

In [None]:
# map_name also accepts your own color map, given directly as a list of colors
BuckarooWidget(
    pd.DataFrame(dict(a=[9, 10, 3, 4, 5, 1, 1])),
    column_config_overrides=dict(
        a=dict(
            color_map_config=dict(
                color_rule='color_map',
                map_name=["pink", "#73ae80", "#90b2b3", "#6c83b5", "orange"]))))

In [None]:
# color_categorical indexes directly into the color map.
# Values that exceed the array length are displayed with the default background.
BuckarooWidget(
    pd.DataFrame(dict(a=[9, 10, 1, 2, 3, 4, 5])),
    column_config_overrides=dict(
        a=dict(
            color_map_config=dict(
                color_rule='color_categorical',
                map_name=["pink", "#73ae80", "#90b2b3", "#6c83b5", "orange"]))))

In [None]:
# color_map without val_column: no separate column is named for the ranges
BuckarooWidget(
    typed_df,
    column_config_overrides=dict(
        float_col=dict(
            color_map_config=dict(
                color_rule='color_map',
                map_name='BLUE_TO_YELLOW'))))

In [None]:
# rows 2 and 3 carry an error message; row 1 has none
error_df = pd.DataFrame(dict(
    a=[10, 20, 30],
    err_messages=[None, "a must be less than 19, it is 20", "a must be less than 19, it is 30"]))

# color_not_null: column 'a' is colored red on rows where err_messages is not null
BuckarooWidget(
    error_df,
    column_config_overrides=dict(
        a=dict(
            color_map_config=dict(
                color_rule='color_not_null',
                conditional_color='red',
                exist_column='err_messages'))))

In [None]:
# 'a_colors' holds one color string per row of 'a'
color_df = pd.DataFrame(dict(
    a=[10, 20, 30],
    a_colors=['red', '#d3a', 'green']))

# NOTE(review): the ColorFromColumn interface quoted in the markdown above names this key
# `col_name`, while this cell passes `val_column` -- confirm which one the frontend reads.
BuckarooWidget(
    color_df,
    column_config_overrides=dict(
        a=dict(
            color_map_config=dict(
                color_rule='color_from_column',
                val_column='a_colors'))))

Extra column definition type

`ag_grid_specs`
should only be used for very specific hacking. You can set any primitive property, but you cannot set function properties.
See https://github.com/ag-grid/ag-grid/blob/latest/packages/ag-grid-community/src/entities/colDef.ts

# Utility Code

In [None]:
#utility code for generating histogram data structures
from buckaroo.buckaroo_widget import BuckarooWidget

NA = pd.NA
def rand_cat(named_p, na_per, N):
    """Draw random categorical values from the named categories plus NA.

    Parameters
    ----------
    named_p : dict
        Maps each category value to its share of the ``N`` rows.
    na_per : float
        Share of the ``N`` rows that should be ``NA``.
    N : int
        Row count the shares are relative to.

    Returns
    -------
    list
        ``floor((sum(named_p.values()) + na_per) * N)`` draws, each made with its own
        ``np.random.choice`` call; an empty list when the combined share is not positive.
    """
    named_total_per = sum(named_p.values()) + na_per
    total_len = int(np.floor(named_total_per * N))
    if not named_total_per > 0:
        return []
    # NA is always the last choice; the weights are rescaled so they sum to 1
    choices = [*named_p.keys(), NA]
    p = [share / named_total_per for share in named_p.values()]
    p.append(na_per / named_total_per)
    draws = []
    for _ in range(total_len):
        draws.append(np.random.choice(choices, p=p))
    return draws

def random_categorical(named_p, unique_per, na_per, longtail_per, N):
    """Build a shuffled array of ``N`` categorical test values.

    The array is made of three parts:
    * draws from ``rand_cat(named_p, na_per, N)``;
    * ``floor(longtail_per * N) // 2`` long-tail labels ("long_<i>"), each present twice;
    * "unique_<i>" labels that fill whatever is left up to ``N``.

    ``unique_per`` is accepted but never read: the number of unique values is
    always the remainder.
    """
    discrete_vals = rand_cat(named_p, na_per, N)

    pair_count = int(np.floor(longtail_per * N)) // 2
    # every long-tail label is emitted twice, back to back
    filler = ["long_%d" % i for i in range(pair_count) for _ in range(2)]

    remaining = N - (len(filler) + len(discrete_vals))
    filler.extend("unique_%d" % i for i in range(remaining))

    combined = np.concatenate([discrete_vals, filler])
    np.random.shuffle(combined)
    return combined
# four N-row categorical columns covering the histogram edge cases
# (keyword order fixes the order of the random_categorical calls)
cat_histo_df = pd.DataFrame(dict(
    all_NA=[NA] * N,
    half_NA=random_categorical({1: .5}, unique_per=0, na_per=.5, longtail_per=.0, N=N),
    longtail=random_categorical({}, unique_per=0, na_per=.2, longtail_per=.8, N=N),
    longtail_unique=random_categorical({}, unique_per=0.5, na_per=.0, longtail_per=.5, N=N)))
# rebinds bw: pin the dtype and histogram summary rows
bw = BuckarooWidget(
    cat_histo_df,
    pinned_rows=[
        dict(primary_key_val='dtype', displayer_args=dict(displayer='obj')),
        dict(primary_key_val='histogram', displayer_args=dict(displayer='histogram'))])
# first all_stats row whose 'index' is 'histogram'
histogram_vals = [row for row in bw.df_data_dict['all_stats'] if row['index'] == 'histogram'][0]
def format_histo(bw):
    """Split the widget's 'histogram' stats row into parallel key and value lists.

    Reads ``bw.df_data_dict['all_stats']``, takes the first row whose ``'index'`` is
    ``'histogram'`` and returns ``{'names': [keys...], 'histogram_props': [values...]}``.
    Raises ``IndexError`` when no such row exists.
    """
    histogram_rows = [row for row in bw.df_data_dict['all_stats'] if row['index'] == 'histogram']
    first_row = histogram_rows[0]
    return {
        'names': list(first_row.keys()),
        'histogram_props': list(first_row.values())}
# Display the names / histogram_props dict for the categorical-histogram widget built above.
format_histo(bw)