In [None]:
import pandas as pd
import numpy as np
from buckaroo.dataflow_traditional import SimpleStylingAnalysis
from buckaroo.pluggable_analysis_framework.pluggable_analysis_framework import ColAnalysis
import polars as pl
from buckaroo.polars_buckaroo import PolarsBuckarooWidget

In [None]:
ROWS = 200
SEED = 42  # fixed seed so the demo data is identical on every Restart & Run All
rng = np.random.default_rng(SEED)

# Build the demo frame in pandas first, under its own name, so `typed_df`
# only ever refers to the polars DataFrame handed to the widgets below.
typed_pandas_df = pd.DataFrame({
    'int_col': rng.integers(1, 50, ROWS),          # integers in [1, 50)
    'float_col': rng.integers(1, 30, ROWS) / .7,   # non-integral floats
    'str_col': ["foobar"] * ROWS,
})
typed_df = pl.from_pandas(typed_pandas_df)

In [None]:
class SummaryStatsAnalysis(SimpleStylingAnalysis):
    """Styling analysis exposed as the "summary" display.

    Pins the dtype row plus several numeric summary rows. The row configs are
    written out as plain dicts because this cell runs before any convenience
    helpers exist in the notebook; relying on them here would raise NameError
    on a fresh kernel.
    """
    pinned_rows = [
        {'primary_key_val': 'dtype', 'displayer_args': {'displayer': 'obj'}},
        {'primary_key_val': 'min',
         'displayer_args': {'displayer': 'float', 'minimumFractionDigits': 3, 'maximumFractionDigits': 3}},
        # the 'median' row is currently disabled
        {'primary_key_val': 'mean',
         'displayer_args': {'displayer': 'float', 'minimumFractionDigits': 3, 'maximumFractionDigits': 3}},
        {'primary_key_val': 'max',
         'displayer_args': {'displayer': 'float', 'minimumFractionDigits': 3, 'maximumFractionDigits': 3}},
        # counts are shown with no fractional digits
        {'primary_key_val': 'unique_count',
         'displayer_args': {'displayer': 'float', 'minimumFractionDigits': 0, 'maximumFractionDigits': 0}},
        {'primary_key_val': 'distinct_count',
         'displayer_args': {'displayer': 'float', 'minimumFractionDigits': 0, 'maximumFractionDigits': 0}},
        {'primary_key_val': 'empty_count',
         'displayer_args': {'displayer': 'float', 'minimumFractionDigits': 0, 'maximumFractionDigits': 0}},
    ]
    df_display_name = "summary"
    data_key = "empty"
    summary_stats_key = 'all_stats'
# Build a new list of analyses (stock ones + SummaryStatsAnalysis) so that
# PolarsBuckarooWidget.analysis_klasses itself is not modified.
base_a_klasses = [*PolarsBuckarooWidget.analysis_klasses, SummaryStatsAnalysis]


class SummaryBuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget that additionally runs SummaryStatsAnalysis."""
    analysis_klasses = base_a_klasses


sbw = SummaryBuckarooWidget(typed_df)
# Start on the summary stats view: assign a modified copy of the state back
# to the widget instead of mutating the existing dict in place.
bstate = dict(sbw.buckaroo_state, df_display='summary')
sbw.buckaroo_state = bstate
sbw

In [None]:
# Only float_col gets an override here: a 'float' displayer showing
# between 0 and 3 fraction digits.
float_col_overrides = {
    'float_col': {
        'displayer_args': {
            'displayer': 'float',
            'minimumFractionDigits': 0,
            'maximumFractionDigits': 3,
        },
    },
}
bw2 = PolarsBuckarooWidget(typed_df, debug=False, column_config_overrides=float_col_overrides)
bw2

Now we are going to force `float_col` to be displayed with a 'float' displayer.
Notice how the decimal points align, as opposed to above, where 10 is floored without a decimal portion.

Let's add a tooltip to str_col, with the value of int_col

In [None]:
# str_col gets a 'simple' tooltip whose value is read from int_col.
str_col_tooltip = {
    'str_col': {
        'tooltip_config': {'tooltip_type': 'simple', 'val_column': 'int_col'},
    },
}
bw3 = PolarsBuckarooWidget(typed_df, column_config_overrides=str_col_tooltip)
bw3

In [None]:
# Stock analyses plus SimpleStylingAnalysis, collected in a fresh list.
base_a_klasses = [*PolarsBuckarooWidget.analysis_klasses, SimpleStylingAnalysis]


class SimpleBuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget that additionally runs SimpleStylingAnalysis."""
    analysis_klasses = base_a_klasses


# Color float_col with the BLUE_TO_YELLOW color map.
float_col_color_map = {
    'float_col': {
        'color_map_config': {
            'color_rule': 'color_map',
            'map_name': 'BLUE_TO_YELLOW',
        },
    },
}
bw3 = SimpleBuckarooWidget(typed_df, column_config_overrides=float_col_color_map)
bw3

Now let's color int_col based on the range of float_col

In [None]:
# int_col is colored with DIVERGING_RED_WHITE_BLUE, but the color map's
# 'val_column' points at float_col rather than int_col itself.
int_col_color_map = {
    'int_col': {
        'color_map_config': {
            'color_rule': 'color_map',
            'map_name': 'DIVERGING_RED_WHITE_BLUE',
            'val_column': 'float_col',
        },
    },
}
bw3 = PolarsBuckarooWidget(typed_df, column_config_overrides=int_col_color_map)
bw3

Let's hide a column. Note this still has the data for that column sent to the frontend, and it is still accessible for color_maps and tooltips.
A note about hiding columns.  It only makes sense to hide columns from functions with access to the whole of a dataframe.
The only reason to hide a column (as opposed to remove it from the dataframe) is to use the values for tooltips or colormaps of another column

In [None]:
# Mark int_col as hidden through its 'merge_rule' override.
hidden_int_col = {'int_col': {'merge_rule': 'hidden'}}
bw_ = PolarsBuckarooWidget(typed_df, column_config_overrides=hidden_int_col)
bw_

Let's look at pinned_rows. They can be modified by setting `pinned_rows` on Buckaroo instantiation.

In [None]:
# Each pinned row names a summary stat ('primary_key_val') and the displayer
# used to render it.
dtype_and_histogram_rows = [
    {'primary_key_val': 'dtype', 'displayer_args': {'displayer': 'obj'}},
    {'primary_key_val': 'histogram', 'displayer_args': {'displayer': 'histogram'}},
]
bw = PolarsBuckarooWidget(typed_df, pinned_rows=dtype_and_histogram_rows)
bw

Pinned rows read from summary_stats, based on `primary_key_val`. You can list all of the summary_stats keys like this:

In [None]:
# Every entry of 'all_stats' carries its stat name under 'index'.
[stat_row['index'] for stat_row in bw.df_data_dict['all_stats']]

You can even display histograms in regular cells if 'histogram' is properly constructed

In [None]:
# Take the first 'all_stats' entry whose 'index' is 'histogram'
# (raises IndexError if there is none).
histogram_vals = [
    stat_row
    for stat_row in bw.df_data_dict['all_stats']
    if stat_row['index'] == 'histogram'
][0]
histogram_vals

In [None]:
# Put the precomputed histograms of int_col and float_col into an ordinary
# column, then ask for that column to be rendered with the histogram displayer.
hist_df = pl.DataFrame({
    'a': [20, 30],
    'hist_col': [histogram_vals['int_col'], histogram_vals['float_col']],
})
hist_col_overrides = {'hist_col': {'displayer_args': {'displayer': 'histogram'}}}
hist_bw = PolarsBuckarooWidget(hist_df, column_config_overrides=hist_col_overrides)
hist_bw

Adding alternate styling methods

Buckaroo encourages using many opinionated analyses that can be quickly cycled through.

Here we will add to pinned_row configs

In [None]:
class SummaryStatsAnalysis(SimpleStylingAnalysis):
    """Styling analysis exposed as the "summary5" display; pins dtype and histogram rows."""
    df_display_name = "summary5"
    data_key = "empty"
    summary_stats_key = 'all_stats'
    pinned_rows = [
        {'primary_key_val': 'dtype', 'displayer_args': {'displayer': 'obj'}},
        {'primary_key_val': 'histogram', 'displayer_args': {'displayer': 'histogram'}},
    ]
# Stock analyses plus the SummaryStatsAnalysis defined in this cell.
base_a_klasses = [*PolarsBuckarooWidget.analysis_klasses, SummaryStatsAnalysis]


class SummaryBuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget that additionally runs SummaryStatsAnalysis."""
    analysis_klasses = base_a_klasses


SummaryBuckarooWidget(typed_df)

In [None]:
# It's annoying to type out all of those pinned rows; let's make some convenience functions
def obj_(pkey):
    """Return a pinned-row config showing summary stat `pkey` with the 'obj' displayer."""
    displayer_args = {'displayer': 'obj'}
    return dict(primary_key_val=pkey, displayer_args=displayer_args)

def float_(pkey, digits=3):
    """Return a pinned-row config showing summary stat `pkey` with the 'float' displayer.

    `digits` is used as both the minimum and the maximum number of fraction
    digits, so every value in the row is shown with exactly that many.
    """
    displayer_args = {'displayer': 'float'}
    for bound in ('minimumFractionDigits', 'maximumFractionDigits'):
        displayer_args[bound] = digits
    return dict(primary_key_val=pkey, displayer_args=displayer_args)

class SummaryStatsAnalysis1(SimpleStylingAnalysis):
    """Styling analysis exposed as the "summary1" display; pins dtype and histogram rows."""
    df_display_name = "summary1"
    data_key = "empty"
    summary_stats_key = 'all_stats'
    pinned_rows = [
        {'primary_key_val': 'dtype', 'displayer_args': {'displayer': 'obj'}},
        {'primary_key_val': 'histogram', 'displayer_args': {'displayer': 'histogram'}},
    ]
class SummaryStatsAnalysis(SimpleStylingAnalysis):
    """Styling analysis exposed as the "summary" display; pins dtype, min, mean and max rows."""
    df_display_name = "summary"
    data_key = "empty"
    summary_stats_key = 'all_stats'
    pinned_rows = [
        obj_('dtype'),
        # 'median' is currently left out
        *map(float_, ('min', 'mean', 'max')),
    ]
# Stock analyses followed by both summary stylings, collected in a fresh list.
base_a_klasses = [
    *PolarsBuckarooWidget.analysis_klasses,
    SummaryStatsAnalysis1,
    SummaryStatsAnalysis,
]


class SummaryBuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget that additionally runs both summary styling analyses."""
    analysis_klasses = base_a_klasses


sbw = SummaryBuckarooWidget(typed_df)
# Start on the 'summary1' view: assign a modified copy of the state back to
# the widget instead of mutating the existing dict in place.
bstate = dict(sbw.buckaroo_state, df_display='summary1')
sbw.buckaroo_state = bstate
sbw

In [None]:
class SummaryStatsAnalysis(SimpleStylingAnalysis):
    """Styling analysis exposed as the "summary" display.

    Pins dtype, then min/mean/max with the default number of fraction digits,
    then three count rows with zero fraction digits.
    """
    df_display_name = "summary"
    data_key = "empty"
    summary_stats_key = 'all_stats'
    pinned_rows = [
        obj_('dtype'),
        # 'median' is currently left out
        *(float_(stat) for stat in ('min', 'mean', 'max')),
        *(float_(stat, 0) for stat in ('unique_count', 'distinct_count', 'empty_count')),
    ]
# Stock analyses plus the SummaryStatsAnalysis defined in this cell.
base_a_klasses = [*PolarsBuckarooWidget.analysis_klasses, SummaryStatsAnalysis]


class SummaryBuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget that additionally runs SummaryStatsAnalysis."""
    analysis_klasses = base_a_klasses


sbw = SummaryBuckarooWidget(typed_df)
# Start on the summary stats view: assign a modified copy of the state back
# to the widget instead of mutating the existing dict in place.
bstate = dict(sbw.buckaroo_state, df_display='summary')
sbw.buckaroo_state = bstate
sbw

# Let's add a post-processing method

In [None]:
from polars import functions as F
from buckaroo.pluggable_analysis_framework.polars_analysis_management import PolarsAnalysis

In [None]:
class ValueCountPostProcessing(PolarsAnalysis):
    """Post-processing step, registered as "value_counts", that replaces the
    frame with per-column value counts.

    For every input column `c` the result has a `val_c` column (the values)
    and a `count_c` column (how often each value occurs), limited to 10 rows.
    """
    post_processing_method = "value_counts"

    @classmethod
    def post_process_df(kls, df):
        """Return `[value_counts_frame, {}]` for `df`.

        The second element is always an empty dict here; presumably it is a
        slot for extra data consumed by the framework -- TODO confirm.
        """
        # Shared pipeline: per column, collect the value_counts structs into a
        # single list, gather positions 0..9 (null_on_oob=True requests nulls
        # rather than an error when there are fewer than 10 entries), and
        # explode back to one struct per row.
        # NOTE(review): value_counts() is called without sort=True, so these
        # may not be the 10 most frequent values -- confirm intent.
        first_ten_counts = (
            F.all()
            .value_counts()
            .implode()
            .list.gather(pl.arange(0, 10), null_on_oob=True)
            .explode()
        )
        # `.name.prefix` is used instead of the deprecated `Expr.prefix`.
        value_cols = (
            first_ten_counts
            .struct.rename_fields(['val', 'unused_count'])
            .struct.field('val')
            .name.prefix('val_')
        )
        count_cols = first_ten_counts.struct.field('count').name.prefix('count_')
        result_df = df.select(value_cols, count_cols)
        return [result_df, {}]
class TransposeProcessing(ColAnalysis):
    """Post-processing step, registered as "transpose", that transposes the frame."""
    post_processing_method = "transpose"

    @classmethod
    def post_process_df(kls, df):
        """Return `[df.transpose(), {}]`; the second element is always an empty dict."""
        transposed = df.transpose()
        return [transposed, {}]
# Stock analyses plus the simple styling and both post-processing steps,
# collected in a fresh list.
base_a_klasses = [
    *PolarsBuckarooWidget.analysis_klasses,
    SimpleStylingAnalysis,
    ValueCountPostProcessing,
    TransposeProcessing,
]


class VCBuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget with the value-count and transpose post-processing steps added."""
    analysis_klasses = base_a_klasses


vcb = VCBuckarooWidget(typed_df, debug=False)
vcb

In [None]:
class AdaptingStylingAnalysis(SimpleStylingAnalysis):
    """Styling analysis that picks a displayer per column from its summary stats.

    Pins the dtype and histogram rows and declares the summary stats it reads.
    """
    requires_summary = ["histogram", "is_numeric", "dtype", "is_integer"]
    pinned_rows = [
        obj_('dtype'),
        {'primary_key_val': 'histogram', 'displayer_args': {'displayer': 'histogram'}},
    ]

    @staticmethod
    def single_sd_to_column_config(col, sd):
        """Return the column config for `col` given its summary dict `sd`.

        Integer columns get a 'float' displayer with 0 fraction digits, other
        numeric columns get 3 fraction digits, and everything else falls back
        to the 'obj' displayer. `sd['is_integer']` is checked first;
        `sd['is_numeric']` is only read when the column is not an integer.
        """
        if sd['is_integer']:
            fraction_digits = 0
        elif sd['is_numeric']:
            fraction_digits = 3
        else:
            return {'col_name': col, 'displayer_args': {'displayer': 'obj'}}

        float_displayer = {
            'displayer': 'float',
            'minimumFractionDigits': fraction_digits,
            'maximumFractionDigits': fraction_digits,
        }
        return {'col_name': col, 'displayer_args': float_displayer}

# Stock analyses plus the adapting styling and the value-count
# post-processing step, collected in a fresh list.
base_a_klasses = [
    *PolarsBuckarooWidget.analysis_klasses,
    AdaptingStylingAnalysis,
    ValueCountPostProcessing,
]


class ABuckarooWidget(PolarsBuckarooWidget):
    """PolarsBuckarooWidget using AdaptingStylingAnalysis and ValueCountPostProcessing."""
    analysis_klasses = base_a_klasses


acb = ABuckarooWidget(typed_df)
acb