# Introduction

This is a simple notebook that takes data scraped from ODS/TableGen and gives statistics on the dialects.
Note that some manual work was done beforehand to translate the scraped C++ verifiers into an IRDL representation.

# Setup

Import the relevant packages:

In [1]:
from analyze_tablegen.main import *
import ipywidgets as widgets
import matplotlib.pyplot as plt
import numpy as np

Get the cached data from TableGen:

In [2]:
import json
    
# Read the cached TableGen dump. The context manager closes the file handle
# as soon as the content has been read, even if parsing raises.
with open("analyze_tablegen/tablegen_data.json", "r") as f:
    stats = get_stats_from_json(f.read())
# Per-dialect data exposed by the parsed statistics object.
dialects = stats.dialects
# Keys view of the dialects; used below as the options of the dialect selectors.
dialect_names = dialects.keys()

A simple bar plotter:

In [3]:
@dataclass
class PlotData:
    """Data for a single bar chart: one label and one value per bar."""

    # Bar labels, in plotting order.
    keys: List[str]
    # Bar heights, parallel to ``keys``.
    values: List[Union[float, int]]
    # Chart title; ``None`` means no title is drawn.
    title: Optional[str]

    @classmethod
    def with_integer_keys(cls, values, title=None):
        """Build an instance whose labels are the indices of ``values``.

        Args:
            values: Sized sequence of bar heights.
            title: Optional chart title.

        Returns:
            An instance with keys ``"0"``, ``"1"``, ... (one per value).
        """
        # A classmethod (rather than a bare function in the class body) keeps
        # the call working both on the class and on an instance.
        return cls([str(i) for i in range(len(values))], values, title=title)

@dataclass
class MultibarPlotData:
    """Data for a stacked horizontal bar chart: one row of segments per label."""

    # Segment (column) labels, shared by every row.
    keys: List[str]
    # Row label -> segment sizes for that row.
    values: Dict[str, List[int]]
    # Chart title; ``None`` means no title is drawn.
    title: Optional[str]

    @classmethod
    def with_integer_keys(cls, values, title=None):
        """Build an instance whose segment labels are column indices.

        The number of columns is taken from the first row of ``values``;
        an empty mapping yields an instance with no keys.

        Args:
            values: Mapping from row label to a sized sequence of segment sizes.
            title: Optional chart title.

        Returns:
            An instance of this class (not a ``PlotData``) with keys
            ``"0"``, ``"1"``, ... (one per column of the first row).
        """
        # Default to an empty row so an empty mapping does not raise StopIteration.
        first_row = next(iter(values.values()), [])
        return cls([str(i) for i in range(len(first_row))], values, title=title)
    
# Turn the raw result of a get_global_*_distribution call into plot data.
def map_dialect_data_to_plot(distribution, title=None):
    """Convert a raw distribution into the three plot objects used by the notebook.

    Args:
        distribution: Indexable pair; index 0 is a sized sequence and index 1
            is a mapping from dialect name to a sized sequence (presumably the
            aggregate and the per-dialect counts; confirm against
            get_global_*_distribution).
        title: Optional title applied to every produced plot.

    Returns:
        A tuple ``(stacked, total, per_dialect)``: a multibar plot built from
        index 1, a simple plot built from index 0, and a dict holding one
        simple plot per dialect name.
    """
    stacked = MultibarPlotData.with_integer_keys(distribution[1], title=title)
    total = PlotData.with_integer_keys(distribution[0], title=title)
    per_dialect = {}
    for name, counts in distribution[1].items():
        per_dialect[name] = PlotData.with_integer_keys(counts, title=title)
    return (stacked, total, per_dialect)

def get_op_plot_stats(stats, lam, title=None):
    """Return plot data for the operation distribution computed with ``lam``.

    ``stats`` and ``lam`` are forwarded unchanged to
    ``get_global_op_distribution``; ``title`` is attached to every plot.
    """
    raw_distribution = get_global_op_distribution(stats, lam)
    return map_dialect_data_to_plot(raw_distribution, title=title)

def get_type_plot_stats(stats, lam, title=None):
    """Return plot data for the type distribution computed with ``lam``.

    ``stats`` and ``lam`` are forwarded unchanged to
    ``get_global_type_distribution``; ``title`` is attached to every plot.
    """
    raw_distribution = get_global_type_distribution(stats, lam)
    return map_dialect_data_to_plot(raw_distribution, title=title)

def get_attr_plot_stats(stats, lam, title=None):
    """Return plot data for the attribute distribution computed with ``lam``.

    ``stats`` and ``lam`` are forwarded unchanged to
    ``get_global_attr_distribution``; ``title`` is attached to every plot.
    """
    raw_distribution = get_global_attr_distribution(stats, lam)
    return map_dialect_data_to_plot(raw_distribution, title=title)

def plot_simple_consecutive_integer_data(plot_data):
    """Draw ``plot_data`` as a vertical bar chart and display it.

    Args:
        plot_data: Object with ``keys`` (bar labels), ``values`` (bar heights)
            and ``title`` (string or ``None``) attributes, e.g. a ``PlotData``.
    """
    # The return value of plt.bar is a bar container, not a figure, and is not
    # needed here, so it is not bound to a name.
    plt.bar(plot_data.keys, plot_data.values)
    if plot_data.title is not None:
        plt.title(plot_data.title)
    plt.show()
    # Close the current figure so the next plot does not draw on top of it.
    plt.close()
    
def plot_multibar_graph(plot_data, colors=None, size=(12, 6)):
    """Draw ``plot_data`` as a stacked horizontal bar chart and display it.

    Each entry of ``plot_data.values`` becomes one horizontal bar, split into
    one segment per entry of ``plot_data.keys``.

    Args:
        plot_data: Object with ``keys`` (segment labels), ``values`` (mapping
            from row label to a list of segment sizes) and ``title`` (string
            or ``None``) attributes.
        colors: Optional sequence with one colour per segment, in any form
            accepted by ``Axes.barh``. Defaults to samples of the ``RdYlGn``
            colormap. Segments beyond the shorter of ``keys`` and ``colors``
            are not drawn.
        size: Figure size passed to ``plt.figure`` as ``figsize``.
    """
    labels = list(plot_data.values)
    data = np.array(list(plot_data.values.values()))
    # Running total along each row: the right edge of every segment.
    data_cum = data.cumsum(axis=1)
    if colors is None:
        colors = plt.get_cmap('RdYlGn')(np.linspace(1, 0.5, data.shape[1]))

    fig = plt.figure(figsize=size)
    ax = plt.axes()
    ax.invert_yaxis()
    # The widest row determines the extent of the x axis.
    ax.set_xlim(0, np.sum(data, axis=1).max())

    for i, (colname, color) in enumerate(zip(plot_data.keys, colors)):
        widths = data[:, i]
        # Left edge of segment i = cumulative total minus its own width.
        starts = data_cum[:, i] - widths
        # No RGBA unpacking of ``color`` here: the components were never used,
        # and unpacking rejected colours that are not 4-tuples.
        ax.barh(labels, widths, left=starts, height=0.5,
                label=colname, color=color)

    ax.legend(ncol=len(plot_data.keys), bbox_to_anchor=(0, 1),
              loc='lower left', fontsize='small')
    if plot_data.title is not None:
        plt.title(plot_data.title)
    fig.tight_layout()
    plt.show()
    # Close the current figure so the next plot does not draw on top of it.
    plt.close()

def category_plot(distribution):
    """Show an interactive, per-category view of ``distribution``.

    Args:
        distribution: Mapping from category name to an indexable triple
            ``(multibar plot data, summed plot data, per-dialect plot data)``,
            as returned by ``map_dialect_data_to_plot``.

    A row of toggle buttons selects the category. For the selected category
    the multibar graph and the summed bar chart are drawn, followed by a
    second row of toggle buttons (built from the module-level
    ``dialect_names``) that selects which dialect's bar chart to draw.
    """
    category_buttons = widgets.ToggleButtons(
        options=distribution.keys(),
        description='Category:',
        disabled=False,
        button_style='',
    )

    @widgets.interact(category=category_buttons)
    def show_category(category):
        entry = distribution[category]
        stacked = entry[0]
        total = entry[1]
        per_dialect = entry[2]

        plot_multibar_graph(stacked)
        plot_simple_consecutive_integer_data(total)

        # button_style may be 'success', 'info', 'warning', 'danger' or ''.
        dialect_buttons = widgets.ToggleButtons(
            options=dialect_names,
            description='Dialect:',
            disabled=False,
            button_style='',
        )

        @widgets.interact(dialect_name=dialect_buttons)
        def show_dialect(dialect_name):
            plot_simple_consecutive_integer_data(per_dialect[dialect_name])

# Some general statistics
## General statistics on operations

In [4]:
# Plot data for operation-level statistics, keyed by the category name shown
# on the toggle buttons. Each lambda receives an operation definition and
# returns the integer to be counted (presumably applied once per operation by
# get_global_op_distribution; confirm in analyze_tablegen.main).
distribution = dict()
distribution["operands"] = get_op_plot_stats(
    stats,
    lambda x: x.numOperands,
    title="Distribution of the number of operand definitions",
)
distribution["variadic operands"] = get_op_plot_stats(
    stats,
    lambda x: sum(isinstance(operand.constraint, VariadicConstraint) for operand in x.operands),
    title="Distribution of the number of variadic operand definitions",
)
distribution["results"] = get_op_plot_stats(
    stats,
    lambda x: x.numResults,
    title="Distribution of the number of result definitions",
)
distribution["variadic results"] = get_op_plot_stats(
    stats,
    lambda x: sum(isinstance(result.constraint, VariadicConstraint) for result in x.results),
    title="Distribution of the number of variadic result definitions",
)
# 1 when the operation defines more than one result or any variadic result,
# 0 otherwise. The title previously duplicated the "variadic results" one.
distribution["multiple results"] = get_op_plot_stats(
    stats,
    lambda x: int(
        x.numResults > 1
        or any(isinstance(result.constraint, VariadicConstraint) for result in x.results)
    ),
    title="Distribution of operations that can have multiple results",
)
distribution["regions"] = get_op_plot_stats(
    stats,
    lambda x: x.numRegions,
    title="Distribution of the number of region definitions",
)
distribution["attributes"] = get_op_plot_stats(
    stats,
    lambda x: len(x.attributes),
    title="Distribution of the number of attribute definitions",
)
# 1 when the operation has no assembly format, 0 otherwise.
distribution["C++ printer"] = get_op_plot_stats(
    stats,
    lambda x: 1 - int(x.hasAssemblyFormat),
    title="Distribution of number of C++ printers",
)
distribution["C++ verifier"] = get_op_plot_stats(
    stats,
    lambda x: int(x.hasVerifier),
    title="Distribution of the number of C++ verifiers",
)
distribution["traits"] = get_op_plot_stats(
    stats,
    lambda x: len(x.traits),
    title="Distribution of the number of traits",
)
distribution["interfaces"] = get_op_plot_stats(
    stats,
    lambda x: len(x.interfaces),
    title="Distribution of the number of interfaces",
)

category_plot(distribution)

interactive(children=(ToggleButtons(description='Category:', options=('operands', 'variadic operands', 'result…

## General statistics on types

In [5]:
# Plot data for type-level statistics, keyed by the category name shown on
# the toggle buttons.
distribution = {
    "parameters": get_type_plot_stats(
        stats,
        lambda ty: len(ty.parameters),
        "Distribution of the number of parameter definitions",
    ),
    "traits": get_type_plot_stats(
        stats,
        lambda ty: len(ty.traits),
        "Distribution of the number of traits",
    ),
    "interfaces": get_type_plot_stats(
        stats,
        lambda ty: len(ty.interfaces),
        "Distribution of the number of interfaces",
    ),
}

category_plot(distribution)

interactive(children=(ToggleButtons(description='Category:', options=('parameters', 'traits', 'interfaces'), v…

## General statistics on attributes

In [6]:
# Plot data for attribute-level statistics, keyed by the category name shown
# on the toggle buttons. This section is about attributes, so the attribute
# distribution helper is used (the type helper was used here by mistake).
distribution = dict()
distribution["parameters"] = get_attr_plot_stats(stats, lambda x: len(x.parameters), "Distribution of the number of parameters")
distribution["traits"] = get_attr_plot_stats(stats, lambda x: len(x.traits), "Distribution of the number of traits")
distribution["interfaces"] = get_attr_plot_stats(stats, lambda x: len(x.interfaces), "Distribution of the number of interfaces")

category_plot(distribution)

interactive(children=(ToggleButtons(description='Category:', options=('parameters', 'traits', 'interfaces'), v…

# "Declarativeness" of current dialects

In [13]:
# Plot data for the "declarativeness" statistics, keyed by the category name
# shown on the toggle buttons. Every lambda yields 0 or 1 for one definition.
# TODO check constraints in types/attributes
distribution = {
    "types": get_type_plot_stats(
        stats,
        lambda ty: int(ty.is_declarative()),
        "Distribution of types that can be expressed in IRDL",
    ),
    "attributes": get_attr_plot_stats(
        stats,
        lambda attr: int(attr.is_declarative()),
        "Distribution of attributes that can be expressed in IRDL",
    ),
    # 1 when the operation has no assembly format.
    "op has C++ parser": get_op_plot_stats(
        stats,
        lambda op: int(not op.hasAssemblyFormat),
        "Distribution of the number of C++ printer/parsers on operations",
    ),
    "op has C++ verifier": get_op_plot_stats(
        stats,
        lambda op: int(op.hasVerifier),
        "Distribution of the number of C++ verifiers on operations",
    ),
    # Operations whose operands/results are declarative.
    "decl operands/results": get_op_plot_stats(
        stats,
        lambda op: int(op.is_operands_results_attrs_declarative()),
        "How many operations have operands/results expressable in IRDL",
    ),
    # Operations whose traits are declarative.
    "decl op traits": get_op_plot_stats(
        stats,
        lambda op: int(op.is_traits_declarative()),
        "How many operations have traits expressable in IRDL",
    ),
    # Declarativeness check with traits and interfaces both ignored.
    "decl ops without traits/interfaces": get_op_plot_stats(
        stats,
        lambda op: int(op.is_declarative(check_traits=False, check_interfaces=False)),
        "How many operations can be expressed in IRDL without traits and interfaces",
    ),
    # Declarativeness check with only interfaces ignored.
    "decl ops without interfaces": get_op_plot_stats(
        stats,
        lambda op: int(op.is_declarative(check_traits=True, check_interfaces=False)),
        "How many operations can be expressed in IRDL without interfaces",
    ),
    # Declarativeness check including traits and interfaces.
    "decl ops": get_op_plot_stats(
        stats,
        lambda op: int(op.is_declarative(check_traits=True, check_interfaces=True)),
        "How many operations can be fully expressed in IRDL",
    ),
}

category_plot(distribution)

interactive(children=(ToggleButtons(description='Category:', options=('types', 'attributes', 'op has C++ parse…

# Printing dialects in IRDL

In [52]:
import analyze_tablegen.simplifier as simplifier

# Result of running the simplifier over the statistics; printed when the
# "optimized" view is selected.
stats_optimized = simplifier.simplify(stats)

# Toggle buttons offering one entry per dialect name.
category_select = widgets.ToggleButtons(
    options=dialect_names, description='Dialect:', disabled=False, button_style=''
)

@widgets.interact(dialect_name=category_select)
def print_dialect(dialect_name):
    """Print the selected dialect, in its unoptimized or optimized form.

    A nested row of toggle buttons picks the variant: "optimized" prints the
    dialect from ``stats_optimized``, any other choice prints it from ``stats``.
    """
    optimization_select = widgets.ToggleButtons(
        options=["unoptimized", "optimized"],
        description='',
        disabled=False,
        button_style='',
    )

    @widgets.interact(optimization=optimization_select)
    def print_optimized(optimization):
        source = stats_optimized if optimization == "optimized" else stats
        source.dialects[dialect_name].print()

interactive(children=(ToggleButtons(description='Dialect:', options=('builtin', 'sparse_tensor', 'tosa', 'scf'…