# adding aria to long or wide tables

sometimes tables are large and are truncated by default to save screen and disk real estate.
when this happens, we break the meaning of the columns/row ordering.
we need to supplement the table elements with aria to ensure an accessible experience.

screen reader behavior is ok when dataframes are smaller than the pandas display options.
when they exceed these values the data is truncated and screen readers need extra markup
to accurately navigate the tables based on their real size.
we'll also highlight a persistent ambiguity between visual dataframe indexing and audible dataframe indexing.

to provide the proper markup for large tables we need to be rigorous about the use of ARIA: `aria-rowindex`, `aria-colindex`, `aria-rowspan`, `aria-colspan`.

this example covers the most complex axis case and does not include grouping.

In [1]:
    import pandas, bs4, enum, numpy, midgy, functools
    # Use `str` as the text/html formatter for BeautifulSoup objects, so a soup
    # left as the last expression of a cell is displayed as its own markup.
    get_ipython().display_formatter.formatters["text/html"].for_type(bs4.BeautifulSoup, str);

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas, bs4, enum, numpy, midgy, functools


In [2]:
%%
<style>
:is(.jp-OutputArea-output.jp-RenderedHTMLCommon, .nb-outputs) :is(td,th) {
    border: 1px solid;
}
</style>


<style>
:is(.jp-OutputArea-output.jp-RenderedHTMLCommon, .nb-outputs) :is(td,th) {
    border: 1px solid;
}
</style>


In [3]:
    # Three-level MultiIndex (2 x 3 x 3 = 18 entries) reused for both axes.
    index = pandas.MultiIndex.from_product(
        [["A", "Z"], ["M", "N", "O"], [1, 2, 3]],
        names=list("JKL"),
    )
    # Empty frame labelled by `index` on both axes; the column levels are
    # renamed to 10/100/1000 and only the first five rows are kept.
    df = pandas.DataFrame(index=index, columns=index)
    df = df.rename_axis(columns=[10, 100, 1000]).head()
    # Single-level variant: drop the two outer levels on each axis and clear
    # the remaining axis names.
    single = (
        df.droplevel((0, 1), axis=0)
        .droplevel((0, 1), axis=1)
        .rename_axis(None, axis=1)
        .rename_axis(None, axis=0)
    )
    df

Unnamed: 0_level_0,Unnamed: 1_level_0,10,A,A,A,A,A,A,A,A,A,Z,Z,Z,Z,Z,Z,Z,Z,Z
Unnamed: 0_level_1,Unnamed: 1_level_1,100,M,M,M,N,N,N,O,O,O,M,M,M,N,N,N,O,O,O
Unnamed: 0_level_2,Unnamed: 1_level_2,1000,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3,1,2,3
J,K,L,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3
A,M,1,,,,,,,,,,,,,,,,,,
A,M,2,,,,,,,,,,,,,,,,,,
A,M,3,,,,,,,,,,,,,,,,,,
A,N,1,,,,,,,,,,,,,,,,,,
A,N,2,,,,,,,,,,,,,,,,,,


In [4]:
    def new(
        tag,
        string=None,
        rowindex=None,
        colindex=None,
        rowcount=None,
        colcount=None,
        rowspan=None,
        colspan=None,
        scope=None,
        *,
        soup=bs4.BeautifulSoup(features="lxml"),
        **kwargs,
    ):
        """Create a detached element named `tag` with table/ARIA attributes.

        `rowindex`, `colindex`, `rowcount` and `colcount` are written as
        `aria-rowindex`, `aria-colindex`, `aria-rowcount` and `aria-colcount`.
        `rowspan`, `colspan` and `scope` are written under their own names.
        Any of these left as `None` is omitted.  Extra keyword arguments are
        copied verbatim as attributes (e.g. `**{"aria-colspan": 3}`) and
        override the named ones on a clash.

        `string`, when truthy, is appended as the element's content.

        `soup` is only used as a factory through `soup.new_tag`; the default
        instance is created once when the function is defined and shared by
        every call.  The returned element is not attached to any document.
        """
        aria_values = {
            "rowindex": rowindex,
            "colindex": colindex,
            "rowcount": rowcount,
            "colcount": colcount,
        }
        plain_values = {"rowspan": rowspan, "colspan": colspan, "scope": scope}
        # Filter on `is not None` rather than truthiness so that legitimate
        # falsy values (for instance `rowspan=0`) are not silently discarded.
        attrs = {
            f"aria-{key}": value
            for key, value in aria_values.items()
            if value is not None
        }
        attrs.update(
            (key, value) for key, value in plain_values.items() if value is not None
        )
        attrs.update(kwargs)
        element = soup.new_tag(tag, attrs=attrs)
        if string:
            element.append(string)
        return element

In [5]:
    # Shorthand factories that pre-fill the tag name of `new` for table rows,
    # header cells and data cells respectively.
    trow, theading, tdata = (
        functools.partial(new, tag_name) for tag_name in ("tr", "th", "td")
    )

In [6]:
locals().setdefault("df2", df)  # keep an existing `df2`, otherwise fall back to `df`
wide = pandas.concat(20 * [pandas.concat(10 * [df2], axis=1)])  # 10 copies across, then 20 copies down

aria needs to be adjusted by the names.

one of the confusions that will be encountered is that
the natural row and column indexing of tidy data frames will be inconsistent with the indexes announced by assistive technology.
assistive technology indexes tables starting from 1 and counts the header rows and index columns as part of the table.

In [7]:
    def row_major_at_rows(df):
        """Return the number of column-header levels plus the number of rows in `df`."""
        header_rows = df.columns.nlevels
        body_rows = len(df)
        return header_rows + body_rows

In [8]:
    def row_major_at_cols(df):
        """Return index levels + one slot if any column level has a truthy name + number of columns."""
        label_slot = 1 if any(df.columns.names) else 0
        return df.index.nlevels + label_slot + len(df.columns)

In [9]:
    # (rows, columns) of the tiled frame `wide`.
    wide.shape

(100, 180)

In [10]:
%% 

    # Build an HTML table for `wide` that shows only the leading and trailing
    # rows/columns, inserts "hidden" placeholder cells for the gap, and tags
    # cells with aria-rowindex / aria-colindex computed from the full frame.
    max_columns=pandas.options.display.max_columns
    # Row limit is hard-coded to 4 here; the pandas option is left commented out.
    max_rows= 4#pandas.options.display.max_rows

    df = wide
    soup = bs4.BeautifulSoup(features="lxml")
    # ROWS / COLS: whether any index level / column level has a truthy name.
    ROWS, COLS = any(df.index.names), any(df.columns.names)
    # a, c: how many leading columns / rows are shown.
    # b, d: position where the trailing columns / rows start.
    a, b, c, d = len(df.columns), len(df.columns), len(df), len(df)
    WIDE = (a + 1) > max_columns
    LONG = (c + 1) > max_rows
    ARIA = True
    # NOTE(review): `col_center` / `row_center` are only assigned when WIDE / LONG
    # is true, but the placeholder cells below read them whenever `col_part` /
    # `row_part` is truthy - looks like a NameError for frames that are not
    # truncated; confirm.
    if WIDE:
        a = max_columns//2
        b -=  a
        # number of columns between the leading and trailing groups
        col_center = b - a
    if LONG:
        c = max_rows//2
        d -= c
        # number of rows between the leading and trailing groups
        row_center = d - c
    # (leading positions, trailing positions) along each axis
    col_ranges = (range(a), range(b, df.shape[1]))
    row_ranges = (range(c), range(d, df.shape[0]))
    soup.append(
        table := new(
            "table", 
            colcount=row_major_at_cols(df) if WIDE else None,
            rowcount=row_major_at_rows(df) if LONG else None))
    # One header row per column level.
    for col_level, col_name in enumerate(df.columns.names):
        # NOTE(review): `row_part` is not assigned until the row loop further down;
        # it is never evaluated here only because ARIA is True and `or` short-circuits.
        table.append(tr := trow(rowindex=col_level+1 if ARIA or LONG and row_part else None))
write the index names

        if not col_level:
            # Index-name headers are written once, in the first header row, and
            # span every header row when there is more than one column level.
            if ROWS or not COLS:
                for row_level, row_name in enumerate(df.index.names):
                    # NOTE(review): str(None) is "None", which is truthy, so the
                    # F"index ..." fallback is only reached for an empty-string name.
                    tr.append(th := theading(
                        str(row_name) or F"index {row_level}",
                        scope="col", 
                        rowspan=df.columns.nlevels if df.columns.nlevels > 1 else None, 
                        colindex=row_level+1 if ARIA else None))
                    
        if COLS:
write the column names

            tr.append(theading(
                str(col_name) or F"level {col_level}",
                scope="row",
                colindex=df.index.nlevels+1 if ARIA else None))

        for col_part, col_range in enumerate(col_ranges):
            if col_part:
write a header placeholder for several columns.
                
                # `col_index` still holds the last position of the leading group here.
                # NOTE(review): the offset is bool(LONG and WIDE) here, while the other
                # cells use int(ROWS and COLS) - confirm which one is intended.
                tr.append(theading("hidden", 
                                   colindex=col_index+2+df.index.nlevels + bool(LONG and WIDE) if ARIA else None,
                                  **{"aria-colspan": col_center}))
            for col_index in col_range:
write the column values

                col_value = df.columns.get_level_values(col_level)[col_index]
                tr.append(theading(str(col_value), scope="col", colindex=df.index.nlevels + int(ROWS and COLS) + col_index + 1 if ARIA or WIDE and col_part else None))


    # Body rows: the leading group, then one placeholder row, then the trailing group.
    for row_part, row_range in enumerate(row_ranges):
        if row_part:
            # Placeholder row for the hidden rows; `row_index` still holds the last
            # position of the leading group at this point.
            table.append(
                tr := trow(rowindex=row_index+2+df.columns.nlevels, **{"aria-rowspan": row_center})
            )
            for row_level in range(df.index.nlevels):
                tr.append(theading(
                    "hidden",
                    colindex=row_level+1
                ))
            if ROWS and COLS:
                # `row_level` is left over from the loop above (the last index level).
                tr.append(tdata("empty", colindex=row_level+2))
            for col_part, col_range in enumerate(col_ranges):
                if col_part:
                    # Cell standing in for the block that is hidden on both axes.
                    tr.append(
                        tdata("hidden", colindex=col_index+2+df.index.nlevels+int(ROWS and COLS), **{"aria-rowspan": row_center, "aria-colspan": col_center}),
                        
                    )
                for col_index in col_range:
                    tr.append(
                        tdata(
                            "hidden",
                            colindex=col_index + 1 + df.index.nlevels + int(ROWS and COLS)))
        
        for row_index in row_range:
            # Visible data row; its row index is offset by the number of header rows.
            table.append(
                tr := trow(rowindex=row_index+1+df.columns.nlevels)
            )        
            # One row header per index level.
            for row_level in range(df.index.nlevels):
                tr.append(theading(
                    str(df.index.get_level_values(row_level)[row_index]),
                    colindex=row_level+1 if ARIA else None,
                    scope="row"
                ))
            if ROWS and COLS:
                # Filler cell in the column that holds the column-level names in the header.
                tr.append(tdata("empty", colindex=row_level+2))
            for col_part, col_range in enumerate(col_ranges):
                if col_part:
                    # Placeholder for the hidden columns of this row.
                    tr.append(
                        tdata("hidden", colindex=col_index+2+ df.index.nlevels + int(ROWS and COLS),
                             **{"aria-colspan": col_center})
                    )
                for col_index in col_range:
                    tr.append(
                        tdata(
                            str(df.iloc[row_index, col_index]),
                            colindex=col_index + 1 + df.index.nlevels + int(ROWS and COLS)))
    # Last expression of the cell: the finished soup.
    soup

J,K,L,10,A,A,A,A,A,A,A,A,A,Z,hidden,A,Z,Z,Z,Z,Z,Z,Z,Z,Z
J,K,L,100,M,M,M,N,N,N,O,O,O,M,hidden,O,M,M,M,N,N,N,O,O,O
J,K,L,1000,1,2,3,1,2,3,1,2,3,1,hidden,3.1,1.1,2,3,1,2,3,1,2,3
A,M,1,empty,,,,,,,,,,,hidden,,,,,,,,,,
A,M,2,empty,,,,,,,,,,,hidden,,,,,,,,,,
hidden,hidden,hidden,empty,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden,hidden
A,N,1,empty,,,,,,,,,,,hidden,,,,,,,,,,
A,N,2,empty,,,,,,,,,,,hidden,,,,,,,,,,
