In [1]:
import numpy as np
import pandas as pd

In [2]:
df = pd.DataFrame(np.random.rand(15, 5), columns=list('abcde'))

First try - this doesn't work:

In [3]:
def wrap(df, top, col, smallest, float_format):
    top_N = df[col].sort_values(ascending=smallest)[:top]

    def do_format(value):
        # print(f'{value} in {top_N}?')
        if value == top_N.iloc[0]:
            return f'\\textbf{"{"}{float_format % value}{"}"}'
        elif value in top_N[1:]:
            return f'\\underline{"{"}{float_format % value}{"}"}'
        else:
            return float_format % value
    return do_format

def highlight_best_formatters(df, top=3, smallest=False, float_format='%.3f'):
    formatters = {}
    for col in df.columns:            
        formatters[col] = wrap(df, top, col, smallest, float_format)
    return formatters

In [4]:
print(df.sort_values('a').to_latex(formatters=highlight_best_formatters(df)))

\begin{tabular}{lrrrrr}
\toprule
{} &              a &              b &              c &              d &              e \\
\midrule
6  &          0.001 &          0.553 &          0.575 &          0.234 &          0.376 \\
0  &          0.062 &          0.654 & \textbackslash textbf\{0.978\} & \textbackslash textbf\{0.979\} &          0.150 \\
5  &          0.069 &          0.246 &          0.620 &          0.540 &          0.245 \\
12 &          0.266 & \textbackslash textbf\{0.991\} &          0.558 &          0.561 &          0.813 \\
4  &          0.288 &          0.857 &          0.352 &          0.022 &          0.393 \\
8  &          0.330 &          0.447 &          0.263 &          0.789 &          0.379 \\
14 &          0.549 &          0.378 &          0.614 &          0.065 &          0.442 \\
3  &          0.599 &          0.154 &          0.341 &          0.024 &          0.427 \\
1  &          0.671 &          0.506 &          0.063 &          0.912 &          0.018 \\


Why?

In [5]:
top = df['a'].sort_values()[:5]

In [6]:
[df['a'][i] in top for i in range(len(df))]

[False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False,
 False]

In [7]:
[df['a'][i] in set(top) for i in range(len(df))]

[True,
 False,
 False,
 False,
 True,
 True,
 True,
 False,
 False,
 False,
 False,
 False,
 True,
 False,
 False]

New way:

In [8]:
def _wrap(df, top, col, smallest, float_format):
    """If this was inlined, do_format would bind to the variables in the other function."""
    top_N = df[col].sort_values(ascending=smallest)[:top]
    top = set(top_N)
    tip = max(top_N)
    
    def do_format(value):
        # print(f'{value} in {top_N}?')
        if value == tip:
            return f'\\textbf{"{"}{float_format % value}{"}"}'
        elif value in top:
            return f'\\underline{"{"}{float_format % value}{"}"}'
        else:
            return float_format % value
    return do_format

def highlight_best_formatters(df, top=3, smallest=False, float_format='%.3f'):
    formatters = {}
    for col in df.columns:            
        formatters[col] = _wrap(df, top, col, smallest, float_format)
    return formatters

In [9]:
print(df.sort_values('a').to_latex(formatters=highlight_best_formatters(df)))

\begin{tabular}{lrrrrr}
\toprule
{} &                 a &                 b &                 c &                 d &                 e \\
\midrule
6  &             0.001 &             0.553 &             0.575 &             0.234 &             0.376 \\
0  &             0.062 &             0.654 &    \textbackslash textbf\{0.978\} &    \textbackslash textbf\{0.979\} &             0.150 \\
5  &             0.069 &             0.246 &             0.620 &             0.540 &             0.245 \\
12 &             0.266 &    \textbackslash textbf\{0.991\} &             0.558 &             0.561 &             0.813 \\
4  &             0.288 & \textbackslash underline\{0.857\} &             0.352 &             0.022 &             0.393 \\
8  &             0.330 &             0.447 &             0.263 &             0.789 &             0.379 \\
14 &             0.549 &             0.378 &             0.614 &             0.065 &             0.442 \\
3  &             0.599 &             0.154 &  

Not there yet - `\textbackslash `?

In [10]:
print(df.sort_values('a').to_latex(formatters=highlight_best_formatters(df)).replace('\\textbackslash ', '\\'))

\begin{tabular}{lrrrrr}
\toprule
{} &                 a &                 b &                 c &                 d &                 e \\
\midrule
6  &             0.001 &             0.553 &             0.575 &             0.234 &             0.376 \\
0  &             0.062 &             0.654 &    \textbf\{0.978\} &    \textbf\{0.979\} &             0.150 \\
5  &             0.069 &             0.246 &             0.620 &             0.540 &             0.245 \\
12 &             0.266 &    \textbf\{0.991\} &             0.558 &             0.561 &             0.813 \\
4  &             0.288 & \underline\{0.857\} &             0.352 &             0.022 &             0.393 \\
8  &             0.330 &             0.447 &             0.263 &             0.789 &             0.379 \\
14 &             0.549 &             0.378 &             0.614 &             0.065 &             0.442 \\
3  &             0.599 &             0.154 &             0.341 &             0.024 &             0.427

No, this is still wrong - the `{` and `}` are escaped. Not elegant, but we can still prevail:

In [11]:
print(
    df.sort_values('a').to_latex(formatters=highlight_best_formatters(df)
    ).replace('\\textbackslash ', '\\'           
    ).replace('\\{', '{'
    ).replace('\\}', '}'
    )
)

\begin{tabular}{lrrrrr}
\toprule
{} &                 a &                 b &                 c &                 d &                 e \\
\midrule
6  &             0.001 &             0.553 &             0.575 &             0.234 &             0.376 \\
0  &             0.062 &             0.654 &    \textbf{0.978} &    \textbf{0.979} &             0.150 \\
5  &             0.069 &             0.246 &             0.620 &             0.540 &             0.245 \\
12 &             0.266 &    \textbf{0.991} &             0.558 &             0.561 &             0.813 \\
4  &             0.288 & \underline{0.857} &             0.352 &             0.022 &             0.393 \\
8  &             0.330 &             0.447 &             0.263 &             0.789 &             0.379 \\
14 &             0.549 &             0.378 &             0.614 &             0.065 &             0.442 \\
3  &             0.599 &             0.154 &             0.341 &             0.024 &             0.427 \\
1  &

In [12]:
# just curious
highlight_best_formatters(df)

{'a': <function __main__._wrap.<locals>.do_format(value)>,
 'b': <function __main__._wrap.<locals>.do_format(value)>,
 'c': <function __main__._wrap.<locals>.do_format(value)>,
 'd': <function __main__._wrap.<locals>.do_format(value)>,
 'e': <function __main__._wrap.<locals>.do_format(value)>}

In [13]:
print(df.sort_values('a').to_markdown())

|    |         a |        b |         c |         d |         e |
|---:|----------:|---------:|----------:|----------:|----------:|
|  6 | 0.0013934 | 0.553195 | 0.575358  | 0.233873  | 0.375585  |
|  0 | 0.0621873 | 0.653801 | 0.977735  | 0.978553  | 0.149937  |
|  5 | 0.069436  | 0.246411 | 0.619638  | 0.540077  | 0.245042  |
| 12 | 0.265707  | 0.991039 | 0.557707  | 0.561196  | 0.813315  |
|  4 | 0.287694  | 0.857394 | 0.351649  | 0.0224172 | 0.392871  |
|  8 | 0.330043  | 0.446655 | 0.262996  | 0.789314  | 0.379058  |
| 14 | 0.54936   | 0.37831  | 0.613509  | 0.0646641 | 0.441517  |
|  3 | 0.598793  | 0.153997 | 0.340737  | 0.0240907 | 0.426835  |
|  1 | 0.671455  | 0.505958 | 0.0631812 | 0.91176   | 0.0181395 |
|  9 | 0.6835    | 0.269331 | 0.77331   | 0.961203  | 0.39736   |
| 11 | 0.748438  | 0.904598 | 0.557228  | 0.427526  | 0.835697  |
|  7 | 0.784577  | 0.282613 | 0.293526  | 0.104375  | 0.830668  |
| 10 | 0.846529  | 0.221557 | 0.806278  | 0.687463  | 0.937165  |
|  2 | 0.8

In [14]:
from IPython.display import Markdown

In [15]:
Markdown(df.sort_values('a').to_markdown())

|    |         a |        b |         c |         d |         e |
|---:|----------:|---------:|----------:|----------:|----------:|
|  6 | 0.0013934 | 0.553195 | 0.575358  | 0.233873  | 0.375585  |
|  0 | 0.0621873 | 0.653801 | 0.977735  | 0.978553  | 0.149937  |
|  5 | 0.069436  | 0.246411 | 0.619638  | 0.540077  | 0.245042  |
| 12 | 0.265707  | 0.991039 | 0.557707  | 0.561196  | 0.813315  |
|  4 | 0.287694  | 0.857394 | 0.351649  | 0.0224172 | 0.392871  |
|  8 | 0.330043  | 0.446655 | 0.262996  | 0.789314  | 0.379058  |
| 14 | 0.54936   | 0.37831  | 0.613509  | 0.0646641 | 0.441517  |
|  3 | 0.598793  | 0.153997 | 0.340737  | 0.0240907 | 0.426835  |
|  1 | 0.671455  | 0.505958 | 0.0631812 | 0.91176   | 0.0181395 |
|  9 | 0.6835    | 0.269331 | 0.77331   | 0.961203  | 0.39736   |
| 11 | 0.748438  | 0.904598 | 0.557228  | 0.427526  | 0.835697  |
|  7 | 0.784577  | 0.282613 | 0.293526  | 0.104375  | 0.830668  |
| 10 | 0.846529  | 0.221557 | 0.806278  | 0.687463  | 0.937165  |
|  2 | 0.886484  | 0.741416 | 0.183655  | 0.020167  | 0.416224  |
| 13 | 0.917943  | 0.528622 | 0.771195  | 0.756434  | 0.572145  |

In [16]:
Markdown(df.sort_values('a').to_markdown(formatters=highlight_best_formatters(df)))

TypeError: tabulate() got an unexpected keyword argument 'formatters'

So, Markdown doesn't allow for formatters. But then, LaTeX didn't have them for what we're doing either.

In [None]:
def to_latex_topN(df, top=3, print=print):
    output = df.to_latex(formatters=highlight_best_formatters(df, top=top)
        ).replace('\\textbackslash ', '\\'           
        ).replace('\\{', '{'
        ).replace('\\}', '}')
    if print:
        print(output)
        return None
    return output


In [None]:
to_latex_topN(df)

In [None]:
to_latex_topN(df, print=False)

### Gotchas
* the alignment of output is done before escaping (this will show with undeerscores...
* what about other arguments to `to_latex`? (like `caption`) - pass them.