# table

> a table object for use in generating biobibs and CVs


In [123]:
#| default_exp table

In [124]:
#| hide
#| export
import pandas as pd
import numpy as np
import time
import re
from biobib.sheet import Sheet
from jinja2 import Environment, FileSystemLoader, Template
from read_google_sheet import get_dataframe
from read_google_sheet import biobib_sheet as sh
import copy
from nbdev import show_doc
from fastcore.test import *
from fastcore.test import *
from fastcore.basics import *
from fastcore.foundation import *

In [125]:
#| export
def safe_float(val):
    """
    Convert `val` to a float, returning 0 when it cannot be converted.

    Both unparseable strings (`ValueError`) and values of an unsupported type
    such as `None` (`TypeError`) fall back to 0.
    """
    try:
        return float(val)
    except (ValueError, TypeError):
        # float('abc') raises ValueError; float(None) raises TypeError
        return 0


### Create a latex_env 

We need to create an environment for generating formatted LaTeX content. This environment specifies how a `block` will be initiated and terminated in jinja, as well as how variables and comments will be defined.

In [126]:
#| export
# Jinja2 environment whose delimiters are chosen to coexist with LaTeX source:
# blocks are written \BLOCK{...}, variables \VAR{...} and comments \#{...}.
# The delimiters are raw strings so the backslashes are taken literally rather
# than relying on Python leaving unknown escapes such as '\B' untouched.
latex_env = Environment(
    extensions=['jinja2.ext.do'],
    block_start_string=r'\BLOCK{',
    block_end_string='}',
    variable_start_string=r'\VAR{',
    variable_end_string='}',
    comment_start_string=r'\#{',
    comment_end_string='}',
    line_statement_prefix='%%',
    line_comment_prefix='%#',
    trim_blocks=True,
    autoescape=False,
    # template files are looked up in the 'templates' directory
    loader=FileSystemLoader('templates'))


### Author definitions

Authorship data is stored in a series of columns labelled `A1`, `A2`, and so on, up to a maximum of 30 authors (`A30`). If an article needs more authors than that, increase the `max_author` variable.

In [127]:
# Upper bound on the number of authors recorded for any single manuscript.
max_author = 30

# Author column labels, numbered from 1: 'A1', 'A2', ..., up to 'A<max_author>'.
author_cols = [f'A{number}' for number in range(1, max_author + 1)]


In [128]:
test_eq(len(author_cols),max_author)

Set some keys for specifying author information

In [129]:
#| export
# Maps an author-role label to the plain-text marker used to tag that role.
# Each marker value is also a key in `tex_escape`'s conversion table, where it
# is replaced by a LaTeX superscript symbol.
decorate_dict = {
    'Undergrad Author': "-UUUU-",
    'Visitor Author': "-VVVV-",
    'PhD Committee Member': "-MMMM-",
    'Graduate Advisee': "-AAAA-",
    'Postdoctoral Advisee': "-PPPP-"
}

## Helper functions


In [130]:
#| export
def to_int(value:int|str # value to coerce
          )->int: # value
    """ coerces any value to an integer representation """
    try:
        v = str(int(value))
    except:
        v = value
    return v

In [131]:
show_doc(to_int)

---

[source](https://github.com/kcaylor/biobib/blob/master/biobib/table.py#L49){target="_blank" style="float:right; font-size:smaller"}

### to_int

>      to_int (value:int|str)

coerces any value to an integer representation

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| value | int \| str | value to coerce |
| **Returns** | **int** | **value** |

In [132]:
#| export
def make_cell(text:str, # string to split into a cell
              size:str='' # spacing 
             )->str: # wrapped text
    """
    Wrap text in a LaTeX `\\makecell`, breaking the line after every comma.

    Each comma in `text` is kept and followed by a LaTeX line break (`\\\\`);
    `size` is placed in front of the `\\makecell` inside the outer braces.
    """
    comma_with_break = ',\\\\'
    stacked_lines = comma_with_break.join(text.split(','))
    return "{" + size + " \\makecell{ " + stacked_lines + "} }"


In [133]:
show_doc(make_cell)

---

[source](https://github.com/kcaylor/biobib/blob/master/biobib/table.py#L59){target="_blank" style="float:right; font-size:smaller"}

### make_cell

>      make_cell (text:str, size:str='')

wrap text in a makecell

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| text | str |  | string to split into a cell |
| size | str |  | spacing |
| **Returns** | **str** |  | **wrapped text** |

In [134]:
#| export
# Literal substrings and the LaTeX text that replaces each of them.
_TEX_REPLACEMENTS = {
    '-UUUU-': r'$^{\ddagger}$',
    '-VVVV-': r'$^{\star}$',
    '-MMMM-': r'$^{\bullet}$',
    '-AAAA-': r'$^{\blacktriangle}$',
    '-PPPP-': r'$^{\blacklozenge}$',
    '-BOLD_START-': r'\textbf{',
    '-BOLD_END-': r'}',
    '&': r'\&',
    '%': r'\%',
    '$': r'\$',
    '#': r'\#',
    '_': r'\_',
    '{': r'\{',
    '}': r'\}',
    '~': r'\textasciitilde{}',
    '^': r'\^{}',
    '\\': r'\textbackslash{}',
    '<': r'\textless{}',
    '>': r'\textgreater{}',
    'Ω': r'$\Omega$',
    'δ': r'$\delta$',
    '’': r"'",
    '‐': r'--',
    '“': r'``',
    '”': r"''",
    'é': r'\'e',
}

# A single alternation over every literal key (longest first, so multi-character
# markers win over any shorter key), followed by a named group that matches
# `nan` only when it stands alone as a word. The `nan` rule has to live outside
# the escaped literals: passing a regex through `re.escape` turns it into plain
# text that never matches.
_TEX_PATTERN = re.compile(
    '|'.join(re.escape(key) for key in sorted(_TEX_REPLACEMENTS, key=len, reverse=True))
    + r'|(?P<nan>(?<!\w)nan(?!\w))'
)


def _tex_replace(match):
    """Return the LaTeX replacement for one match of `_TEX_PATTERN`."""
    if match.lastgroup == 'nan':
        return '--'
    return _TEX_REPLACEMENTS[match.group()]


def tex_escape(text:str # plain text message
              )->str: # message escaped to appear correctly in LaTex
    """
    Format text for use in LaTeX.

    `text` is converted with `str()` first, so non-string values are accepted.
    A value whose whole text is `nan` (a missing value) becomes an empty string.
    Otherwise, in a single pass: decoration and bold markers are turned into
    LaTeX markup, LaTeX special characters are escaped, a few Unicode
    characters are mapped to LaTeX equivalents, and a stand-alone `nan` word
    inside longer text is replaced by `--`.
    """
    text = str(text)
    if text == 'nan':
        # the entire value is missing: render nothing rather than a dash
        return ''
    return _TEX_PATTERN.sub(_tex_replace, text)


In [135]:
show_doc(tex_escape)

---

[source](https://github.com/kcaylor/biobib/blob/master/biobib/table.py#L73){target="_blank" style="float:right; font-size:smaller"}

### tex_escape

>      tex_escape (text:str)

formats text for use in LaTex

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| text | str | plain text message |
| **Returns** | **str** | **message escaped to appear correctly in LaTex** |

The `tex_escape` function contains the necessary code to convert the `decorate_dict` into LaTex representations:

In [136]:
from IPython.display import display, Markdown

for key, value in decorate_dict.items():
    latex_value = tex_escape(value)
    display(Markdown(rf"{key}: {value} $\rightarrow$ {latex_value}"))

Undergrad Author: -UUUU- $\rightarrow$ $^{\ddagger}$

Visitor Author: -VVVV- $\rightarrow$ $^{\star}$

PhD Committee Member: -MMMM- $\rightarrow$ $^{\bullet}$

Graduate Advisee: -AAAA- $\rightarrow$ $^{\blacktriangle}$

Postdoctoral Advisee: -PPPP- $\rightarrow$ $^{\blacklozenge}$

The `tex_escape` function also escapes Greek characters:

In [137]:
test_eq(tex_escape('Ω'),'$\\Omega$')
test_eq(tex_escape('δ'),'$\\delta$')

And does some formatting conversions

In [138]:
test_eq(tex_escape('-BOLD_START-'),'\\textbf{')
test_eq(tex_escape('-BOLD_END-'),'}')

In [139]:
#| export
def str_join(df:pd.DataFrame, # dataframe 
             sep:str, # separation character between items
             *cols:list # list of columns to make into a list
            )->str: # string of values
    """
    Concatenate the named columns of a dataframe row by row.

    The columns are combined left to right, each cast to string and separated
    by `sep`. When only one column is named it is returned as-is.
    """
    from functools import reduce

    def _concat(left, right):
        # cast both sides so numeric columns can be concatenated as text
        return left.astype(str).str.cat(right.astype(str), sep=sep)

    selected = [df[name] for name in cols]
    return reduce(_concat, selected)


def stringify(value:str|int|float)->str:
    """
    Convert a value into a string.

    Returns `str(value)`.
    """
    return str(value)


def colonify(string:str  # string that we want to add a colon to
            )->str:      # pre-pend with a colon
    """
    Prefix a non-empty string with a colon and a space.

    Falsy input (an empty string or `None`) yields an empty string.
    """
    return ": " + string if string else ""

def reversed(l:list)->list:
    """
    Reverse a list.

    Returns `l[::-1]`, a reversed copy produced by slicing; `l` itself is not
    modified.
    """
    # NOTE(review): this name shadows Python's built-in `reversed` within this module.
    return l[::-1]

Add these filter functions to the `latex_env`

In [140]:
#| export
# Register the helper functions as template filters, keyed by the name used in templates.
latex_env.filters.update({
    'colonify': colonify,
    'str_join': str_join,
    'tex_escape': tex_escape,
    'make_cell': make_cell,
    'stringify': stringify,
    'reversed': reversed,
})

### Create a dummy template for use

Here is a simple template that will work as a stand-in for our templates in more complex tables


In [141]:
basic_template = r'''% UC Bio-bib Generic Table
% Created on \VAR{created}

\begin{longtable}{llp{12cm}}
Year & Role & Activity \\
\hline 
\endfirsthead


\multicolumn{3}{c}%
{{\VAR{table_name} - continued from previous page }}
Year & Role & Activity \\
\hline 
\endhead

\multicolumn{3}{c}%
{{ \VAR{table_name} continued on next page }}
\endfoot

\hline \hline
\endlastfoot

\BLOCK{for item in items}
\VAR{item.Year} & \VAR{item.Role} & \VAR{item.Activity} \\
\BLOCK{endfor}
\end{longtable}

'''

In [142]:
template = latex_env.from_string(basic_template)
rendered_template = template.render(table_name='Test Table', created='today', items=[{'Year': 2020, 'Role': 'Test Role', 'Activity': 'Test Activity'}])
test_eq(rendered_template,
r'''% UC Bio-bib Generic Table
% Created on today

\begin{longtable}{llp{12cm}}
Year & Role & Activity \\
\hline 
\endfirsthead


\multicolumn{3}{c}%
{{Test Table - continued from previous page }}
Year & Role & Activity \\
\hline 
\endhead

\multicolumn{3}{c}%
{{ Test Table continued on next page }}
\endfoot

\hline \hline
\endlastfoot

2020 & Test Role & Test Activity \\
\end{longtable}
''')

In [143]:
#| export
class Table:
    """
    A table of records loaded from a worksheet or a .csv file, together with
    the jinja2 environment and template used to render it.
    """
    
    def __init__(self, 
                 sheet:Sheet=None, # google sheet containing data
                 worksheet:str=None, # name of worksheet from which to build the table
                 csv_file:str=None, # name of .csv file
                 env:Environment=latex_env, # environment for formatting
                 table_name:str=None, # Name of table
                 template:str=None, # template variable for output
                 template_file:str=None, # template file if template string not provided
                 filters:dict|list=None, # extra filters: {name: function} or a list of functions
                ):
        """
        Load the table data and prepare its template.

        Data comes from `csv_file` when given, otherwise from `worksheet` of
        `sheet`. `template` (a string) takes precedence over `template_file`;
        when neither is given no `template` attribute is set by this method.

        Raises `ValueError` when neither `sheet` nor `csv_file` is provided.
        """
        if not csv_file and sheet is None:
            raise ValueError("Table needs either a `sheet` or a `csv_file` to load data from")

        self.worksheet_name = worksheet
        # fall back to the class name so subclasses get a sensible default
        self.table_name = table_name or self.__class__.__name__
        self.df = pd.read_csv(csv_file) if csv_file else sheet.get_dataframe(self.worksheet_name)
        self.columns = ""
        self.type = "longtable"
        self.env = env
        
        if filters:
            # Accept a {name: function} mapping, or a plain list of functions
            # that are registered under their own `__name__`.
            if hasattr(filters, 'items'):
                named_filters = dict(filters.items())
            else:
                named_filters = {func.__name__: func for func in filters}
            # NOTE: with the default `env` this updates the shared module-level
            # environment, so the filters become visible to every table using it.
            self.env.filters.update(named_filters)
        
        if template:
            self.template = self.env.from_string(template)
        elif template_file:
            self.template = self.env.get_template(template_file)

    


### Initialize a sheet for use in making tables


In [144]:
#| export
# Name of the sheet handed to `Sheet` below.
sheet_name = "Caylor CV"
# Credentials file handed to `Sheet`; the path is relative — presumably resolved
# against the current working directory (TODO confirm).
credentials_filename = 'credentials.json'
# NOTE(review): this rebinds `sh`, which the import cell already bound via
# `from read_google_sheet import biobib_sheet as sh`; from here on `sh` is this Sheet.
# NOTE(review): the cell is marked `#| export`, so this construction presumably
# runs whenever the exported module is imported — confirm that is intended.
sh = Sheet(sheet_name,credentials_filename)

### Make a Table using this sheet.

In [145]:
table = Table(sheet=sh,worksheet='Summary',table_name='Test')

In [146]:
test_eq(table.table_name,'Test')
test_eq(table.worksheet_name,'Summary')
test_eq(list(table.df.columns),['Current Year', '2023'])

### Cleaning Tables

Provide a function to clean a table dataframe in preparation for generating text output. Generally, this function will always provide the capacity to remove any items from the table that aren't part of a current review period. In addition, the function should sort data into the appropriate order.


In [147]:
#| export

@patch
def table_clean_df(self:Table
                  )->pd.DataFrame: # cleaned dataframe
    """
    Return a cleaned copy of the table's dataframe.

    Works on a deep copy of `self.df`, passes it through `clean_cumulative`,
    and, when the table defines a `category` attribute, keeps only the rows
    whose `Type` equals that category.
    """
    cleaned = self.clean_cumulative(copy.deepcopy(self.df))
    if not hasattr(self, 'category'):
        return cleaned
    return cleaned[cleaned.Type == self.category]
    

@patch
def clean_cumulative(self:Table, 
                       df:pd.DataFrame # dataframe to filter
                      )->pd.DataFrame:
    """
    Remove any items that are not part of the current review period if this
    isn't a cumulative review.

    When `self.cumulative` is `False`, the `Eval` column is coerced to integers
    and only rows with `Eval == 1` are kept. Blank, missing or non-numeric
    `Eval` entries are treated as 0 and therefore dropped. In every other case
    `df` is returned unchanged. The frame passed in is never modified.
    """
    if self.cumulative is False:
        # work on a copy so the caller's dataframe is left untouched
        df = df.copy()
        # to_numeric + fillna tolerates blank/NaN cells, on which astype(int) alone raises
        df['Eval'] = pd.to_numeric(df['Eval'], errors='coerce').fillna(0).astype(int)
        df = df[df.Eval == 1]
    return df

#### Rendering Templates

A template can be rendered using the `render_template` function. This function generates the appropriate template text using the `self.template.render` function.


In [148]:
#| export

@patch
def render_template(self:Table
                   )->str:  # rendered text for this table
    """
    Render this table's template and return the resulting text.

    The template receives the table name, a creation timestamp formatted as
    `YYYY-MM-DD HH:MM`, and the rows of `self.df` as a list of dictionaries.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M")
    records = list(self.df.to_dict('records'))
    return self.template.render(
        table_name=self.table_name,
        created=timestamp,
        items=records,
    )

@patch
def write_template(self:Table, 
                   path=None):  # location to save the template content
    """ 
    Write out the contents of this table using the table's template.

    The output file is named `<table_name>.tex`. When `path` is given it is
    prepended to that name as-is (plain string concatenation), so a directory
    must include its trailing separator; otherwise the file is written
    relative to the current working directory.
    """
    content = self.render_template()
    filename = self.table_name + '.tex'
    # `path` is a string prefix, not joined with a separator
    file = path + filename if path else filename

    with open(file, "w") as f:
        print(content, file=f)


### Helper function to format href links

Provide a function to format an href link in LaTex


In [149]:
#| export

@patch
def href(self:Table, 
         this_href:str,   # href text
         link:str='link'  # link text
        )->str:   # Latex-formatted href 
    """
    Format a URL as a LaTeX `\\href` whose visible text is `[link]`.

    Returns an empty string when `this_href` is missing (NaN or `None`).
    """
    # pd.isna detects any missing value, not just the `np.nan` singleton, and
    # avoids the `np.NaN` alias, which no longer exists in NumPy 2.0.
    if pd.isna(this_href):
        return ""
    return f"\\href{{{this_href}}}{{[{link}]}}"

In [150]:
table = Table(sheet=sh,worksheet='Summary',table_name='Test')
test_eq(table.href('test'),'\\href{test}{[link]}')
test_eq(table.href('test','link text'),'\\href{test}{[link text]}')

In [151]:
table = Table(
    sheet=sh,
    worksheet='Summary',
    table_name='Test',
    template=basic_template)

In [152]:
#| hide
import nbdev; nbdev.nbdev_export()