# Publications

A set of tables for handling publications

In [208]:
#| default_exp Publications

In [209]:
#| export
#| hide
from biobib.table import Table, sh, tex_escape, colonify, to_int
from biobib.sheet import Sheet
from nbdev import show_doc
import pandas as pd
import numpy as np

In [210]:
#| hide
from fastcore.test import *
from fastcore.test import *
from fastcore.basics import *
from fastcore.foundation import *

In [211]:
#| export
publication_types = {
    'P': 'Published',
    'A': 'In Press',
    'R': 'In Review'
}

### Define a template for publications

Here is a template we can use for publications. It generates a longtable which can span multiple pages, and provides publications in a format suitable for inclusion in a UC Biobib


In [212]:
#| export
publication_biobib_template = r'''% UC Bio-bib Publication Table
% Created on \VAR{created}

\begin{longtable}{p{1cm}p{0.5cm}p{7.75cm}>{\raggedright}p{5.25cm}p{1.75cm}}
\# & Year & Title and Authors & Publisher & Category\\
\\\hline 
\endfirsthead


\multicolumn{5}{c}%
{{\VAR{table_name} - continued from previous page }} \\ \\
\# & Year & Title and Authors & Publisher & Category\\
\hline 
\endhead

\\
\multicolumn{5}{c}%
{{ \VAR{table_name} continued on next page }} \\
\endfoot

\hline \hline
\endlastfoot

\BLOCK{for publication in items}
\BLOCK{if publication['New?'] != 'Y'}
    \VAR{publication|make_row}
\BLOCK{endif}
\BLOCK{endfor}
\\\hline
\\\hline
   &   & {\bf Since Prior Review:} &    &   \\\\
\BLOCK{for publication in items}
\BLOCK{if publication['New?'] == 'Y'}
    \VAR{publication|make_row}
\BLOCK{endif}
\BLOCK{endfor}
\end{longtable}
'''

#### Author and Editor columns

Authors and editors are found in columns of the publication spreadsheet that begin with `A` for authors and `E` for editors. The number after the `A` or `E` indicates the order of the author or editor. For example, `A1` is the first author, `A2` is the second author, and so on. The same is true for editors.

We set the `max_author` and `max_editor` to the maximum number of authors and editors we expect to see in a publication. This can vary across different publication lists. We use these max values to generate the columns for the authors and editors.

In [213]:
#| export
max_author = 30
max_editor = 4
author_cols = ['A' + str(i+1) for i in range(max_author)]
editor_cols = ['E' + str(i+1) for i in range(max_editor)]

#### Group affiliation symbols

Sometimes it is helpful to indicate group affiliations for authors. For example, if there are graduate students or undergraduate students who contributed to an article, we can indicate this with a symbol. We can define a dictionary of symbols and groups to use for this purpose. The dictionary keys are the symbols and the values are the groups. We can then use this dictionary to generate a column of symbols for the authors.

In [214]:
#| export
decorate_dict = {
    'Undergrad Author': "-UUUU-",
    'Visitor Author': "-VVVV-",
    'PhD Committee Member': "-MMMM-",
    'Graduate Advisee': "-AAAA-",
    'Postdoctoral Advisee': "-PPPP-"
}

In [215]:
class Publications(Table):
    """ 
    A Publication class of Table for use in generating biobibs and CVs 
    """
    def __init__(
            self,
            sheet:Sheet=None, # Google sheet object
            worksheet:str='Publications', # worksheet name in google sheet
            csv_file:str=None, # optional csv file (if using csv files) 
            category:str='P',  # type of service (P = Published, A = In Press, R = In Review)
            table_name:str=None, # Will be determined from category.
            kind=None, # kind of publication (e.g., journal article, book chapter, etc.)
            bold_author:bool=False, # Should the PI author be bolded?
            decorate_groups:bool=False, # Should group-affiliated authors be indicated?
            cumulative:bool=True, # Is this table cumulative? 
            template=publication_biobib_template # default template content
    ):
        filters = {
            'make_row': self.make_row,
            'make_citation': self.make_citation,
            'doi': self.doi,
            'doi_link': self.doi_link,
            'href': self.href
        }
        super().__init__(
            sheet=sheet,
            worksheet=worksheet, 
            table_name=table_name,
            csv_file=csv_file, 
            template=template,
            filters=filters)
        self.cumulative = True if cumulative else False
        self.kind = kind
        self.bold_author = bold_author
        self.decorate_groups = decorate_groups
        self.category = category
        self.df = self.clean_df()
    
    def category_lookup(self, category):
        Categories = {
            'RA': 'Refereed Article',
            'CA': 'Conference Abstract',
            'BC': 'Refereed Book Chapter',
            'CP': 'Refereed Conference Proceedings'
        }
        return Categories[category]
    
    def doi(self, this_doi):
        if this_doi is np.NaN:
            return ""
        else:
            return "doi:{doi}.".format(doi=this_doi)

    def href(self, this_href, text="[pdf]"):
        if this_href is np.NaN:
            return ""
        else:
            return "\\href{{{href}}}{{{text}}}".format(
                href=this_href, text=text)

    def doi_link(self, this_doi):
        if this_doi is np.NaN:
            return ""
        else:
            this_href = "https://doi.org/{doi}".format(doi=str(this_doi))
            text = "doi:{doi}".format(doi=str(this_doi).rstrip())
            return "\\href{{{href}}}{{{text}}}".format(href=this_href, text=text)

    def make_author_list(self, row):
        # We're dealing with dict now, not a pd.Series...
        author_list = pd.Series(
            pd.Series(row)[author_cols].fillna('').values.tolist()
        )
        author_list = author_list[author_list.apply(len) > 0]
        return author_list.tolist()

    def make_row(self, row):
        if row['Kind'] == 'RA':
            return self.make_article(row)
        elif row['Kind'] == 'BC':
            return self.make_chapter(row)
        elif row['Kind'] == 'CP':
            return self.make_chapter(row)

    @staticmethod
    def make_bold(fmt_cols, this_list):
        a = this_list.copy()
        for col in fmt_cols:
            a[col] = "-BOLD_START- " + a[col] + "-BOLD_END-"
        return a

    @staticmethod
    def add_decorator(fmt_cols, this_list, decorator="$^{*}$"):
        a = this_list.copy()
        for col in fmt_cols:
            a[col] = decorator+a[col]
        return a

    def _columns(self, column_string):
        try:
            return [int(a[1:])-1 for a in column_string.split(',')]
        except AttributeError:
            return []

    def format_authors(self, this_row):
        author_list = self.make_author_list(this_row)
        if self.bold_author:
            fmt_cols = self._columns(this_row['PI Author'])
            author_list = self.make_bold(fmt_cols, author_list)
        if self.decorate_groups:
            for column, decorator in decorate_dict.items():
                if this_row[column]:
                    fmt_cols = self._columns(this_row[column])
                    author_list = self.add_decorator(
                        fmt_cols, author_list, decorator=decorator)
        this_row['authors'] = ', '.join(author_list)
        return this_row

    def make_citation(self, this_row):
        this_row = self.format_authors(this_row)
        citation = "\item "
        citation += "{authors} ({year}) {title}. \\emph{{{publisher}}}, {volume}{pages}. {href}".format(  # NOQA
            year=tex_escape(str(this_row['YEAR'])),
            title=tex_escape(this_row['TITLE'].strip()),
            authors=tex_escape(this_row['authors']),
            # doi=self.doi(this_row['DOI']),
            href=self.doi_link(this_row['DOI']),
            volume=tex_escape(this_row['VOL']),
            pages=colonify(tex_escape(this_row['PAGES'])),
            publisher=tex_escape(this_row['PUBLISHER'])
        )
        return citation

    def make_article(self, this_row):
        this_row = self.format_authors(this_row)
        row = ""
        row += "{code} & {year} & {{\\bf {title}}}, {authors} {href} & \\emph{{ {publisher} }} {volume}{pages}. {doi}  & {category}".format(  # NOQA
            code=tex_escape(to_int(this_row['NUM'])),
            year=tex_escape(str(this_row['YEAR'])),
            title=tex_escape(this_row['TITLE'].strip()),
            authors=tex_escape(this_row['authors']),
            doi=self.doi(this_row['DOI']),
            href=self.href(this_row['Link']),
            volume=tex_escape(this_row['VOL']),
            pages=colonify(tex_escape(this_row['PAGES'])),
            publisher=tex_escape(this_row['PUBLISHER']),
            category=tex_escape(self.category_lookup(this_row['Kind']))
        )
        row += "\\\\"
        return row

    def get_editors(self, editors):
        if editors:
            return editors + " (eds.)."
        else:
            return ""

    def make_chapter(self, this_row):
        this_row = self.format_authors(this_row)
        row = ""
        row += "{code} & {year} & {{\\bf {title}}}, {authors} & {editors} \\emph{{ {book} }}. {publisher} & {category}".format(  # NOQA
            code=tex_escape(to_int(this_row['NUM'])),
            year=tex_escape(str(this_row['YEAR'])),
            title=tex_escape(this_row['TITLE'].strip()),
            authors=tex_escape(this_row['authors']),
            editors=tex_escape(self.get_editors(this_row['editors'])),
            book=tex_escape(this_row['Book Title']),
            publisher=tex_escape(this_row['PUBLISHER']),
            category=tex_escape(self.category_lookup(this_row['Kind']))
        )
        row += "\\\\"
        return row

### Provide a data cleaning function for the publication table

In [216]:
#| export

@patch
def clean_df(self:Publications,
             sort_by:str='Year', # variable to sort by
             ascending:bool=True # ascending?
            )->pd.DataFrame:  # cleaned dataframe
    """
    Clean the Service table.
    
    """
    df = Table.table_clean_df(self)
    # Step 1: drop any papers not published
    # Step 1.1: filter to the target publication type, if any.
    if self.kind:
        df = df[df.Kind == self.kind]
    # Step 2: Concatenate authors into a single list, making sure to drop
    # empty author columns
    df['authors'] = list(
            pd.Series(df[author_cols] 
                    .fillna('').values.tolist())
            .apply(lambda x: [i for i in x if i != ''])
            .apply(lambda x: ', '.join(x))
    )
    df['editors'] = list(
            pd.Series(df[editor_cols]
                    .fillna('').values.tolist())
            .apply(lambda x: [i for i in x if i != ''])
            .apply(lambda x: ', '.join(x))
    )
    # Step 3: Cast DOI as a string and remove nan
    df.loc[df['DOI'] == 'nan', 'DOI'] = np.nan

    # Step 4: Cast Pages as a string and remove nan
    df.loc[df['PAGES'] == 'nan', 'PAGES'] = np.nan

    # Step 5: Cast Volume as a string and remove nan
    df.loc[df['VOL'] == 'nan', 'VOL'] = np.nan

    #df = df.sort_values(by=[sort_by], ascending=[ascending])
    return df

In [217]:
sh.get_dataframe

<bound method Sheet.get_dataframe of <biobib.sheet.Sheet object at 0x17759d330>>

In [218]:
biobib_table = Publications(
    sheet=sh,worksheet='Publications',
    table_name='Test',
    template=publication_biobib_template
)

In [219]:
test_eq(biobib_table.table_name,'Test')
test_eq(
    list(biobib_table.df.columns),
    ['NUM', 'YEAR', 'Type', 'TITLE', 'PUBLISHER', '#', 'CODE', 'New?', 
     'Kind', 'Undergrad Author', 'Visitor Author', 'PhD Committee Member', 
     'Graduate Advisee', 'Postdoctoral Advisee', 'PI Author', 'A1', 'A2', 
     'A3', 'A4', 'A5', 'A6', 'A7', 'A8', 'A9', 'A10', 'A11', 'A12', 'A13', 
     'A14', 'A15', 'A16', 'A17', 'A18', 'A19', 'A20', 'A21', 'A22', 'A23', 
     'A24', 'A25', 'A26', 'A27', 'A28', 'A29', 'A30', 'A31', 'E1', 'E2', 
     'E3', 'E4', 'VOL', 'PAGES', 'Book Title', 'VOLUME', 'ISSUE', 
     'PSTART', 'PEND', 'LENGTH', 'Journal Impact Factor ', 
     'Journal Ranking (2017)', 'Journal Ranking (2019)', 'JIF Percentile', 
     'Discpline Rank', 'Broadest Rank', 'Quantile', '1st/2nd', 
     'SENIOR', 'TOTAL CITES', 'COUNT', 'DOI', 'Affiliation', 'Link', 
     'AGU?', 'Grant #1', 'Grant #2', 'Grant #3', 'Abstract', 
     'authors', 'editors'
    ])

In [220]:
print(biobib_table.render_template())

% UC Bio-bib Publication Table
% Created on 2023-06-29 19:09

\begin{longtable}{p{1cm}p{0.5cm}p{7.75cm}>{\raggedright}p{5.25cm}p{1.75cm}}
\# & Year & Title and Authors & Publisher & Category\\
\\\hline 
\endfirsthead


\multicolumn{5}{c}%
{{Test - continued from previous page }} \\ \\
\# & Year & Title and Authors & Publisher & Category\\
\hline 
\endhead

\\
\multicolumn{5}{c}%
{{ Test continued on next page }} \\
\endfoot

\hline \hline
\endlastfoot

    1 & 2000 & {\bf Approaches for the estimation of primary productivity and vegetation structure in the Kalahari region}, Dowty, P.R., Caylor, K.K., Shugart, H.H,, Emanuel, W.R. & Ringrose, S., Chanda, R. (eds.). \emph{ Towards Sustainable Natural Resource Management in the Kalahari Region.  }. University of Botswana Press & Refereed Book Chapter\\
    2 & 2002 & {\bf The southern African Regional Science Initiative (SAFARI 2000): Wet season campaigns}, Otter, LB, Scholes, RJ, Dowty, PR, Privette, JP, Caylor, K.K., Ringrose, S, Mukelab

In [221]:
#| export
publication_cv_template = r'''% UC CV Publication Table
% Created on \VAR{created}

\BLOCK{ set years = [] }
\BLOCK{ for publication in items if publication.YEAR not in years }
    \BLOCK{ do years.append(publication.YEAR) }
\BLOCK{ endfor }

\BLOCK{ set max_year = years[-1]}

\mbox{\ \ \ \underline{\textbf{\VAR{max_year } }}}

\begin{etaremune}

\BLOCK{for publication in items}
\BLOCK{if (publication.YEAR == max_year) }
\VAR{publication|make_citation}
\BLOCK{ endif }
\BLOCK{ endfor }

\BLOCK{ for year in years|reverse }
\BLOCK{ if (year != max_year) }

\vspace{0.1in}
\mbox{\ \ \ \underline{\textbf{\VAR{year } }}}
\vspace{0.1in}

\BLOCK{for publication in items}
\BLOCK{if (publication.YEAR == year) }
\VAR{publication|make_citation}
\BLOCK{ endif }
\BLOCK{ endfor }
\BLOCK{ endif }
\BLOCK{ endfor }

\end{etaremune}
'''

In [222]:
cv_table = Publications(
    sheet=sh,worksheet='Publications',
    table_name='Publications',
    template=publication_cv_template,
    bold_author=True,
    decorate_groups=True
)

In [223]:
print(cv_table.render_template())

% UC CV Publication Table
% Created on 2023-06-29 19:09

                                                                                            

\mbox{\ \ \ \underline{\textbf{2023 }}}

\begin{etaremune}

\item Giroux, S., Kaminski, P., Waldman, K., Blekking, J., Evans, T.P., \textbf{ Caylor, K.K.} (2023) Smallholder social networks: Advice seeking and adaptation in rural Kenya. \emph{Agricultural Systems}, 205(103574). \href{https://doi.org/10.1016/j.agsy.2022.103574}{doi:10.1016/j.agsy.2022.103574}
\item $^{\bullet}$Warter, M., Singer, M.B., Cuthbert, M., Roberts, D.A., \textbf{ Caylor, K.K.}, Sabathier, R., Stella, J. (2023) Modeling seasonal vegetation phenology from hydroclimatic drivers for contrasting plant functional groups within drylands of the Southwestern USA. \emph{Environmental Research: Ecology}, . \href{https://doi.org/10.1088/2752-664X/acb9a0}{doi:10.1088/2752-664X/acb9a0}
\item $^{\bullet}$Forbes, E., $^{\ddagger}$Benenati, V., Frey, S., Hirsch, M, Koech, G., $^

In [224]:
cv_table.write_template(path='../tex/CV/')

In [225]:
class InPress(Publications):

    def __init__(self,
                 category='A',
                 table_name='Works in Press',
                 template=publication_cv_template,
                 **kwargs
            ):
        super().__init__(
            table_name=table_name,
            template=template, category=category,
            **kwargs)
        self.category = category


class Submitted(Publications):

    def __init__(self, 
                 category='R',
                 table_name='Works Submitted',
                 template=publication_cv_template,
                 **kwargs
                 ):
        super().__init__(
            table_name=table_name,
            template=template, category=category,
            **kwargs)
        self.category = category


In [226]:
in_press_table = InPress(
    sheet=sh,worksheet='Publications',
    table_name='In Press',
    template=publication_cv_template,
    bold_author=True,
    decorate_groups=True
)

  pd.Series(df[author_cols]
  pd.Series(df[editor_cols]


In [227]:
print(in_press_table.render_template())

% UC CV Publication Table
% Created on 2023-06-29 19:09



\mbox{\ \ \ \underline{\textbf{ }}}

\begin{etaremune}



\end{etaremune}


In [228]:
in_review_table = Submitted(
    sheet=sh,worksheet='Publications',
    table_name='In Press',
    template=publication_cv_template,
    bold_author=True,
    decorate_groups=True
)

In [229]:
print(in_review_table.render_template())

% UC CV Publication Table
% Created on 2023-06-29 19:09

    

\mbox{\ \ \ \underline{\textbf{2023 }}}

\begin{etaremune}

\item Warren, J., $^{\blacktriangle}$DeCarlo, K, Hassina, B., Bilheux, J.C., \textbf{ Caylor, K.K} (2023) Integrating fine root diameter and watershed mapping to characterize rhizosphere hydrology. \emph{Rhizosphere}, . \href{https://doi.org/}{doi:}
\item $^{\blacktriangle}$Morgan, B., \textbf{ Caylor, K.K.} (2023) Improving plant water use observations with UAV-enabled surface energy balance and atmospheric profiling techniques. \emph{Water Resources Research}, . \href{https://doi.org/}{doi:}
\item $^{\blacktriangle}$Boser, A., \textbf{ Caylor, K.K.}, Larsen, A., Pascolini-Campbell, M., Reager, J.T., Carleton, T. (2023) Machine learning and remote sensing reveal water savings potential in California agriculture. \emph{Nature Sustainability}, . \href{https://doi.org/}{doi:}
\item Lopus, S., Waldman, K., Guido, Z., \textbf{ Caylor, K.K.}, Evans, T. (2023) Changing p

In [230]:
#| hide
import nbdev; nbdev.nbdev_export()