In [1]:
from copy import deepcopy
import re

def fmt_reference(ref):
    """Build a compact citation key from a free-text reference.

    The key is the first whitespace-separated token of ``ref`` (with
    punctuation removed) followed by a four-digit number whose first digit
    is 1-3, taken from the last token that contains one.

    Args:
        ref: Reference text, e.g. ``'Smith et al., 2010'``.

    Returns:
        The concatenation ``<first token><year>``, e.g. ``'Smith2010'``.

    Raises:
        AssertionError: If no token contains a year-like number.
        IndexError: If ``ref`` contains no tokens at all.
    """
    # Remove every character that is neither a word character nor whitespace
    # from each token.
    tokens = []
    for word in ref.split():
        tokens.append(re.sub(r'[^\w\s]', '', word.strip()))
    author = tokens[0]

    # Walk the tokens back to front so that the last year-like token wins.
    year = None
    for token in reversed(tokens):
        found = re.search('([1-3][0-9]{3})', token)
        if found is not None:
            year = found.group(1)
            break
    assert year is not None, 'Year not found.'

    return '{:s}{:s}'.format(author, year)
    

In [2]:
# Input (raw text) and output (converted text) locations, relative to the
# notebook's working directory.
src = 'metabolism/raw_supp.txt'
dst = 'metabolism/converted_supp.txt'

""" READ TEXT FILE """
# Read the whole source document into `txt`.
# NOTE(review): no encoding is given, so the platform default is used - confirm
# this matches how the source file was saved.
with open(src, 'r') as file:
    # read() already returns a fresh, immutable str, so no copy is needed.
    txt = file.read()

In [7]:
""" FORMAT CITATIONS. """

# Shortest-possible parenthesised span on a single line. Written as a raw
# string so the regex backslashes are not read as (invalid) string escapes.
parenthesis_pattern = r'\(.*?\)'
# Any character followed by a four-digit number whose first digit is 1-3.
year_pattern = '.([1-3][0-9]{3})'


def is_ref(x):
    """Return True when `x` contains at least one match of `year_pattern`."""
    return len(re.findall(year_pattern, x)) > 0


matches = re.findall(parenthesis_pattern, txt)

for match in [m for m in matches if is_ref(m)]:
    # Parentheticals mentioning 'Seborg' are deliberately left untouched.
    if 'Seborg' in match:
        continue

    # Drop the enclosing parentheses, then treat ';' as the separator between
    # individual references.
    references = match.strip('()').split(';')
    cites = ','.join([fmt_reference(r) for r in references])
    # Escaped backslash (same style as the '\\ref' literal used for figures)
    # instead of the invalid escape sequence '\c'.
    citation = '\\cite{{{:s}}}'.format(cites)
    # str.replace rewrites every occurrence, so a parenthetical that appears
    # several times is converted everywhere on its first visit.
    txt = txt.replace(match, citation)

In [4]:
""" FORMAT FIGURE REFERENCES. """

# define figure IDs: main figures are '1'..'7', supplementary are 'S1'..'S4'
num_main = 7
num_supplementary = 4
fig_names = ['{:d}'.format(i+1) for i in range(num_main)] + ['S{:d}'.format(i+1) for i in range(num_supplementary)]

# replace each reference, e.g. 'Fig. 1' -> 'Fig. \ref{fig:metabolism:fig1}'
fig_patterns = ('Fig. {:s}', 'Figs. {:s}', 'Figure {:s}')
for fig_name in fig_names:
    fig_reference = '\\ref{{fig:metabolism:fig{:s}}}'.format(fig_name)
    for fig_pattern in fig_patterns:
        old_fmt = fig_pattern.format(fig_name)
        new_fmt = fig_pattern.format(fig_reference)
        # The negative lookahead stops 'Fig. 1' from matching the prefix of
        # 'Fig. 10' (or 'Fig. S1' inside 'Fig. S10'), which a plain
        # str.replace would corrupt once there are ten or more figures.
        old_regex = re.escape(old_fmt) + r'(?!\d)'
        # A function replacement is used so the backslash in new_fmt is
        # inserted literally rather than parsed as a regex template escape.
        txt = re.sub(old_regex, lambda _m: new_fmt, txt)

In [8]:
""" WRITE TEXT FILE """
# Save the converted text to `dst`; mode 'w' truncates any existing file.
with open(dst, 'w') as out_file:
    out_file.write(txt)

In [14]:
""" RENAME FIGURES. """
from glob import glob
from os.path import join, splitext
from os import rename


def strip_suffix(name, suffix):
    """Return `name` without a trailing `suffix`.

    `name` is returned unchanged when it does not end with `suffix` (or when
    `suffix` is empty). Only one occurrence of the suffix is removed.
    """
    if suffix and name.endswith(suffix):
        return name[:-len(suffix)]
    return name


p = 'metabolism/figs/rendered/'
for fpath in glob(join(p, '*.png')):
    fname, fext = splitext(fpath)
    # Remove the exact '-01' suffix. str.rstrip('-01') treats its argument as
    # a set of characters and would strip any trailing run of '-', '0' and '1'
    # (e.g. 'fig1-01' -> 'fig', 'fig10-01' -> 'fig').
    fname = strip_suffix(fname, '-01')
    # Dry run: the actual rename is intentionally left disabled.
    #rename(fpath, fname+fext)