In [None]:
import pandas as pd

In [None]:
# Read the tab-separated publication list; the first row of the file
# supplies the column names.
publications = pd.read_csv(
    "publications.tsv",
    sep="\t",
    header=0,
)
# Leave the frame as the final expression so the notebook renders it.
publications


In [None]:
# Characters that are swapped for HTML entities before a value is embedded
# in the generated front matter.
html_escape_table = {
    "&": "&amp;",
    '"': "&quot;",
    "'": "&apos;",
}


def html_escape(text):
    """Return ``text`` with ``&``, ``"`` and ``'`` replaced by HTML entities.

    Characters that have no entry in ``html_escape_table`` are copied through
    unchanged.
    """
    pieces = []
    for ch in text:
        pieces.append(html_escape_table.get(ch, ch))
    return "".join(pieces)

In [None]:
# Destructive: deletes every .md file in ../_publications/ via the shell.
!rm ../_publications/*.md

In [None]:
import os

# Write one Jekyll markdown page (YAML front matter + body) per row of the
# `publications` frame into ../_publications/.
for _, item in publications.iterrows():

    # One stem drives both the permalink and the output file name.
    html_filename = str(item.pub_date) + "-" + item.slug
    # basename() drops any directory part, so the page is always written
    # directly inside ../_publications/.
    md_filename = os.path.basename(html_filename + ".md")

    # The title sits inside a double-quoted YAML scalar, where backslash and
    # double quote are special; escape both so such titles stay valid YAML.
    title_yaml = item.title.replace("\\", "\\\\").replace('"', '\\"')

    # pandas reads an empty cell as NaN, whose str() is "nan" (3 characters),
    # so the "> 5" length tests below skip missing values (and any value of
    # five characters or fewer).
    has_url = len(str(item.url)) > 5

    ## YAML variables

    md = '---\ntitle: "' + title_yaml + '"\n'

    md += "collection: publications"

    md += "\npermalink: /publication/" + html_filename

    md += "\nauthors: '" + html_escape(item.authors) + "'"

    if len(str(item.summary)) > 5:
        md += "\nexcerpt: '" + html_escape(item.summary) + "'"

    md += "\ndate: " + str(item.pub_date)

    md += "\nvenue: '" + html_escape(item.venue) + "'"

    if has_url:
        md += "\npaperurl: '" + item.url + "'"

    md += "\ncitation: '" + html_escape(item.citation) + "'"

    if not pd.isna(item.awards):
        md += "\nawards: '" + html_escape(item.awards) + "'"

    md += "\n---"

    ## Markdown description for individual page

    if len(str(item.description)) > 5:
        md += "\n" + html_escape(item.description) + "\n"

    if has_url:
        md += "\n[Download paper here](" + item.url + ")\n"

    # The raw (unescaped) citation is repeated in a fenced code block.
    md += "\n```\n" + item.citation + "\n```"

    # Explicit UTF-8 so non-ASCII names and titles are written identically on
    # every platform instead of depending on the locale's default encoding.
    with open("../_publications/" + md_filename, "w", encoding="utf-8") as f:
        f.write(md)

In [None]:
# List the contents of ../_publications/ to inspect the generated files.
!ls ../_publications/

In [None]:
# Spot check: print one specific generated page to inspect its contents.
!cat ../_publications/2021-08-31-cw-nnk-generalization.md

In [None]:
# Print every non-missing entry of the awards column, one per line.
for award in publications['awards'].dropna():
    print(award)

In [None]:
# Read the tab-separated patent list; the first row of the file supplies
# the column names.
patents = pd.read_csv(
    "patents.tsv",
    sep="\t",
    header=0,
)
# Leave the frame as the final expression so the notebook renders it.
patents

In [4]:
# Replacement entities for the three characters escaped in generated pages.
html_escape_table = {"&": "&amp;", '"': "&quot;", "'": "&apos;"}


def html_escape(text):
    """Swap ``&``, ``"`` and ``'`` in ``text`` for their HTML entities.

    Every other character is passed through untouched.
    """
    escaped = [
        html_escape_table[ch] if ch in html_escape_table else ch
        for ch in text
    ]
    return "".join(escaped)

In [None]:
# Destructive: deletes every .md file in ../_patents/ via the shell.
!rm ../_patents/*.md

In [10]:
def format_author_names(authors_str):
    """Abbreviate each author's first name to a single initial.

    ``authors_str`` is a comma-separated list of names. For every name with
    two or more words the result keeps the first letter of the first word and
    the last word, so ``'Ada Lovelace, Alan Mathison Turing'`` becomes
    ``'A. Lovelace, A. Turing'`` (middle names are dropped).

    A single-word name (e.g. ``'Plato'``) is kept as-is rather than being
    silently removed from the list. Empty entries (such as those produced by
    a trailing comma) are skipped, and whitespace around commas is optional.

    Returns the formatted names joined with ``', '``; an empty or
    whitespace-only input yields ``''``.
    """
    formatted_authors = []

    # Split on the bare comma and let str.split() absorb surrounding
    # whitespace, so "A B,C D" and "A B, C D" are treated the same.
    for author in authors_str.split(","):
        names = author.split()
        if not names:
            continue
        if len(names) == 1:
            # Nothing to abbreviate; keep the mononym so no author is lost.
            formatted_authors.append(names[0])
        else:
            first_initial = names[0][0]
            last_name = names[-1]
            formatted_authors.append(f"{first_initial}. {last_name}")

    return ", ".join(formatted_authors)

In [11]:
import os

# Write one Jekyll markdown page (YAML front matter + body) per row of the
# `patents` frame into ../_patents/.
for _, item in patents.iterrows():

    # Create filename from filing date and application number; "/" in the
    # application number is replaced so it cannot act as a path separator.
    html_filename = str(item.filing_date) + "-" + item.application_number.lower().replace("/", "-")
    # basename() drops any remaining directory part, so the page is always
    # written directly inside ../_patents/.
    md_filename = os.path.basename(html_filename + ".md")

    # The title sits inside a double-quoted YAML scalar, where backslash and
    # double quote are special; escape both so such titles stay valid YAML.
    title_yaml = item.title.replace("\\", "\\\\").replace('"', '\\"')

    # pandas reads an empty cell as NaN, whose str() is "nan" (3 characters),
    # so the "> 5" length test skips a missing abstract (and any abstract of
    # five characters or fewer).
    has_abstract = len(str(item.abstract)) > 5

    ## YAML variables
    md = '---\ntitle: "' + title_yaml + '"\n'

    md += "collection: patents"

    md += "\npermalink: /patent/" + html_filename

    md += "\nstatus: '" + html_escape(item.status) + "'"

    md += "\napplication_number: '" + html_escape(item.application_number) + "'"

    md += "\nfiling_date: " + str(item.filing_date)

    md += "\nauthors: '" + html_escape(format_author_names(item.inventors)) + "'"

    if has_abstract:
        md += "\nabstract: '" + html_escape(item.abstract) + "'"

    md += "\n---"

    ## Markdown description for individual page
    if has_abstract:
        md += "\n" + html_escape(item.abstract) + "\n"

    # Explicit UTF-8 so non-ASCII names and titles are written identically on
    # every platform instead of depending on the locale's default encoding.
    with open("../_patents/" + md_filename, "w", encoding="utf-8") as f:
        f.write(md)

In [None]:
# List the contents of ../_patents/ to inspect the generated files.
!ls ../_patents/