In [1]:
from google.colab import drive
drive.mount('/gdrive')

Mounted at /gdrive


In [2]:
%cd /gdrive/MyDrive/Colab/projects/convert_bbl_to_bibtex

/gdrive/MyDrive/Colab/projects/convert_bbl_to_bibtex


In [12]:
import re

def convert_bibitem_to_bibtex(bibitem):
    try:
        # Extract citation key
        key_match = re.search(r'\{([^,]+),', bibitem)
        if not key_match:
            return None
        key = key_match.group(1)
        print('key_match=',key_match)
        # Extract authors
        author_match = re.search(r'\bibitem\[[^\]]+\]\{([^}]+)\}(.+?)(?=\\newblock)', bibitem, re.DOTALL)
        if not author_match:
            return None
        authors = author_match.group(2).strip()
        authors = ' and '.join([a.strip() for a in authors.split(',')])

        # Extract title
        title_match = re.search(r'\\newblock\s+(.+?)(?=\\newblock|\\emph)', bibitem, re.DOTALL)
        if not title_match:
            return None
        title = title_match.group(1).strip()

        # Extract other fields
        fields = {}
        journal_match = re.search(r'\\emph\{(.+?)\}', bibitem, re.DOTALL)
        if journal_match:
            fields['journal'] = journal_match.group(1).strip()

        year_match = re.search(r',\s+(\d{4})\.', bibitem)
        if year_match:
            fields['year'] = year_match.group(1).strip()

        url_match = re.search(r'URL\s+\\url\{(.+?)\}', bibitem)
        if url_match:
            fields['url'] = url_match.group(1).strip()

        # Build bibtex entry
        entry_type = 'article' if 'journal' in fields else 'inproceedings'
        bibtex_entry = f"@{entry_type}{{{key},\n"
        bibtex_entry += f"  author = {{{authors}}},\n"
        bibtex_entry += f"  title = {{{title}}},\n"
        for field, value in fields.items():
            bibtex_entry += f"  {field} = {{{value}}},\n"
        bibtex_entry = bibtex_entry.rstrip(',\n') + '\n'  # Remove trailing comma
        bibtex_entry += "}\n"

        return bibtex_entry
    except Exception as e:
        print(f"Error processing bibitem: {bibitem}\n{e}")
        return None

def process_all_bibitems(bbl_content):
    bibtex_entries = []

    # Split the bbl content into individual bibitems
    bibitems = re.split(r'\n(?=\\bibitem)', bbl_content)

    for item in bibitems:
        print('item=',item)
        bibtex_entry = convert_bibitem_to_bibtex(item)
        print('bibtex_entry=',bibtex_entry)

        if bibtex_entry:
            bibtex_entries.append(bibtex_entry)

    # Combine all valid bibtex entries into a single string
    bibtex_content = '\n\n'.join(bibtex_entries)

    return bibtex_content

def main(bbl_file_path, bibtex_file_path):
    # Read the .bbl file
    with open(bbl_file_path, 'r', encoding='utf-8') as file:
        bbl_content = file.read()

    # Process all bibitems and generate the bibtex content
    bibtex_content = process_all_bibitems(bbl_content)

    # Write the valid bibtex content to a .bib file
    with open(bibtex_file_path, 'w', encoding='utf-8') as file:
        file.write(bibtex_content)



In [None]:
# Define the input and output file paths
bbl_file_path = './example.bbl'
bibtex_file_path = './example_output.bib'

# Run the main function
main(bbl_file_path, bibtex_file_path)


item= \begin{thebibliography}{50}
\providecommand{\natexlab}[1]{#1}
\providecommand{\url}[1]{\texttt{#1}}
\expandafter\ifx\csname urlstyle\endcsname\relax
  \providecommand{\doi}[1]{doi: #1}\else
  \providecommand{\doi}{doi: \begingroup \urlstyle{rm}\Url}\fi

bibtex_entry= None
item= \bibitem[Berthelot et~al.(2023)Berthelot, Autef, Lin, Yap, Zhai, Hu, Zheng,
  Talbot, and Gu]{berthelot2023tract}
David Berthelot, Arnaud Autef, Jierui Lin, Dian~Ang Yap, Shuangfei Zhai, Siyuan
  Hu, Daniel Zheng, Walter Talbot, and Eric Gu.
\newblock Tract: Denoising diffusion models with transitive closure
  time-distillation.
\newblock \emph{arXiv preprint arXiv:2303.04248}, 2023.

key_match= <re.Match object; span=(93, 130), match='{berthelot2023tract}\nDavid Berthelot,'>
bibtex_entry= None
item= \bibitem[Brock et~al.(2019)Brock, Donahue, and Simonyan]{brock2018large}
Andrew Brock, Jeff Donahue, and Karen Simonyan.
\newblock Large scale {GAN} training for high fidelity natural image synthesis.
\newbloc