This tool updates a BibTeX file exported from Scopus so that it also includes the cited references, in a format that can be used for analysis with PyBibX.
A second Scopus export of the same search query, saved as a CSV file that includes the references, is also required.

In [None]:
import pandas as pd
import bibtexparser
import re

# File paths
# NOTE: the two input paths are placeholders; point them at the actual
# Scopus exports (same search query for both) before running.
csv_file_path = 'scopus.csv'  # CSV file path
bib_file_path = 'scopus.bib'  # BibTeX file path
output_bib_path = 'cleaned.bib'

# Load the CSV file
csv_data = pd.read_csv(csv_file_path, encoding='utf-8')

# Ensure the CSV has the necessary columns *before* accessing them, so a
# malformed export produces this message instead of a bare KeyError.
if not {'DOI', 'References'}.issubset(csv_data.columns):
    raise ValueError("CSV must contain 'DOI' and 'References' columns.")

# Replace missing values with empty strings. A plain astype(str) would turn
# NaN into the literal text 'nan', which would then be written to the output.
csv_data['DOI'] = csv_data['DOI'].fillna('').astype(str).str.strip().str.lower()
csv_data['References'] = csv_data['References'].fillna('').astype(str).str.strip()

# Load the existing BibTeX file
with open(bib_file_path, 'r', encoding='utf-8') as bib_file:
    bib_database = bibtexparser.load(bib_file)

# Index the existing BibTeX entries by normalized (stripped, lowercased) DOI.
# Entries without a DOI cannot be matched and are left out of the index.
doi_to_entry = {}
for entry in bib_database.entries:
    entry_doi = entry.get('doi', '').strip().lower()
    if entry_doi:
        doi_to_entry[entry_doi] = entry

# Track updates
updated_entries_count = 0

# Iterate over the CSV rows (both columns are already normalized above)
for doi, reference_text in zip(csv_data['DOI'], csv_data['References']):
    # Skip rows with no DOI or no reference list: nothing to match or attach
    if not doi or not reference_text:
        continue

    # Skip rows whose DOI has no corresponding BibTeX entry
    entry = doi_to_entry.get(doi)
    if entry is None:
        continue

    # Store the cited references in the entry's 'references' field
    entry['references'] = reference_text
    updated_entries_count += 1

# Serialize to a string first so the @comment lines can be stripped from the
# text itself; re.sub operates on strings, not on file objects.
bibtex_text = bibtexparser.dumps(bib_database)
cleaned_content = re.sub(r'@comment.*\n', '', bibtex_text, flags=re.IGNORECASE)

# Write the updated, cleaned BibTeX file
with open(output_bib_path, 'w', encoding='utf-8') as output_bib_file:
    output_bib_file.write(cleaned_content)

print(f"Updated BibTeX file saved to '{output_bib_path}'. Updated {updated_entries_count} entries.")