![scholtrack](https://github.com/user-attachments/assets/368377a3-c43b-4d59-a656-0f70f9d3f329)


# ScholTrack Widget Demo

**Author**: [Sergey Prokudin](https://scholar.google.com/citations?user=xSywCzAAAAAJ&hl=en).
[[Project Page](https://github.com/sergeyprokudin/scholtrack)]

This notebook contains a simple widget built on top of the ScholTrack API. The widget lets you provide a list of Semantic Scholar URLs for the papers of interest and fetch the citations of those papers. Please see the project page [README](https://github.com/sergeyprokudin/scholtrack) for detailed information.

In [None]:
# @title Install ScholTrack
# Clone the ScholTrack repository and pip-install it from the checkout.
!git clone https://github.com/sergeyprokudin/scholtrack.git
%cd /content/scholtrack
!pip install .
# Step back out of the checkout so later cells run from /content.
%cd /content

In [11]:
# @title Fetch Citations from Semantic Scholar
import re
import ipywidgets as widgets
from IPython.display import display, clear_output
from scholtrack.api import CitationExplorerAPI
from scholtrack.exporter import CitationExporter
from google.colab import files

# Initialize the API client shared by the fetch callback below.
# No API key is supplied (api_key=None).
# NOTE(review): presumably this means unauthenticated requests — confirm
# against CitationExplorerAPI.
api_client = CitationExplorerAPI(api_key=None)

# Helper function to extract paper ID from URL
# The title slug between "/paper/" and the 40-character hex ID is optional, so
# both ".../paper/<slug>/<id>" and the slug-less ".../paper/<id>" form match.
# The optional group is tried first, so URLs that carry a slug resolve to the
# same ID as before.
_PAPER_ID_PATTERN = re.compile(
    r"semanticscholar\.org/paper/(?:.+?/)?([a-f0-9]{40})"
)


def extract_paper_id(url: str) -> str:
    """Return the Semantic Scholar paper ID embedded in *url*.

    Args:
        url: Text containing a Semantic Scholar paper URL, with or without
            the title slug before the ID.

    Returns:
        The 40-character lowercase hexadecimal paper ID, or an empty string
        when *url* does not contain a recognizable paper URL.
    """
    match = _PAPER_ID_PATTERN.search(url)
    return match.group(1) if match else ""

# Render one citation record as the text block shown in the output area
def _format_citation(index, citation, show_abstract):
    """Return the display text for a single citation.

    Args:
        index: 1-based position printed in front of the title.
        citation: Citation record; the citing paper's metadata is read from
            its "citingPaper" entry.
        show_abstract: Whether to include the abstract line.

    Returns:
        A multi-line string ending in a newline, so that printing it leaves a
        blank line between consecutive entries.

    Fields that are missing or empty (including explicit ``None``) are
    rendered as "N/A" (0 for the citation count) instead of raising or
    printing "None".
    """
    citing_paper = citation.get("citingPaper") or {}
    title = citing_paper.get("title") or "N/A"
    authors = citing_paper.get("authors") or []
    author_list = ", ".join(author.get("name") or "N/A" for author in authors)
    citation_count = citing_paper.get("citationCount") or 0
    year = citing_paper.get("year") or "N/A"
    venue = citing_paper.get("venue") or "N/A"
    arxiv_id = (citing_paper.get("externalIds") or {}).get("ArXiv")
    arxiv_url = f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else "N/A"
    semantic_scholar_url = citing_paper.get("url") or "N/A"

    lines = [
        f"{index}. Title: {title}",
        f"   Authors: {author_list}",
        f"   Citation Count: {citation_count}",
        f"   Year: {year}",
        f"   Venue: {venue}",
    ]
    if show_abstract:
        abstract = citing_paper.get("abstract") or "N/A"
        lines.append(f"   Abstract: {abstract}")
    lines.append(f"   ArXiv URL: {arxiv_url}")
    lines.append(f"   Semantic Scholar URL: {semantic_scholar_url}\n")
    return "\n".join(lines)


# Function to fetch and export citations
def fetch_and_export_citations(b):
    """Fetch citations for the entered URLs, preview them, and export a file.

    Registered as the click handler of the "Fetch Citations" button. Reads the
    current widget values, prints a preview of the top results into the
    ``output`` widget, writes the full list to ``citations_output.<type>`` and
    triggers a browser download of that file.

    Args:
        b: The argument passed by the button's click callback; unused.
    """
    with output:
        clear_output()
        print("Fetching citations for the provided papers, can take a few minutes for papers with thousands of citations and long lists...")

        # Snapshot the widget values once so the whole run uses one
        # consistent set of settings.
        display_limit = display_limit_slider.value
        sort_by = sort_by_dropdown.value
        show_abstract = show_abstract_checkbox.value
        output_type = output_type_dropdown.value

        # Accept commas and/or whitespace (including newlines) as separators
        # and ignore blank entries such as the one left by a trailing comma.
        urls_list = [url for url in re.split(r"[,\s]+", urls_input.value) if url]
        if not urls_list:
            print("Error: Please enter at least one Semantic Scholar URL.")
            return

        paper_ids = [extract_paper_id(url) for url in urls_list]
        if not all(paper_ids):
            print("Error: Some URLs did not contain valid paper IDs.")
            return

        # Fetch citations for all paper IDs
        citations = api_client.get_citations_for_papers(paper_ids, cites_at_least_n=cites_at_least_n_slider.value)
        n_citations = len(citations)
        # Never claim to show more entries than were actually found.
        n_shown = min(display_limit, n_citations)

        # Display first N results based on the display limit
        print(f"Found {n_citations} citations in total.\n")
        print(f"Showing first {n_shown} results:\n")
        citations_sorted = sorted(
            citations,
            key=lambda x: CitationExporter.get_sort_value(x, sort_by),
            reverse=True,
        )

        for i, citation in enumerate(citations_sorted[:display_limit], start=1):
            print(_format_citation(i, citation, show_abstract))

        # Save citations based on the output type and return file download
        output_file = "citations_output." + output_type
        if output_type == 'csv':
            CitationExporter.save_to_csv(citations, filename=output_file, sort_by=sort_by)
        elif output_type == 'json':
            CitationExporter.save_to_json(citations, filename=output_file)
        elif output_type == 'txt':
            CitationExporter.save_to_txt(citations, filename=output_file, sort_by=sort_by, show_abstract=show_abstract)

        print(f"Found {n_citations} citations, showing top {n_shown}. Please see the output file (JSON, CSV, TXT) for the full list.")
        files.download(output_file)

# Create interactive widgets
# Example papers pre-filled in the URL box, joined into one comma-separated string.
_default_urls = ",".join([
    "https://www.semanticscholar.org/paper/NeRF-Mildenhall-Srinivasan/428b663772dba998f5dc6a24488fff1858a0899f",
    "https://www.semanticscholar.org/paper/DreamFusion%3A-Text-to-3D-using-2D-Diffusion-Poole-Jain/4c94d04afa4309ec2f06bdd0fe3781f91461b362",
    "https://www.semanticscholar.org/paper/3D-Gaussian-Splatting-for-Real-Time-Radiance-Field-Kerbl-Kopanas/2cc1d857e86d5152ba7fe6a8355c2a0150cc280a",
])

# Free-text box holding the paper URLs to look up.
urls_input = widgets.Textarea(
    description="URLs:",
    placeholder="Enter Semantic Scholar URLs separated by commas",
    value=_default_urls,
    layout=widgets.Layout(width="90%", height="100px"),
)

# Format of the exported file.
output_type_dropdown = widgets.Dropdown(
    description="Output Type:",
    options=["csv", "json", "txt"],
    value="csv",
    layout=widgets.Layout(width="50%"),
)

# Sort key used for the preview and passed to the exporters.
sort_by_dropdown = widgets.Dropdown(
    description="Sort By:",
    options=["citations", "arxiv", "year"],
    value="citations",
    layout=widgets.Layout(width="50%"),
)

# Number of results previewed in the output area.
display_limit_slider = widgets.IntSlider(
    description="Display Limit:",
    min=1,
    max=50,
    step=1,
    value=5,
    layout=widgets.Layout(width="60%"),
)

# Value forwarded as `cites_at_least_n` when fetching citations.
cites_at_least_n_slider = widgets.IntSlider(
    description="Cites-at-least-n:",
    min=0,
    max=10,
    step=1,
    value=0,
    layout=widgets.Layout(width="60%"),
)

# Toggles abstracts in the preview and in the TXT export.
show_abstract_checkbox = widgets.Checkbox(
    description="Show Abstracts",
    value=False,
    layout=widgets.Layout(width="30%"),
)

# Button that starts the fetch; its click handler is attached below.
run_button = widgets.Button(
    description="Fetch Citations",
    button_style="success",
    layout=widgets.Layout(width="40%", height="50px"),
)

# Output area for displaying results
output = widgets.Output()

# When the button is clicked, fetch and export citations
run_button.on_click(fetch_and_export_citations)

# Display the widgets, stacked top to bottom with the two dropdowns side by side
_dropdown_row = widgets.HBox([output_type_dropdown, sort_by_dropdown])
_form_rows = [
    urls_input,
    _dropdown_row,
    display_limit_slider,
    cites_at_least_n_slider,
    show_abstract_checkbox,
    run_button,
    output,
]
display(widgets.VBox(_form_rows))


VBox(children=(Textarea(value='https://www.semanticscholar.org/paper/NeRF-Mildenhall-Srinivasan/428b663772dba9…