In [24]:
import time
import requests
import json
import pandas as pd

class RateLimitExceededError(Exception):
    """Custom exception for rate limit errors."""
    pass

def exponential_backoff_retry(
    func,
    title,
    max_retries=5,
    base_delay=1,
    max_delay=32
):
    """
    Retries a function with exponential backoff.

    Args:
        func: A callable that may raise an exception.
        title: The title of the paper to search for.
        max_retries: Maximum number of retries before giving up.
        base_delay: Initial delay in seconds.
        max_delay: Maximum delay in seconds.

    Returns:
        The result of the function if successful.

    Raises:
        The last exception raised if all retries fail.
    """
    for attempt in range(1, max_retries + 1):
        try:
            return func(title)
        except RateLimitExceededError as e:
            if attempt == max_retries:
                raise  # Re-raise the last exception
            else:
                delay = min(base_delay * 2**(attempt - 1), max_delay)
                print(f"Attempt {attempt} failed due to rate limit. Retrying in {delay:.2f} seconds...")
                time.sleep(delay)
        except Exception as e:
            print(f"An error occurred: {e}")
            raise

def search_paper_by_title(title):
    url = "https://api.semanticscholar.org/graph/v1/paper/search"
    params = {
        "query": f"title:({title})",
        "fields": "title,url,publicationTypes,publicationDate,openAccessPdf,citationCount,authors,abstract",
        "year": "2020-",
        "limit": 1  # Adjust as needed
    }

    response = requests.get(url, params=params)
    if response.status_code == 200:
        data = response.json()
        if data.get("data"):
            return data["data"][0]  # Return the first matching paper
        else:
            print(f"No matching papers found for {title}.")
            return None
    elif response.status_code == 429:
        raise RateLimitExceededError("Rate limit exceeded. Please wait before retrying.")
    else:
        response.raise_for_status()
        
def get_paper_citations(paper_id, fields=None, year=None, limit=None):
    """
    Retrieves citation information for a given paper ID from the Semantic Scholar API.

    Args:
        paper_id (str): The Semantic Scholar paper ID.
        fields (list, optional): List of fields to include in the response. Defaults to None. Options include:
            * title
            * url
            * publicationTypes
            * publicationDate
            * openAccessPdf
            * citationCount
            * authors
            * abstract
            * contexts
            * intents
            * isInfluential

    Returns:
        dict: Citation data for the specified paper.
    """
    base_url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/citations"
    params = {}
    if fields:
        params['fields'] = ','.join(fields)
    if year:
        params['year'] = year
    if limit:
        params['limit'] = limit
    
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: Unable to fetch citations for paper ID {paper_id}")
        return None
    
def get_paper_references(paper_id, fields=None, year=None, limit=None):
    """
    Retrieves citation information for a given paper ID from the Semantic Scholar API.

    Args:
        paper_id (str): The Semantic Scholar paper ID.
        fields (list, optional): List of fields to include in the response. Defaults to None. Options include:
            * title
            * url
            * publicationTypes
            * publicationDate
            * openAccessPdf
            * citationCount
            * authors
            * abstract
            * contexts
            * intents
            * isInfluential

    Returns:
        dict: Citation data for the specified paper.
    """
    base_url = f"https://api.semanticscholar.org/graph/v1/paper/{paper_id}/references"
    params = {}
    if fields:
        params['fields'] = ','.join(fields)
    if year:
        params['year'] = year
    if limit:
        params['limit'] = limit
    
    response = requests.get(base_url, params=params)
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: Unable to fetch citations for paper ID {paper_id}")
        return None


In [28]:
paper_titles = [
    'Latent Space Editing in Transformer-Based Flow Matching', 
    'A Survey of Multimodal Controllable Diffusion Models',
    'Paint by Example: Exemplar-based Image Editing with Diffusion Models',
    'Smooth Diffusion: Crafting Smooth Latent Spaces in Diffusion Models',
    "Unifying Diffusion Models' Latent Space, with Applications to CycleDiffusion and Guidance",
    'Learned representation-guided diffusion models for large-image generation',
    'GDUI: Guided Diffusion Model for Unlabeled Images',
    "Hierarchical Clustering for Conditional Diffusion in Image Generation",
    "Nested Diffusion Models Using Hierarchical Latent Priors",
    "Self-Guided Diffusion Models",
    "Gradient Guidance for Diffusion Models: An Optimization Perspective",
    "DiffMat: Latent diffusion models for image-guided material generation",
    "Adding Conditional Control to Text-to-Image Diffusion Models",
    "DIFFUSION MODELS ALREADY HAVE A SEMANTIC LATENT SPACE",
    "Understanding the Latent Space of Diffusion Models through the Lens of Riemannian Geometry",
    "UNIFYING DIFFUSION MODELS' LATENT SPACE, WITH APPLICATIONS TO CYCLEDIFFUSION AND GUIDANCE",
    "MOTION GUIDANCE: DIFFUSION-BASED IMAGE EDITING WITH DIFFERENTIABLE MOTION ESTIMATORS",
    "Seeing and Hearing: Open-domain Visual-Audio Generation with Diffusion Latent Aligners"
]

data = []
for title in paper_titles:
    try:
        paper_data = exponential_backoff_retry(search_paper_by_title, title)
        if paper_data:
            # print(json.dumps(paper_data, indent=2))
            data.append(paper_data)
    except RateLimitExceededError:
        print("Exceeded rate limit. Please try again later.")
    except Exception as e:
        print(f"An error occurred: {e}")

df = pd.json_normalize(data)
print(df)

No matching papers found for A Survey of Multimodal Controllable Diffusion Models.
No matching papers found for Hierarchical Clustering for Conditional Diffusion in Image Generation.
No matching papers found for Gradient Guidance for Diffusion Models: An Optimization Perspective.
                                     paperId  \
0   ca743e75ce090bbf686307e41bd8747661768fbe   
1   4f1502111d35aa6651dfaedfeb1184b3c3dd2fcb   
2   3333fa6dc9d39cad3d5cd87da9ae39e5a6aefe27   
3   36a5328c337697b96c9e6a9a04df0c924aa421f7   
4   0bbd619ad6dfb69114735d6d8ca166c20301188b   
5   2cfb086b6414e990a2203da746c05ba0c0638134   
6   5a613652d700f9a271b6d01c7d9e4223e9883300   
7   b798c925a4c43ea09e76a1c748491ef70067c0c6   
8   019abc8974dd46d7eec9a51818f64cc896c66499   
9   efbe97d20c4ffe356e8826c01dc550bacc405add   
10  a02313d56a6f71be9aafe43628e0f3a1d0cb858e   
11  d7074976c2609568902a6b6ca45f6c71d9cb66bf   
12  36a5328c337697b96c9e6a9a04df0c924aa421f7   
13  c27da349811cbbafd5896befb0bb138d87583873   

In [None]:
for i, row in enumerate(data):
    print(f"{paper_titles[i]}:\n{data[i]}\n")

In [10]:
df.columns

Index(['paperId', 'url', 'title', 'abstract', 'citationCount', 'openAccessPdf',
       'publicationTypes', 'publicationDate', 'authors', 'openAccessPdf.url',
       'openAccessPdf.status'],
      dtype='object')

In [14]:
df[['title', 'citationCount', 'publicationDate']]

Unnamed: 0,title,citationCount,publicationDate
0,Latent Space Editing in Transformer-Based Flow...,22,2023-12-17
1,Paint by Example: Exemplar-based Image Editing...,327,2022-11-23
2,Smooth Diffusion: Crafting Smooth Latent Space...,15,2023-12-07
3,"Unifying Diffusion Models' Latent Space, with ...",59,2022-10-11
4,Learned Representation-Guided Diffusion Models...,15,2023-12-12
5,GDUI: Guided Diffusion Model for Unlabeled Images,0,2024-03-18
6,Nested Diffusion Models Using Hierarchical Lat...,0,2024-12-08
7,Self-Guided Diffusion Models,26,2022-10-12
8,DiffMat: Latent diffusion models for image-gui...,8,2024-01-01
9,Adding Conditional Control to Text-to-Image Di...,2934,2023-02-10


In [27]:
citation_data = get_paper_references(df.loc[0].paperId, fields=["title", "abstract", "citationCount", "publicationDate"], year=2022, limit=150)
citations_df = pd.json_normalize(citation_data['data'])
citations_df

Unnamed: 0,citedPaper.paperId,citedPaper.title,citedPaper.abstract,citedPaper.citationCount,citedPaper.publicationDate
0,c231418d40fa0eb67ee6a1901add09e9af433a4f,Guided Diffusion from Self-Supervised Diffusio...,Guidance serves as a key concept in diffusion ...,9.0,2023-12-14
1,2f403d194b42d10c3a438736388c8812831b1361,Latent Traversals in Generative Models as Pote...,Despite the significant recent progress in dee...,9.0,2023-04-25
2,721f9afcfe45c70ba0a98f8aed833e28b278b275,DiffFit: Unlocking Transferability of Large Di...,Diffusion models have proven to be highly effe...,62.0,2023-04-13
3,d1c33172c2ffbc038f0598f3ac56bb04af79c904,An Edit Friendly DDPM Noise Space: Inversion a...,Denoising diffusion probabilistic models (DDPM...,91.0,2023-04-12
4,d9b95937934d7291b7c253b28b6c9aaee033c91d,Forget-Me-Not: Learning to Forget in Text-to-I...,The significant advances in applications of te...,124.0,2023-03-30
5,a64e9fe44051d93202853a43656def4b44f84883,Discovering Interpretable Directions in the Se...,Denoising Diffusion Models (DDMs) have emerged...,28.0,2023-03-20
6,6d23c64e7feb217d53f01f532e8e8885e62f76b2,Unsupervised Discovery of Semantic Latent Dire...,"Despite the success of diffusion models (DMs),...",19.0,2023-02-24
7,d24b4f34197df0257390b57f02537e6ce3284f2e,Universal Guidance for Diffusion Models,Typical diffusion models are trained to accept...,177.0,2023-02-14
8,be6d7185c4579d911e9ad059b3834395d43d7f28,Minimizing Trajectory Curvature of ODE-based G...,"Recent ODE/SDE-based generative models, such a...",42.0,2023-01-27
9,7acc71fad70c4c65203739f156bcb440587df901,Scalable Adaptive Computation for Iterative Ge...,Natural data is redundant yet predominant arch...,93.0,2022-12-22


In [20]:
paper_id = "649def34f8be52c8b66281af98ae884c09aef38b"
citation_data = get_paper_citations(paper_id, fields=["title", "abstract", "citationCount", "publicationDate"], year=2022, limit=15)

# Convert citation data to a pandas DataFrame
if citation_data and 'data' in citation_data:
    citations_df = pd.json_normalize(citation_data['data'])
    print(citations_df)
else:
    print("No citation data available.")


                         citingPaper.paperId  \
0   0c94efd5648b69f369d278afc2b3419139238c50   
1   9f3ae8055e227edb413c54417c9c216f1f554f52   
2   db4e8d662dbe80f3ddf78e69b1c1053500894d25   
3   50a3f0dd12114fb2ca90a5511a6325524c3f6013   
4   5eafc35cedbb28b033009947ca73b40007b2b407   
5   4772df95a893061e0fedc9a09c56f95d8926fb9d   
6   036fd8b92722023742dc7fceb19a2ea1d56828de   
7   ca48013d99a608e800ae34388fe9fba9ea6ca280   
8   3dd5ad34012164c4ec9c571a12cc6a7561683dea   
9   02487e9dfefdcb667be4dc160780321555662a2e   
10  3a66e3a6fe1f9e1d95140f0c8fefc4ff964ba89d   
11  46c96074a4e33e90bc01b3c869cac9ea3a0b8fe7   
12  d33856de0e4f1499d6a48fdbd91e92700127b2fe   
13  e11c67919830bb0a29a86b05936d467227accc81   
14  c5a528afd98274902b4987b887f19ecd282ca8bd   

                                    citingPaper.title  \
0   Emerging strategies for addressing flood-damag...   
1   Generative Adversarial Reviews: When LLMs Beco...   
2   A Database of Stress-Strain Properties Auto-ge...   
3  