In [None]:
import requests
import json

# Semantic Scholar Graph API bulk paper-search endpoint; this is the URL
# that fetch_papers_by_keywords sends its GET requests to.
SEMANTIC_SCHOLAR_BASE_URL = "https://api.semanticscholar.org/graph/v1/paper/search/bulk"
# HTTP headers attached to every request; the User-Agent names this client.
HEADERS = {"User-Agent": "DeepCite/1.0"}

def fetch_papers_by_keywords(keywords, fields="title,abstract,url,year,citationCount", limit=1000):
    """Fetch up to ``limit`` papers matching ``keywords`` from Semantic Scholar.

    Sends GET requests to ``SEMANTIC_SCHOLAR_BASE_URL`` with ``HEADERS`` and
    accumulates the entries of each response's ``data`` list until ``limit``
    papers have been collected or the results run out.

    Args:
        keywords: Search query string, sent as the ``query`` parameter.
        fields: Comma-separated paper fields to request from the API.
        limit: Maximum number of papers to return.

    Returns:
        A list of at most ``limit`` paper dicts as returned by the API. If a
        request comes back with a non-200 status, a message is printed and
        whatever was collected so far is returned.

    Raises:
        requests.RequestException: On connection errors or when a request
            exceeds the timeout.
    """
    papers = []
    # The bulk search endpoint paginates with a continuation token (per the
    # Semantic Scholar API docs), not with offset/limit; re-requesting with a
    # bumped offset can return the same page again and duplicate results.
    token = None
    while len(papers) < limit:
        params = {"query": keywords, "fields": fields}
        if token:
            params["token"] = token
        # A timeout keeps a stalled connection from hanging the caller forever.
        response = requests.get(
            SEMANTIC_SCHOLAR_BASE_URL, headers=HEADERS, params=params, timeout=30
        )
        if response.status_code != 200:
            print(f"Failed to fetch data, status code: {response.status_code}")
            break
        data = response.json()
        new_papers = data.get("data") or []
        papers.extend(new_papers)
        token = data.get("token")
        # Stop once the API has nothing more to give; without this check an
        # empty page would leave len(papers) unchanged and loop forever.
        if not new_papers or not token:
            break
    return papers[:limit]

def save_papers_to_json(query, filename="papers.json"):
    """Fetch papers for ``query`` and write a trimmed copy of them as JSON.

    Calls ``fetch_papers_by_keywords(query)`` and, for each paper, keeps only
    the title, abstract, year, url and citation count before dumping the list
    to ``filename``.

    Args:
        query: Search string passed to ``fetch_papers_by_keywords``.
        filename: Path of the JSON file to write.

    Returns:
        True if the file was written, False if no papers were retrieved (in
        which case nothing is written).
    """
    papers = fetch_papers_by_keywords(query)

    if not papers:
        print("No papers retrieved.")
        return False

    # ``dict.get(key, default)`` only falls back when the key is absent; a
    # field that is present but null would slip through as None. ``or``
    # applies the placeholder to missing and null values alike.
    formatted_papers = [
        {
            "title": paper.get("title") or "No title available",
            "abstract": paper.get("abstract") or "No abstract available",
            "year": paper.get("year") or "Unknown",
            "url": paper.get("url") or "Unknown",
            "citation_count": paper.get("citationCount") or 0,
        }
        for paper in papers
    ]

    # Save to JSON file
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(formatted_papers, f, indent=4)
    return True


# Example usage. Guarded so that importing this module does not trigger a
# network fetch and a file write as a side effect.
if __name__ == "__main__":
    query = "Supervised machine learning use in healthcare"
    output_file = "top_1000_papers.json"
    # Only report success when a file was actually written.
    if save_papers_to_json(query, output_file):
        print(f"Papers saved to {output_file}")
