MODULE 1: Topic Input & Paper Search


In [27]:
# ============================================
# MODULE 1: Topic Input & Paper Search
# ============================================

import json
import os
from semanticscholar import SemanticScholar
from datetime import datetime

# ====================
# 1. SETUP API KEY
# ====================

def setup_api_key():
    """Create the Semantic Scholar client used for paper searches.

    If the ``SEMANTIC_SCHOLAR_API_KEY`` environment variable holds a non-blank
    value, it is passed to the client as its API key. Otherwise the client is
    created without a key (public access), exactly as before.

    Returns:
        SemanticScholar: the client instance.
    """
    # Read the key from the environment so it never has to be hardcoded in
    # the notebook; surrounding whitespace (e.g. from a pasted value) is dropped.
    api_key = os.environ.get("SEMANTIC_SCHOLAR_API_KEY", "").strip()

    if api_key:
        sch = SemanticScholar(api_key=api_key)
        # Deliberately do not echo the key itself: notebook output is saved.
        print("Using Semantic Scholar with API key from SEMANTIC_SCHOLAR_API_KEY")
        return sch

    sch = SemanticScholar()
    print("Using Semantic Scholar without API key (public access)")
    return sch

# ====================
# 2. PAPER SEARCH
# ====================

def _paper_to_record(paper, abstract_chars=300):
    """Flatten one search result object into a JSON-serializable dict."""
    abstract = paper.abstract
    if not abstract:
        abstract = "No abstract available"
    elif len(abstract) > abstract_chars:
        # Only mark the text as elided when something was actually cut off.
        abstract = abstract[:abstract_chars] + "..."

    # openAccessPdf may be None, or a mapping whose "url" is missing or empty;
    # only a non-empty URL counts as an available PDF.
    pdf_url = (paper.openAccessPdf or {}).get("url") or None

    return {
        "title": paper.title,
        "authors": [a.name for a in paper.authors] if paper.authors else [],
        "year": paper.year,
        "paperId": paper.paperId,
        "abstract": abstract,
        "citationCount": paper.citationCount,
        "venue": paper.venue,
        "url": paper.url,
        "pdf_url": pdf_url,
        "has_pdf": pdf_url is not None,
    }


def search_papers(topic, limit=3):
    """Search Semantic Scholar for ``topic`` and summarize the first results.

    Args:
        topic: free-text query string.
        limit: maximum number of papers to return. Values <= 0 yield an empty
            result list without issuing a request.

    Returns:
        dict with keys "topic", "search_timestamp" (ISO-8601 string),
        "total_results", "papers_with_pdf" and "papers" (a list of per-paper
        dicts), or None if the search raised an exception (the error is
        printed, not re-raised).
    """
    print(f"\nSearching for papers on: '{topic}'")
    print(f"Requesting {limit} papers from Semantic Scholar...")

    sch = setup_api_key()

    try:
        papers = []
        if limit > 0:
            # Ask the API only for as many results per page as are needed;
            # the page size is capped at 100 (the library's documented
            # maximum -- see the semanticscholar docs).
            results = sch.search_paper(query=topic, limit=min(limit, 100))

            for paper in results:
                papers.append(_paper_to_record(paper))
                # Stop right after the last wanted item so the iterator is
                # never asked for one more (which could trigger another fetch).
                if len(papers) >= limit:
                    break

        pdf_count = sum(1 for p in papers if p["has_pdf"])

        print("Search complete!")
        print(f"Total papers found: {len(papers)}")
        print(f"Papers with PDF available: {pdf_count}")

        return {
            "topic": topic,
            "search_timestamp": datetime.now().isoformat(),
            "total_results": len(papers),
            "papers_with_pdf": pdf_count,
            "papers": papers
        }

    except Exception as e:
        # Best-effort by design: callers treat None as "search failed".
        print(f"Error searching papers: {e}")
        return None

# ====================
# 3. SAVE METADATA
# ====================

def save_search_results(data, output_dir="data/search_results"):
    """Write the search results to a JSON file named after the topic.

    The file name is ``paper_search_results_<topic>.json`` where ``<topic>``
    is ``data["topic"]`` with every character other than letters, digits and
    spaces removed and the remaining spaces replaced by underscores.

    Args:
        data: JSON-serializable dict containing at least a "topic" string.
        output_dir: directory to write into; created if it does not exist.
            Defaults to the original hard-coded location.

    Returns:
        str: path of the written file.
    """
    safe_topic = "".join(
        ch for ch in data["topic"] if ch.isalnum() or ch == " "
    ).replace(" ", "_")
    filename = f"paper_search_results_{safe_topic}.json"

    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)

    # ensure_ascii=False keeps non-ASCII titles/author names readable on disk,
    # which is why the file is opened explicitly as UTF-8.
    with open(filepath, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4, ensure_ascii=False)

    print(f"Search results saved to: {filepath}")
    return filepath

# ====================
# 4. DISPLAY RESULTS
# ====================

def display_search_results(data):
    """Print a numbered, human-readable summary of the search results.

    Args:
        data: dict with a "topic" string and a "papers" list; each paper dict
            must provide "title", "year", "citationCount" and "has_pdf".
    """
    entries = data["papers"]
    rule = "=" * 80

    # Banner with the topic between two horizontal rules.
    print(f"\n{rule}")
    print(f"SEARCH RESULTS: {data['topic']}")
    print(rule)

    # One three-line entry per paper, numbered from 1.
    for position, entry in enumerate(entries, start=1):
        pdf_flag = "YES" if entry["has_pdf"] else "NO"
        print(f"\n{position}. {entry['title']}")
        print(f"   Year: {entry['year']} | Citations: {entry['citationCount']}")
        print(f"   PDF Available: {pdf_flag}")

# ====================
# 5. MAIN FUNCTION
# ====================

def main_search(topic=None, limit=3):
    """Run Module 1 end to end: get a topic, search, save and display.

    Args:
        topic: research topic to search for. When None (the default) the user
            is prompted via ``input()``; passing a string skips the prompt so
            the notebook can run non-interactively. A blank topic falls back
            to "data mining".
        limit: maximum number of papers to request (default 3).
    """
    print("\n" + "=" * 80)
    print("MODULE 1: TOPIC INPUT & PAPER SEARCH")
    print("=" * 80)

    if topic is None:
        topic = input("\nEnter research topic: ")
    topic = topic.strip()
    if not topic:
        topic = "data mining"

    results = search_papers(topic, limit=limit)

    # search_papers returns None on error and a dict with an empty "papers"
    # list when nothing matched; neither case is worth saving or displaying.
    if results and results.get("papers"):
        save_path = save_search_results(results)
        display_search_results(results)
        print(f"\nModule 1 complete! Results saved to: {save_path}")
    else:
        print("No results found.")

# Entry point: start the interactive search when this code is executed as the
# main program (a notebook cell or a script) rather than imported as a module.
if __name__ == "__main__":
    main_search()



MODULE 1: TOPIC INPUT & PAPER SEARCH

Enter research topic: machine learning

Searching for papers on: 'machine learning'
Requesting 3 papers from Semantic Scholar...
Using Semantic Scholar without API key (public access)
Search complete!
Total papers found: 3
Papers with PDF available: 3
Search results saved to: data/search_results/paper_search_results_machine_learning.json

SEARCH RESULTS: machine learning

1. Physics-informed machine learning
   Year: 2021 | Citations: 4961
   PDF Available: YES

2. Fashion-MNIST: a Novel Image Dataset for Benchmarking Machine Learning Algorithms
   Year: 2017 | Citations: 9877
   PDF Available: YES

3. A Survey on Bias and Fairness in Machine Learning
   Year: 2019 | Citations: 5120
   PDF Available: YES

Module 1 complete! Results saved to: data/search_results/paper_search_results_machine_learning.json


In [28]:
# Install the third-party packages with the same interpreter that runs this
# kernel (sys.executable), so they land in the environment the imports use.
# NOTE(review): this cell comes after the cell that imports `semanticscholar`;
# on a fresh kernel it presumably has to run first -- consider moving it to the top.
# NOTE(review): python-dotenv is installed but not used in the code visible
# here -- confirm it is still needed.
import sys
!{sys.executable} -m pip install semanticscholar python-dotenv

