In [1]:
# Test Service Connectivity
import requests
import subprocess
import json

# Health endpoints probed before starting the Week 2 work.
# NOTE: "PostgreSQL (via API)" deliberately points at the same FastAPI health
# URL as "FastAPI" -- the database is only checked indirectly through the API.
services_to_test = {
    "FastAPI": "http://localhost:8000/api/v1/health",
    "PostgreSQL (via API)": "http://localhost:8000/api/v1/health",
    "Ollama": "http://localhost:11434/api/version",
    "OpenSearch": "http://localhost:9200/_cluster/health",
    "Airflow": "http://localhost:8080/health",
}


def _describe_failure(endpoint):
    """Probe ``endpoint`` with a 5 second timeout.

    Returns ``None`` when the endpoint answers HTTP 200, otherwise a short
    label describing what went wrong (non-200 status, unreachable host, or
    the class name of any other exception raised by the request).
    """
    try:
        status = requests.get(endpoint, timeout=5).status_code
    except requests.exceptions.ConnectionError:
        return "Not accessible"
    except Exception as exc:
        return type(exc).__name__
    return None if status == 200 else f"HTTP {status}"


print("WEEK 2 PREREQUISITE CHECK")
print("=" * 50)

# Flipped to False as soon as a single service reports a problem.
all_healthy = True

for service_name, url in services_to_test.items():
    problem = _describe_failure(url)
    if problem is None:
        print(f"✓ {service_name}: Healthy")
        continue
    print(f"✗ {service_name}: {problem}")
    all_healthy = False

print()
print(
    "All services healthy! Ready for Week 2 development."
    if all_healthy
    else "Some services need attention. Check Week 1 notebook."
)

WEEK 2 PREREQUISITE CHECK
✓ FastAPI: Healthy
✓ PostgreSQL (via API): Healthy
✓ Ollama: Healthy
✓ OpenSearch: Healthy
✓ Airflow: Healthy

All services healthy! Ready for Week 2 development.


In [2]:
# ensure repo root and src are on sys.path
import sys, pathlib

# Start from the current working directory and look for the project root:
# the nearest directory (cwd itself first, then each parent in turn) that
# contains a "src" folder.
repo_root = pathlib.Path().resolve()
project_root = next(
    (
        candidate
        for candidate in (repo_root, *repo_root.parents)
        if (candidate / "src").exists()
    ),
    None,
)

if project_root is not None:
    # Insert the root first and "src" second so that "src" ends up at
    # sys.path[0] and the root at sys.path[1].
    for entry in (str(project_root), str(project_root / "src")):
        # Drop any copies left by an earlier run of this cell; without this,
        # every re-execution would stack another pair of duplicate entries.
        while entry in sys.path:
            sys.path.remove(entry)
        sys.path.insert(0, entry)
print("sys.path[0:3]:", sys.path[:3])

sys.path[0:3]: ['/Users/surekha/Documents/projects/RAG/research-assistant/src', '/Users/surekha/Documents/projects/RAG/research-assistant', '/Users/surekha/.local/share/uv/python/cpython-3.12.12-macos-aarch64-none/lib/python312.zip']


In [3]:
import asyncio
from datetime import datetime, timedelta

from src.services.arxiv.factory import make_arxiv_client

print("Testing ARXIV API Client")
print("=" * 40)

# Build the client through the project factory; later cells reuse this object.
arxiv_client = make_arxiv_client()

# Collect the client's configuration summary, then emit it in one call.
# The trailing empty string reproduces the blank separator line.
summary_lines = (
    f" Client created: {arxiv_client.base_url}",
    f" Rate limit: {arxiv_client.rate_limit_delay}",
    f" Max results: {arxiv_client.max_results}",
    f" Category: {arxiv_client.search_category}",
    "",
)
print("\n".join(summary_lines))

Testing ARXIV API Client
 Client created: https://export.arxiv.org/api/query
 Rate limit: 3.0
 Max results: 15
 Category: cs.AI



In [4]:
async def test_fetch_papers():
    """Smoke-test ``arxiv_client.fetch_papers`` and print a short summary.

    Requests one paper sorted by submission date (newest first), prints how
    many papers came back and a preview of at most the first two.

    Returns:
        The value returned by ``fetch_papers``, or an empty list when any
        exception is raised while fetching or printing.
    """
    print("Test 1: Fetch Recent cs.AI papers")
    try:
        fetched = await arxiv_client.fetch_papers(
            max_results=1, sort_by="submittedDate", sort_order="descending"
        )

        print(f"Fetched {len(fetched)} papers")

        # Preview at most two entries; nothing is shown for an empty result.
        preview = fetched[:2] if fetched else []
        for position, entry in enumerate(preview, start=1):
            print(f"   {position}. [{entry.arxiv_id}] {entry.title[:60]}...")

            # Only the first two authors are listed; an ellipsis marks the rest.
            lead_authors = ", ".join(entry.authors[:2])
            if len(entry.authors) > 2:
                overflow_marker = "..."
            else:
                overflow_marker = ""
            print(f"      Authors: {lead_authors}{overflow_marker}")

            category_list = ", ".join(entry.categories)
            print(f"      Categories: {category_list}")
            print(f"      Published: {entry.published_date}")
            print()
    except Exception as exc:
        print(f"Error fetching papers: {exc}")
        # A 503 in the message is reported as an expected, transient outage.
        if "503" in str(exc):
            print(" arxiv API temporarily unavailable(normal)")
            print(" Rate limiting and error handling working correctly")
        return []
    else:
        return fetched

# Run the fetch test and keep its result in `papers`.
# The bare top-level `await` relies on the notebook kernel supporting it;
# in a plain Python script this call would need asyncio.run(...) instead.
papers = await test_fetch_papers()

Test 1: Fetch Recent cs.AI papers
Fetched 1 papers
   1. [2511.16674v1] Dataset Distillation for Pre-Trained Self-Supervised Vision ...
      Authors: George Cazenavette, Antonio Torralba...
      Categories: cs.CV, cs.AI, cs.LG
      Published: 2025-11-20T18:59:57Z



In [7]:
# test date filtering
async def test_date_filtering(from_date="20250808", to_date="20250809", max_results=2):
    """Smoke-test date-range filtering of ``arxiv_client.fetch_papers``.

    Fetches papers restricted to a date window and prints one summary entry
    per returned paper.

    Args:
        from_date: Lower bound of the window, forwarded unchanged to
            ``fetch_papers``. Given as a string such as "20250808"
            (presumably YYYYMMDD -- confirm against the client).
        to_date: Upper bound of the window, same format as ``from_date``.
        max_results: Maximum number of papers to request.

    Returns:
        The value returned by ``fetch_papers``, or an empty list when any
        exception is raised while fetching or printing.
    """
    print("Test 2: Date Range Filtering")

    try:
        date_papers = await arxiv_client.fetch_papers(
            max_results=max_results, from_date=from_date, to_date=to_date
        )

        print(f" Date filtering test: {len(date_papers)} papers from {from_date}-{to_date}")
        if date_papers:
            for i, paper in enumerate(date_papers, 1):
                print(f"   {i}. [{paper.arxiv_id}] {paper.title[:60]}...")
                # Only the first two authors are listed; "..." marks the rest.
                print(
                    f"      Authors: {', '.join(paper.authors[:2])}{'...' if len(paper.authors) > 2 else ''}"
                )
                print(f"      Categories: {', '.join(paper.categories)}")
                print(f"      Published: {paper.published_date}")
                print()

        return date_papers

    except Exception as e:
        # Best-effort notebook check: report the failure and keep going.
        print(f"✗ Date filtering error: {e}")
        return []

# Run the date filtering test and keep its result in `date_papers`
# (the PDF download cell further down passes a slice of this list).
# The bare top-level `await` relies on the notebook kernel supporting it.
date_papers = await test_date_filtering()

Test 2: Date Range Filtering
 Date filtering test: 2 papers from 20250808-20250809
   1. [2508.07111v1] Investigating Intersectional Bias in Large Language Models u...
      Authors: Falaah Arif Khan, Nivedha Sivakumar...
      Categories: cs.CL, cs.AI
      Published: 2025-08-09T22:24:40Z

   2. [2508.07107v2] Designing a Feedback-Driven Decision Support System for Dyna...
      Authors: Timothy Oluwapelumi Adeyemi, Nadiah Fahad AlOtaibi
      Categories: cs.AI, cs.CY
      Published: 2025-08-09T21:24:54Z



In [None]:
# test pdf download and caching locally
async def test_pdf_download(test_papers):

    print("Test 3: PDF Download & Caching")

    if not test_papers:
        print("No papers available for PDF download test")

    test_paper = test_papers[0]
    print(f"Testing PDF download for : {test_paper.arxiv_id}")
    print(f"Title: {test_paper.title[:60]}")

    try:
        pdf_path = await arxiv_client.download_pdf(test_paper)

        if pdf_path and pdf_path.exists():
            size_mb = pdf_path.stat().st_size / (1024*1024)
            print(f"PDF downloaded {pdf_path.name} ({size_mb:.2f}) MB")
            return pdf_path
        else:
            print("PDF downlod failed")
            return None
        
    except Exception as e:
        print(f"PDF download failed")
        return None
    
# Run the PDF test on the first date-filtered paper only. The slice keeps it
# wrapped in a list, because test_pdf_download indexes its argument with [0].
# NOTE(review): the saved traceback under this cell does not seem to match the
# current function body -- presumably stale output; re-run the cell to confirm.
pdf_path = await test_pdf_download(date_papers[:1])

Test 3: PDF Download & Caching
Testing PDF download for : 2508.07111v1


AttributeError: 'list' object has no attribute 'title'