# ResearchTrader API Testing

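This notebook exercises each endpoint of the ResearchTrader API against a running backend to confirm that it responds correctly. Run the cells in order: the Q&A and strategy tests reuse the paper ID and paper details stored by the search and get-details cells.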

In [1]:
import httpx
import asyncio
import json
from pprint import pprint
import time

# API configuration
# !!! Make sure this matches your running backend API URL !!!
# Every test function below builds its request URL from this base.
API_BASE_URL = "http://127.0.0.1:8000"

# --- Global variables to store results between steps ---
# NOTE: re-running this cell resets both globals to None, so the search and
# get-details cells must be run again before the Q&A / strategy cells.
# Stores the ID of the paper found in the search step
# (written by test_search_and_process, passed to the later test calls).
paper_id_to_test = None
# Stores the full paper details once retrieved after processing
# (written by test_get_paper_details, checked by test_qa and test_strategy_generation).
detailed_paper = None
# ---

print(f"Testing API running at: {API_BASE_URL}")

# Helper for pretty printing
def print_json(data, title="Response Data"):
    """Pretty-print ``data`` between a titled banner and a closing banner.

    Args:
        data: The value to display. ``None`` means "nothing was captured" and
            is reported as ``(No data)``. Every other value is printed,
            including empty containers such as ``[]`` or ``{}``, so an empty
            but valid API response stays distinguishable from a failed request.
        title: Label used in the opening banner.

    Returns:
        None. Output goes to stdout.
    """
    print(f"\n--- {title} ---")
    if data is None:
        print("(No data)")
    else:
        try:
            print(json.dumps(data, indent=2))
        except (TypeError, ValueError):
            # TypeError: non-serializable types like datetime.
            # ValueError: json.dumps refuses circular references.
            pprint(data)
    print("--- End Response ---")

Testing API running at: http://127.0.0.1:8000


### Test POST /papers/ (Search & Process Trigger)

In [2]:
async def test_search_and_process(query="transformer finance forecasting", max_results=3):
    """Test POST /papers/, the search endpoint that also triggers background processing.

    When the response is a non-empty list, the first item's ``paper_id`` is
    stored in the global ``paper_id_to_test`` for the cells that follow. In
    every other case (empty list, unexpected payload, HTTP/connection/other
    error) the global is set to ``None``.

    Args:
        query: Search query sent in the JSON payload.
        max_results: Sent as ``max_results`` in the JSON payload.

    Returns:
        None. Progress and the raw response body are printed.
    """
    global paper_id_to_test  # Allow modifying the global variable
    print("\n--- Testing POST /papers/ ---")
    print(f"Query: '{query}', Max Results: {max_results}")
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()

    payload = {"query": query, "max_results": max_results, "force_reprocess": False}  # Set force_reprocess if needed
    search_results_summary = None

    async with httpx.AsyncClient(timeout=60.0) as client:
        try:
            response = await client.post(f"{API_BASE_URL}/papers/", json=payload)
            response.raise_for_status()  # Raise exception for bad status codes

            print(f"Status code: {response.status_code}")
            print(f"Response time: {time.perf_counter() - started:.2f} seconds")

            response_data = response.json()
            search_results_summary = response_data  # Store for display

            # Response is List[PaperSummaryResponse]
            if isinstance(response_data, list) and response_data:
                print(f"Found {len(response_data)} papers. Background processing triggered.")
                first_paper = response_data[0]
                paper_id_to_test = first_paper.get("paper_id")  # Store the ID
                print(f"Using Paper ID for subsequent tests: {paper_id_to_test}")
                print(f"Title: {first_paper.get('title')}")
                print(f"Initial Summary (if cached): {first_paper.get('summary', 'Not available yet')}")
                print("\nPausing briefly for background processing to potentially start...")
                await asyncio.sleep(3)  # Give backend a few seconds to kick off tasks
            elif isinstance(response_data, list):
                print("Search returned successfully, but no papers found.")
                paper_id_to_test = None
            else:
                print("No papers found or unexpected response structure")
                paper_id_to_test = None

        except httpx.HTTPStatusError as e:
            print(f"HTTP Error: {e.response.status_code}")
            try:
                print(f"Detail: {e.response.json()}")
            except ValueError:
                # Error body is not JSON (json.JSONDecodeError subclasses
                # ValueError); fall back to the raw text. A bare `except:`
                # would also swallow KeyboardInterrupt/SystemExit.
                print(f"Body: {e.response.text}")
            paper_id_to_test = None
        except httpx.RequestError as e:
            print(f"Request Error: Could not connect to API at {API_BASE_URL}. Is it running?")
            print(f"Details: {e}")
            paper_id_to_test = None
        except Exception as e:
            print(f"An unexpected error occurred: {type(e).__name__}: {e}")
            paper_id_to_test = None

    print_json(search_results_summary, title="Search Results Summary")

# Run the search test with its default query; when papers are returned it
# stores the first result's ID in paper_id_to_test for the cells below.
await test_search_and_process()


--- Testing POST /papers/ ---
Query: 'transformer finance forecasting', Max Results: 3
Status code: 200
Response time: 2.53 seconds
Found 3 papers. Background processing triggered.
Using Paper ID for subsequent tests: 2504.13529v1
Title: Risk-aware black-box portfolio construction using Bayesian optimization with adaptive weighted Lagrangian estimator
Initial Summary (if cached): Not available yet

Pausing briefly for background processing to potentially start...

--- Search Results Summary ---
[
  {
    "paper_id": "2504.13529v1",
    "title": "Risk-aware black-box portfolio construction using Bayesian optimization with adaptive weighted Lagrangian estimator",
    "authors": [
      "Zinuo You",
      "John Cartlidge",
      "Karen Elliott",
      "Menghan Ge",
      "Daniel Gold"
    ],
    "abstract": "Existing portfolio management approaches are often black-box models due to\nsafety and commercial issues in the industry. However, their performance can\nvary considerably whenever m

### Test GET /papers/{id} (Get Details - Waits for Processing)

In [3]:
async def test_get_paper_details(paper_id, max_retries=15, delay=15):
    """Test GET /papers/{id}, polling until the paper's summaries are present.

    A response counts as "processed" once ``content.comprehensive_summary`` and
    ``content.structured_summary`` are both truthy. On success the full response
    is stored in the global ``detailed_paper``. If the retries run out, the last
    successfully decoded response (possibly incomplete) is stored instead.

    Args:
        paper_id: ID to fetch; a falsy value skips the test.
        max_retries: Maximum number of GET attempts.
        delay: Seconds to sleep between attempts.

    Returns:
        None. Progress is printed; the result is published via ``detailed_paper``.
    """
    global detailed_paper  # Allow modifying the global variable
    # Clear details left over from an earlier run so a failed or skipped fetch
    # cannot leave stale data for the Q&A / strategy cells to pick up.
    detailed_paper = None
    print(f"\n--- Testing GET /papers/{paper_id} (with retries) ---")
    if not paper_id:
        print("No paper ID provided from search step. Skipping.")
        return

    last_response_data = None
    async with httpx.AsyncClient(timeout=45.0) as client:
        for attempt in range(max_retries):
            print(f"\nAttempt {attempt + 1}/{max_retries} to fetch details for {paper_id}")
            started = time.perf_counter()  # monotonic clock for interval timing
            try:
                # Add force_reprocess=True here if you want to test reprocessing
                response = await client.get(f"{API_BASE_URL}/papers/{paper_id}")  # , params={"force_reprocess": True})
                response.raise_for_status()

                elapsed = time.perf_counter() - started
                print(f"Status code: {response.status_code}, Response time: {elapsed:.2f} seconds")

                paper_data = response.json()
                last_response_data = paper_data  # Store last successful response

                # Both summaries present is taken as the sign that processing
                # is likely complete.
                content = paper_data.get("content") or {}
                structured = content.get("structured_summary")
                if content.get("comprehensive_summary") and structured:
                    print(">>> Paper details retrieved successfully with content!")
                    detailed_paper = paper_data  # Store details
                    # Display some key info. The lookups are defensive so an
                    # unexpected shape (null metadata, non-dict summary) cannot
                    # raise here and trigger pointless retries after a success.
                    metadata = paper_data.get("metadata") or {}
                    print(f"Title: {metadata.get('title', 'N/A')}")
                    objective = structured.get("objective", "N/A") if isinstance(structured, dict) else "N/A"
                    print(f"Objective: {objective}")
                    ft_status = "Available" if content.get("full_text") else "Not available/extracted"
                    print(f"Full Text: {ft_status}")
                    return  # Exit on success

                print("Paper found, but content (summaries) not yet available. Waiting...")

            except httpx.HTTPStatusError as e:
                if e.response.status_code == 404:
                    print("Paper not found (404). Might not exist or processing failed severely.")
                    return  # Stop retrying if not found
                print(f"HTTP Error fetching paper: {e.response.status_code}")
                try:
                    print(f"Detail: {e.response.json()}")
                except ValueError:
                    # Non-JSON error body (JSONDecodeError subclasses ValueError).
                    print(f"Body: {e.response.text}")
                # Other HTTP errors fall through to the retry logic below.

            except httpx.RequestError as e:
                print(f"Request Error fetching paper: {e}")
                # Retried below.

            except json.JSONDecodeError as e:
                # `response` is always bound here: decoding only happens after
                # the GET has returned.
                print(f"JSON Decode Error fetching paper: {e}. Response text: {response.text}")
                # Retried below.

            except Exception as e:
                print(f"An unexpected error occurred: {type(e).__name__}: {e}")
                # Retried below.

            # Wait before retrying, except after the final attempt.
            if attempt < max_retries - 1:
                print(f"Waiting {delay} seconds before next attempt...")
                await asyncio.sleep(delay)

    # Reached only when no attempt succeeded (success and 404 return early).
    # Placing this after the loop also reports the max_retries <= 0 case.
    print(f">>> Max retries ({max_retries}) reached. Paper content might not be available.")
    if last_response_data:
        print("Storing last successful response data (potentially incomplete).")
        detailed_paper = last_response_data  # Store potentially incomplete data

# Fetch details for the paper ID stored by the search cell; the function
# skips itself when that ID is None.
await test_get_paper_details(paper_id_to_test)


--- Testing GET /papers/2504.13529v1 (with retries) ---

Attempt 1/15 to fetch details for 2504.13529v1
Status code: 200, Response time: 0.00 seconds
>>> Paper details retrieved successfully with content!
Title: Risk-aware black-box portfolio construction using Bayesian optimization with adaptive weighted Lagrangian estimator
Objective: To optimize black-box portfolio management models using a novel Bayesian optimization framework that balances performance and risk.
Full Text: Available


### Test GET /papers/ (List Cached Papers)

In [4]:
async def test_list_cached_papers():
    """Test GET /papers/, which lists the papers currently held in the cache.

    Prints the status code, the response time and the number of papers, then
    dumps the list via ``print_json``. If the request fails or the payload is
    not a list, ``print_json`` is called with ``None``.

    Returns:
        None.
    """
    print("\n--- Testing GET /papers/ ---")
    started = time.perf_counter()  # monotonic clock for interval timing
    cached_papers_list = None

    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            response = await client.get(f"{API_BASE_URL}/papers/")
            response.raise_for_status()

            print(f"Status code: {response.status_code}")
            print(f"Response time: {time.perf_counter() - started:.2f} seconds")

            cached_papers = response.json()

            if isinstance(cached_papers, list):
                print(f"Found {len(cached_papers)} papers currently in cache.")
                cached_papers_list = cached_papers
            else:
                print("Unexpected response format.")

        except httpx.HTTPStatusError as e:
            print(f"HTTP Error: {e.response.status_code}")
            try:
                print(f"Detail: {e.response.json()}")
            except ValueError:
                # Non-JSON error body (JSONDecodeError subclasses ValueError);
                # a bare `except:` would also swallow KeyboardInterrupt.
                print(f"Body: {e.response.text}")
        except httpx.RequestError as e:
            print(f"Request Error: {e}")
        except Exception as e:
            print(f"An unexpected error occurred: {type(e).__name__}: {e}")

    print_json(cached_papers_list, title="List of Cached Papers (Summary View)")


# List the cached papers; this call takes no arguments and does not read
# paper_id_to_test or detailed_paper.
await test_list_cached_papers()


--- Testing GET /papers/ ---
Status code: 200
Response time: 0.01 seconds
Found 3 papers currently in cache.

--- List of Cached Papers (Summary View) ---
[
  {
    "paper_id": "2504.13529v1",
    "title": "Risk-aware black-box portfolio construction using Bayesian optimization with adaptive weighted Lagrangian estimator",
    "authors": [
      "Zinuo You",
      "John Cartlidge",
      "Karen Elliott",
      "Menghan Ge",
      "Daniel Gold"
    ],
    "abstract": "Existing portfolio management approaches are often black-box models due to\nsafety and commercial issues in the industry. However, their performance can\nvary considerably whenever market conditions or internal trading strategies\nchange. Furthermore, evaluating these non-transparent systems is expensive,\nwhere certain budgets limit observations of the systems. Therefore, optimizing\nperformance while controlling the potential risk of these financial systems has\nbecome a critical challenge. This work presents a novel Ba

### Test POST /qa/ (Q&A)


In [5]:
async def test_qa(paper_id, question):
    """Test POST /qa/ by asking ``question`` about a single paper.

    The test is skipped when ``paper_id`` is falsy, or when the global
    ``detailed_paper`` is empty or has no ``content`` (i.e. the get-details
    step did not retrieve processed content).

    Args:
        paper_id: ID sent as the only entry of ``paper_ids`` in the payload.
        question: Question text sent as ``question`` in the payload.

    Returns:
        None. The answer and the raw response are printed.
    """
    print("\n--- Testing POST /qa/ ---")
    if not paper_id:
        print("No paper ID available for Q&A test. Skipping.")
        return
    # Check if detailed_paper was successfully populated in the get_details step
    if not detailed_paper or not detailed_paper.get("content"):
        print("Detailed paper info (with content) not retrieved in previous step. Skipping Q&A.")
        print("(This might happen if get_details timed out or failed)")
        return

    print(f"Using Paper ID: {paper_id}")
    print(f"Question: {question}")
    started = time.perf_counter()  # monotonic clock for interval timing
    qa_result_data = None

    payload = {"question": question, "paper_ids": [paper_id]}

    async with httpx.AsyncClient(timeout=120.0) as client:  # Longer timeout for LLM
        try:
            response = await client.post(f"{API_BASE_URL}/qa/", json=payload)
            response.raise_for_status()

            print(f"Status code: {response.status_code}")
            print(f"Response time: {time.perf_counter() - started:.2f} seconds")

            qa_result = response.json()  # Response is QAResponse model
            qa_result_data = qa_result
            print(f"Context Paper IDs Used: {qa_result.get('context_paper_ids')}")
            print(f"\nAnswer:\n{qa_result.get('answer', 'N/A')}")

        except httpx.HTTPStatusError as e:
            print(f"HTTP Error: {e.response.status_code}")
            try:
                error_body = e.response.json()
            except ValueError:
                # Non-JSON error body (JSONDecodeError subclasses ValueError).
                error_body = None
            if isinstance(error_body, dict):
                err_detail = error_body.get("detail", e.response.text)
                print(f"Detail: {err_detail}")
                # `detail` is not guaranteed to be a string, so check the type
                # before the substring test instead of relying on a catch-all.
                if (
                    e.response.status_code == 404
                    and isinstance(err_detail, str)
                    and "not found in the cache" in err_detail
                ):
                    print("(This confirms the check for cached papers is working)")
            else:
                print(f"Body: {e.response.text}")
        except httpx.RequestError as e:
            print(f"Request Error: {e}")
        except Exception as e:
            print(f"An unexpected error occurred: {type(e).__name__}: {e}")

    print_json(qa_result_data, title="Q&A Response")

# Ask one question about the paper stored by the search cell. test_qa skips
# itself unless the get-details cell populated detailed_paper with content.
question_to_ask = "What is the main objective of this paper and what methods were used?"
await test_qa(paper_id_to_test, question_to_ask)


--- Testing POST /qa/ ---
Using Paper ID: 2504.13529v1
Question: What is the main objective of this paper and what methods were used?
Status code: 200
Response time: 2.95 seconds
Context Paper IDs Used: ['2504.13529v1']

Answer:
The main objective of the paper is to optimize black-box portfolio management models using a novel Bayesian optimization framework that balances performance and risk. The methods used include:

- An adaptive weighted Lagrangian estimator for dual objective optimization.
- Bayesian optimization with importance sampling to guide the surrogate model.
- Three surrogate models: Gaussian Processes (GP), Tree-structured Parzen Estimators (TPE), and Bayesian Neural Networks (BNN).
- Acquisition functions such as Expected Improvement (EI), Upper Confidence Bound (UCB), and Probability of Improvement (PI) [ID: 2504.13529v1].

--- Q&A Response ---
{
  "question": "What is the main objective of this paper and what methods were used?",
  "answer": "The main objective of th

### Test POST /strategy/ (Strategy Generation)

In [6]:
async def test_strategy_generation(paper_id, prompt):
    """Test POST /strategy/ by requesting a trading strategy for a single paper.

    The test is skipped when ``paper_id`` is falsy, or when the global
    ``detailed_paper`` is empty or has no ``content`` (i.e. the get-details
    step did not retrieve processed content).

    Args:
        paper_id: ID sent as the only entry of ``paper_ids`` in the payload.
        prompt: Text sent as ``strategy_prompt`` in the payload.

    Returns:
        None. The generation notes and each section of the returned strategy
        are printed.
    """
    print("\n--- Testing POST /strategy/ ---")
    if not paper_id:
        print("No paper ID available for Strategy Generation test. Skipping.")
        return
    # Check if detailed_paper was successfully populated
    if not detailed_paper or not detailed_paper.get("content"):
        print("Detailed paper info (with content) not retrieved in previous step. Skipping Strategy Generation.")
        print("(This might happen if get_details timed out or failed)")
        return

    print(f"Using Paper ID: {paper_id}")
    print(f"Prompt: {prompt}")
    started = time.perf_counter()  # monotonic clock for interval timing
    strategy_result_data = None

    payload = {"paper_ids": [paper_id], "strategy_prompt": prompt}

    async with httpx.AsyncClient(timeout=180.0) as client:  # Even longer timeout for strategy generation
        try:
            response = await client.post(f"{API_BASE_URL}/strategy/", json=payload)
            response.raise_for_status()

            print(f"Status code: {response.status_code}")
            print(f"Response time: {time.perf_counter() - started:.2f} seconds")

            strategy_response = response.json()  # Response is StrategyGenerationResponse model
            strategy_result_data = strategy_response
            print(f"\nGeneration Notes: {strategy_response.get('notes')}")
            print(f"Context Paper IDs Used: {strategy_response.get('context_paper_ids')}")

            # Display the structured strategy output nicely
            strategy_output = strategy_response.get('strategy')
            if strategy_output:
                print("\n--- Strategy Description ---")
                print(strategy_output.get('strategy_description', 'N/A'))
                print("\n--- Pseudocode / Logic ---")
                print(strategy_output.get('pseudocode', 'N/A'))
                print("\n--- How to Use / Limitations ---")
                print(strategy_output.get('how_to_use', 'N/A'))
                print("\n--- Python Code Outline ---")
                print(f"```python\n{strategy_output.get('python_code', '# N/A')}\n```")
            else:
                # This might happen if 'notes' indicated failure
                print("No structured strategy output found in response.")

        except httpx.HTTPStatusError as e:
            print(f"HTTP Error: {e.response.status_code}")
            try:
                error_body = e.response.json()
            except ValueError:
                # Non-JSON error body (JSONDecodeError subclasses ValueError).
                error_body = None
            if isinstance(error_body, dict):
                err_detail = error_body.get("detail", e.response.text)
                print(f"Detail: {err_detail}")
                # `detail` is not guaranteed to be a string, so check the type
                # before the substring test instead of relying on a catch-all.
                if (
                    e.response.status_code == 404
                    and isinstance(err_detail, str)
                    and "not found in the cache" in err_detail
                ):
                    print("(This confirms the check for cached papers is working)")
            else:
                print(f"Body: {e.response.text}")
        except httpx.RequestError as e:
            print(f"Request Error: {e}")
        except Exception as e:
            print(f"An unexpected error occurred: {type(e).__name__}: {e}")

    # print_json(strategy_result_data, title="Full Strategy Generation Response") # Optional: Print raw JSON

# The f-string is evaluated when this cell runs, so the prompt embeds whatever
# paper_id_to_test holds at that moment (the text "None" if the search cell
# found nothing; test_strategy_generation skips itself in that case).
strategy_prompt_example = f"Generate a simple strategy based on the core concept mentioned in the abstract or objective of paper {paper_id_to_test}. Keep it high level."
await test_strategy_generation(paper_id_to_test, strategy_prompt_example)


--- Testing POST /strategy/ ---
Using Paper ID: 2504.13529v1
Prompt: Generate a simple strategy based on the core concept mentioned in the abstract or objective of paper 2504.13529v1. Keep it high level.
Status code: 200
Response time: 15.37 seconds

Generation Notes: Structured strategy outline generated successfully.
Context Paper IDs Used: ['2504.13529v1']

--- Strategy Description ---
This strategy utilizes a Bayesian optimization framework to construct a portfolio that balances maximizing returns and minimizing risk. The core logic is based on the adaptive weighted Lagrangian estimator proposed in the paper [ID: 2504.13529v1], which optimizes portfolio performance while controlling for variance in returns. By leveraging surrogate models such as Gaussian Processes (GP) and Tree-structured Parzen Estimators (TPE), the strategy dynamically adjusts to market conditions, ensuring that the portfolio adapts to changing risk profiles while aiming for high Sharpe ratios.

--- Pseudocode /