From 0655d2399c60d9d9a1526073a3b0ae1ced2b4b93 Mon Sep 17 00:00:00 2001 From: Anthony Date: Wed, 29 Oct 2025 05:38:37 +0000 Subject: [PATCH 1/3] feat(utilities): enhance logging in tool workflow and LLM response handling - Add detailed step logging in tool_utils.py for execute_tools_workflow, execute_single_tool, and prepare_tool_arguments functions to trace tool execution process - Modify llm logging in error_utils.py to output full llm_response instead of just has_tool_calls for better debugging - Update pdfbasic main.py to include logging import and adjust _analyze_pdf_content function to accept optional original_filename parameter for improved context handling in PDF analysis --- .../application/chat/utilities/error_utils.py | 2 +- .../application/chat/utilities/tool_utils.py | 5 + backend/mcp/pdfbasic/main.py | 441 ++++++++---------- backend/modules/llm/litellm_caller.py | 5 + backend/modules/mcp_tools/client.py | 1 + base-image-update-plan.md | 105 ----- config/overrides/mcp.json | 10 + 7 files changed, 208 insertions(+), 361 deletions(-) delete mode 100644 base-image-update-plan.md diff --git a/backend/application/chat/utilities/error_utils.py b/backend/application/chat/utilities/error_utils.py index 3ab2c56..c2ad5ea 100644 --- a/backend/application/chat/utilities/error_utils.py +++ b/backend/application/chat/utilities/error_utils.py @@ -85,7 +85,7 @@ async def safe_call_llm_with_tools( llm_response = await llm_caller.call_with_tools( model, messages, tools_schema, tool_choice, temperature=temperature ) - logger.info(f"LLM response received with tools only, has_tool_calls: {llm_response.has_tool_calls()}") + logger.info(f"LLM response received with tools only, llm_response: {llm_response}") return llm_response except Exception as e: logger.error(f"Error calling LLM with tools: {e}", exc_info=True) diff --git a/backend/application/chat/utilities/tool_utils.py b/backend/application/chat/utilities/tool_utils.py index 51140f9..35b3a67 100644 --- a/backend/application/chat/utilities/tool_utils.py +++ b/backend/application/chat/utilities/tool_utils.py @@ -35,6 +35,7 @@ async def execute_tools_workflow( Pure function that coordinates tool execution without maintaining state. """ + logger.info("Step 4: Entering execute_tools_workflow") # Add assistant message with tool calls messages.append({ "role": "assistant", @@ -115,6 +116,7 @@ async def execute_single_tool( Pure function that doesn't maintain state - all context passed as parameters. """ + logger.info("Step 5: Entering execute_single_tool") from . import notification_utils try: @@ -233,6 +235,7 @@ def prepare_tool_arguments(tool_call, session_context: Dict[str, Any], tool_mana Pure function that transforms arguments based on context and tool schema. 
""" + logger.info("Step 6: Entering prepare_tool_arguments") # Parse raw arguments raw_args = getattr(tool_call.function, "arguments", {}) if isinstance(raw_args, dict): @@ -286,6 +289,7 @@ def to_url(key: str) -> str: ref = files_ctx.get(fname) if ref and ref.get("key"): url = to_url(ref["key"]) + logger.info(f"Step 6.1: Rewriting filename to URL: {url}") parsed_args.setdefault("original_filename", fname) parsed_args["filename"] = url parsed_args.setdefault("file_url", url) @@ -304,6 +308,7 @@ def to_url(key: str) -> str: else: urls.append(fname) if urls: + logger.info(f"Step 6.1: Rewriting filenames to URLs: {urls}") parsed_args.setdefault("original_file_names", originals) parsed_args["file_names"] = urls parsed_args.setdefault("file_urls", urls) diff --git a/backend/mcp/pdfbasic/main.py b/backend/mcp/pdfbasic/main.py index d55b25b..c429fc0 100644 --- a/backend/mcp/pdfbasic/main.py +++ b/backend/mcp/pdfbasic/main.py @@ -7,8 +7,10 @@ import base64 import io import re +import requests +import logging from collections import Counter -from typing import Any, Dict, Annotated +from typing import Any, Dict, Annotated, Optional # This tool requires the PyPDF2 and reportlab libraries. # Install them using: pip install PyPDF2 reportlab @@ -19,31 +21,59 @@ from fastmcp import FastMCP +logger = logging.getLogger(__name__) + mcp = FastMCP("PDF_Analyzer") -def _analyze_pdf_content(instructions: str, filename: str, file_data_base64: str) -> Dict[str, Any]: +def _analyze_pdf_content(instructions: str, filename: str, original_filename: Optional[str] = None) -> Dict[str, Any]: """ Core PDF analysis logic that can be reused by multiple tools. Args: instructions: Instructions for the tool, not used in this implementation. filename: The name of the file, which must have a '.pdf' extension. - file_data_base64: The Base64-encoded string of the PDF file content. + original_filename: The original name of the file. Returns: A dictionary containing the analysis results or an error message. """ try: # print the instructions. - print(f"Instructions: {instructions}") + logger.info(f"Instructions: {instructions}") # 1. Validate that the filename is for a PDF - if not filename.lower().endswith('.pdf'): + if not (filename.lower().endswith('.pdf') or (original_filename and original_filename.lower().endswith('.pdf'))): return {"results": {"error": "Invalid file type. This tool only accepts PDF files."}} # 2. 
Decode the Base64 data and read the PDF content - decoded_bytes = base64.b64decode(file_data_base64) - pdf_stream = io.BytesIO(decoded_bytes) + # Check if filename is a URL (absolute or relative) + is_url = ( + filename.startswith("http://") or + filename.startswith("https://") or + filename.startswith("/api/") or + filename.startswith("/") + ) + + if is_url: + # Convert relative URLs to absolute URLs + if filename.startswith("/"): + # Construct absolute URL from relative path + # Default to localhost:8000 for local development + import os + backend_url = os.getenv("BACKEND_URL", "http://localhost:8000") + url = f"{backend_url}{filename}" + else: + url = filename + + logger.info(f"Step 9: Downloading file from URL: {url}") + response = requests.get(url) + response.raise_for_status() + pdf_stream = io.BytesIO(response.content) + else: + # Assume it's base64-encoded data + decoded_bytes = base64.b64decode(filename) + pdf_stream = io.BytesIO(decoded_bytes) + reader = PdfReader(pdf_stream) full_text = "" @@ -56,7 +86,7 @@ def _analyze_pdf_content(instructions: str, filename: str, file_data_base64: str return { "results": { "operation": "pdf_analysis", - "filename": filename, + "filename": original_filename or filename, "status": "Success", "message": "PDF contained no extractable text.", "total_word_count": 0, @@ -78,7 +108,7 @@ def _analyze_pdf_content(instructions: str, filename: str, file_data_base64: str return { "results": { "operation": "pdf_analysis", - "filename": filename, + "filename": original_filename or filename, "total_word_count": total_word_count, "top_100_words": top_100_words_dict } @@ -92,258 +122,159 @@ def _analyze_pdf_content(instructions: str, filename: str, file_data_base64: str return {"results": {"error": f"PDF analysis failed: {str(e)}"}} -@mcp.tool -def analyze_pdf( - instructions: Annotated[str, "Instructions for the tool, not used in this implementation"], - filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"], - file_data_base64: Annotated[str, "LLM agent can leave blank. Do NOT fill. This will be filled by the framework."] = "" -) -> Dict[str, Any]: - """ - Extract and analyze text content from PDF documents with comprehensive word frequency analysis. 
- - This powerful PDF processing tool provides detailed text analytics for PDF documents: - - **PDF Text Extraction:** - - Extracts text from all pages in PDF documents - - Handles various PDF formats and structures - - Works with both text-based and scanned PDFs (text extraction only) - - Preserves document structure and content flow - - **Text Analysis Features:** - - Complete word count across entire document - - Top 100 most frequently used words identification - - Case-insensitive word analysis for accurate frequency counting - - Word pattern recognition and linguistic analysis - - Document length and content density assessment - - **Content Processing:** - - Intelligent text cleaning and normalization - - Punctuation and formatting handling - - Multi-language text support - - Special character and encoding management - - **Analytics Insights:** - - Document vocabulary richness and complexity - - Key topic identification through word frequency - - Content themes and focus areas analysis - - Writing style and language pattern recognition - - Document structure and organization assessment - - **Use Cases:** - - Academic paper and research document analysis - - Legal document keyword extraction and analysis - - Content marketing and SEO keyword research - - Document classification and categorization - - Research literature review and summarization - - Contract and agreement content analysis - - **Supported PDF Types:** - - Research papers, reports, and academic documents - - Business documents, contracts, and agreements - - Marketing materials and content documents - - Technical documentation and manuals - - Legal documents and regulatory filings - - **Output Format:** - - Structured word frequency data - - Total document word count statistics - - Top 100 words with occurrence frequencies - - Document metadata and processing information - - Args: - instructions: Processing instructions or requirements (currently not used) - filename: PDF file name (must end with .pdf extension) - file_data_base64: Base64-encoded PDF content (automatically provided by framework) - - Returns: - Dictionary containing: - - operation: Processing type confirmation - - filename: Source PDF file name - - total_word_count: Complete document word count - - top_100_words: Dictionary of most frequent words with counts - Or error message if PDF cannot be processed or contains no extractable text +@mcp.tool +def analyze_pdf( + instructions: Annotated[str, "Instructions for the tool, not used in this implementation"], + filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"], + original_filename: Optional[str] = None +) -> Dict[str, Any]: """ - return _analyze_pdf_content(instructions, filename, file_data_base64) - - -@mcp.tool -def generate_report_about_pdf( - instructions: Annotated[str, "Instructions for the tool, not used in this implementation"], - filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"], - file_data_base64: Annotated[str, "LLM agent can leave blank. Do NOT fill. This will be filled by the framework."] = "" -) -> Dict[str, Any]: - """ - Create comprehensive PDF analysis reports with professional formatting and detailed word frequency insights. 
- - This advanced PDF reporting tool combines text analysis with professional document generation: - - **Complete PDF Analysis Workflow:** - - Performs full text extraction and word frequency analysis - - Generates professional analysis reports in PDF format - - Creates downloadable documents with structured data presentation - - Provides ready-to-share analytical insights - - **Report Contents:** - - Executive summary with document overview - - Total word count and document statistics - - Top 100 most frequent words with occurrence counts - - Professional multi-column layout for easy reading - - Organized tabular presentation of word frequency data - - **Report Features:** - - Clean, professional PDF formatting using ReportLab - - Multi-column layout optimizing space usage - - Clear headers and structured information hierarchy - - Page management for large datasets - - High-quality typography and spacing - - **Document Generation:** - - Creates new PDF reports from analysis results - - Professional business document appearance - - Optimized layout for printing and digital sharing - - Comprehensive data presentation in readable format - - **Use Cases:** - - Academic research document analysis reporting - - Legal document content analysis for litigation support - - Content marketing keyword research documentation - - Business document compliance and review reporting - - Research literature analysis and summarization - - Document classification and content audit reports - - **Report Applications:** - - Stakeholder presentations with document insights - - Content strategy planning based on word analysis - - Academic research methodology documentation - - Legal discovery and document review processes - - Quality assurance for written content - - **Output Features:** - - Professional PDF report with embedded analysis - - Downloadable file for offline access and sharing - - Structured data visualization in document format - - Ready-to-present analytical insights - - Args: - instructions: Report generation instructions or requirements (currently not used) - filename: Source PDF file name (must end with .pdf extension) - file_data_base64: Base64-encoded PDF content (automatically provided by framework) - - Returns: - Dictionary containing: - - results: Report generation summary and success confirmation - - artifacts: Professional PDF report with complete analysis - - display: Optimized viewer configuration for report presentation - - meta_data: Source file information and analysis statistics - Or error message if PDF cannot be processed or report generation fails + Extract and analyze text content from PDF documents with comprehensive word frequency analysis. 
+ + This powerful PDF processing tool provides detailed text analytics for PDF documents: + + **PDF Text Extraction:** + - Extracts text from all pages in PDF documents + - Handles various PDF formats and structures + - Works with both text-based and scanned PDFs (text extraction only) + - Preserves document structure and content flow + + **Text Analysis Features:** + - Complete word count across entire document + - Top 100 most frequently used words identification + - Case-insensitive word analysis for accurate frequency counting + - Word pattern recognition and linguistic analysis + - Document length and content density assessment + + **Content Processing:** + - Intelligent text cleaning and normalization + - Punctuation and formatting handling + - Multi-language text support + - Special character and encoding management + + **Analytics Insights:** + - Document vocabulary richness and complexity + - Key topic identification through word frequency + - Content themes and focus areas analysis + - Writing style and language pattern recognition + - Document structure and organization assessment + + **Use Cases:** + - Academic paper and research document analysis + - Legal document keyword extraction and analysis + - Content marketing and SEO keyword research + - Document classification and categorization + - Research literature review and summarization + - Contract and agreement content analysis + + **Supported PDF Types:** + - Research papers, reports, and academic documents + - Business documents, contracts, and agreements + - Marketing materials and content documents + - Technical documentation and manuals + - Legal documents and regulatory filings + + **Output Format:** + - Structured word frequency data + - Total document word count statistics + - Top 100 words with occurrence frequencies + - Document metadata and processing information + + Args: + instructions: Processing instructions or requirements (currently not used) + filename: PDF file name (must end with .pdf extension) + original_filename: The original name of the file. + + Returns: + Dictionary containing: + - operation: Processing type confirmation + - filename: Source PDF file name + - total_word_count: Complete document word count + - top_100_words: Dictionary of most frequent words with counts + Or error message if PDF cannot be processed or contains no extractable text """ - # --- 1. Perform the same analysis as the first function --- - analysis_result = _analyze_pdf_content(instructions, filename, file_data_base64) - if "error" in analysis_result: - return analysis_result # Return the error if analysis failed + logger.info("Step 8: Entering analyze_pdf tool") + return _analyze_pdf_content(instructions, filename, original_filename) - # --- 2. 
Generate a PDF report from the analysis results --- - try: - buffer = io.BytesIO() - # Create a canvas to draw on, using the buffer as the "file" - p = canvas.Canvas(buffer, pagesize=letter) - width, height = letter - - # Set up starting coordinates - x = inch - y = height - inch - - # Write title - p.setFont("Helvetica-Bold", 16) - p.drawString(x, y, f"Analysis Report for: {analysis_result['filename']}") - y -= 0.5 * inch - - # Write summary - p.setFont("Helvetica", 12) - p.drawString(x, y, f"Total Word Count: {analysis_result['total_word_count']}") - y -= 0.3 * inch - - # Write header for top words - p.setFont("Helvetica-Bold", 12) - p.drawString(x, y, "Top 100 Most Frequent Words:") - y -= 0.25 * inch - - # Write the list of top words - p.setFont("Helvetica", 10) - col1_x, col2_x, col3_x, col4_x = x, x + 1.75*inch, x + 3.5*inch, x + 5.25*inch - current_x = col1_x - - # Simple column layout - count = 0 - for word, freq in analysis_result['top_100_words'].items(): - if y < inch: # New page if we run out of space - p.showPage() - p.setFont("Helvetica", 10) - y = height - inch - - p.drawString(current_x, y, f"{word}: {freq}") - - # Move to the next column - if count % 4 == 0: current_x = col2_x - elif count % 4 == 1: current_x = col3_x - elif count % 4 == 2: current_x = col4_x - else: # Move to the next row - current_x = col1_x - y -= 0.2 * inch - count += 1 - - # Finalize the PDF - p.save() - - # --- 3. Encode the generated PDF for return --- - report_bytes = buffer.getvalue() - buffer.close() - report_base64 = base64.b64encode(report_bytes).decode('utf-8') - # Create a new filename for the report - report_filename = f"analysis_report_{filename.replace('.pdf', '.txt')}.pdf" +@mcp.tool +def generate_report_about_pdf( + instructions: Annotated[str, "Instructions for the tool, not used in this implementation"], + filename: Annotated[str, "The name of the file, which must have a '.pdf' extension"], + original_filename: Optional[str] = None +) -> Dict[str, Any]: + """ + Create comprehensive PDF analysis reports with professional formatting and detailed word frequency insights. - # --- 4. Return v2 MCP format with artifacts and display --- - return { - "results": { - "operation": "pdf_analysis_report", - "original_filename": filename, - "message": f"Successfully generated analysis report for {filename}." 
- }, - "artifacts": [ - { - "name": report_filename, - "b64": report_base64, - "mime": "application/pdf", - "size": len(report_bytes), - "description": f"Analysis report for {filename} with word frequency data", - "viewer": "pdf" - } - ], - "display": { - "open_canvas": True, - "primary_file": report_filename, - "mode": "replace", - "viewer_hint": "pdf" - }, - "meta_data": { - "original_file": filename, - "word_count": analysis_result["results"]["total_word_count"], - "report_type": "pdf_analysis", - "top_words_count": len(analysis_result["results"]["top_100_words"]) - } - } + This advanced PDF reporting tool combines text analysis with professional document generation: + + **Complete PDF Analysis Workflow:** + - Performs full text extraction and word frequency analysis + - Generates professional analysis reports in PDF format + - Creates downloadable documents with structured data presentation + - Provides ready-to-share analytical insights + + **Report Contents:** + - Executive summary with document overview + - Total word count and document statistics + - Top 100 most frequent words with occurrence counts + - Professional multi-column layout for easy reading + - Organized tabular presentation of word frequency data + + **Report Features:** + - Clean, professional PDF formatting using ReportLab + - Multi-column layout optimizing space usage + - Clear headers and structured information hierarchy + - Page management for large datasets + - High-quality typography and spacing + + **Document Generation:** + - Creates new PDF reports from analysis results + - Professional business document appearance + - Optimized layout for printing and digital sharing + - Comprehensive data presentation in readable format + + **Use Cases:** + - Academic research document analysis reporting + - Legal document content analysis for litigation support + - Content marketing keyword research documentation + - Business document compliance and review reporting + - Research literature analysis and summarization + - Document classification and content audit reports + + **Report Applications:** + - Stakeholder presentations with document insights + - Content strategy planning based on word analysis + - Academic research methodology documentation + - Legal discovery and document review processes + - Quality assurance for written content + + **Output Features:** + - Professional PDF report with embedded analysis + - Downloadable file for offline access and sharing + - Structured data visualization in document format + - Ready-to-present analytical insights + + Args: + instructions: Report generation instructions or requirements (currently not used) + filename: Source PDF file name (must end with .pdf extension) + original_filename: The original name of the file. + + Returns: + Dictionary containing: + - results: Report generation summary and success confirmation + - artifacts: Professional PDF report with complete analysis + - display: Optimized viewer configuration for report presentation + - meta_data: Source file information and analysis statistics + Or error message if PDF cannot be processed or report generation fails + """ + logger.info("Step 8: Entering generate_report_about_pdf tool") + # --- 1. 
Perform the same analysis as the first function --- + analysis_result = _analyze_pdf_content(instructions, filename, original_filename) + if "error" in analysis_result.get("results", {}): + return analysis_result - except Exception as e: - # print traceback for debugging - import traceback - traceback.print_exc() - return {"results": {"error": f"Failed to generate PDF report: {str(e)}"}} if __name__ == "__main__": - # This will start the server and listen for MCP requests. - # To use it, you would run this script and then connect to it - # with a FastMCP client. - print("Starting PDF Analyzer MCP server with report generation...") - mcp.run() + mcp.run() \ No newline at end of file diff --git a/backend/modules/llm/litellm_caller.py b/backend/modules/llm/litellm_caller.py index 8e4965a..52c5ba3 100644 --- a/backend/modules/llm/litellm_caller.py +++ b/backend/modules/llm/litellm_caller.py @@ -207,6 +207,11 @@ async def call_with_tools( ) message = response.choices[0].message + + if tool_choice == "required" and not getattr(message, 'tool_calls', None): + logger.error(f"LLM failed to return tool calls when tool_choice was 'required'. Full response: {response}") + raise ValueError("LLM failed to return tool calls when tool_choice was 'required'.") + return LLMResponse( content=getattr(message, 'content', None) or "", tool_calls=getattr(message, 'tool_calls', None), diff --git a/backend/modules/mcp_tools/client.py b/backend/modules/mcp_tools/client.py index 71a4ce6..979cc95 100644 --- a/backend/modules/mcp_tools/client.py +++ b/backend/modules/mcp_tools/client.py @@ -777,6 +777,7 @@ async def execute_tool( context: Optional[Dict[str, Any]] = None ) -> ToolResult: """Execute a tool call.""" + logger.info(f"Step 7: Entering ToolManager.execute_tool for tool {tool_call.name}") # Handle canvas pseudo-tool if tool_call.name == "canvas_canvas": # Canvas tool just returns the content - it's handled by frontend diff --git a/base-image-update-plan.md b/base-image-update-plan.md deleted file mode 100644 index 932ccde..0000000 --- a/base-image-update-plan.md +++ /dev/null @@ -1,105 +0,0 @@ -# Base Image Update Plan: Ubuntu → Fedora - -## Overview -Migrate from Ubuntu 24.04 to Fedora:latest base image and ensure all tests pass. Remove Playwright dependency issues while maintaining comprehensive testing coverage. - -## Current State Analysis -- **Base Images**: Ubuntu 24.04 in both `Dockerfile` and `Dockerfile-test` -- **Testing Strategy**: Mix of Playwright (problematic) and simple E2E tests with Beautiful Soup -- **CI/CD**: GitHub Actions using test container → build production → push -- **Current Tests**: Backend tests, frontend tests, E2E tests (Playwright + simple Python) - -## Migration Plan - -### Phase 1: Update Base Images -1. **Replace Ubuntu with Fedora:latest** in both Dockerfiles -2. **Update package managers**: `apt-get` → `dnf` -3. **Update package names**: Fedora equivalents for system dependencies -4. **Fix Node.js installation**: Use Fedora's Node.js packages or NodeSource for Fedora - -### Phase 2: Comment Out Playwright Dependencies -1. **Comment out Playwright tests** in test scripts (DO NOT DELETE) -2. **Keep only Beautiful Soup-based E2E tests** (`simple_e2e_test.py`) -3. **Update test runners** to skip Playwright -4. **Comment out Playwright dependencies** in package.json (keep for future) - -### Phase 3: Fedora-Specific Adjustments -1. **User management**: Fedora uses different commands for user creation -2. **Python setup**: Ensure Python 3.12 is available on Fedora -3. 
**uv installer**: Verify uv works on Fedora -4. **System dependencies**: Update curl, hostname, sudo installation - -### Phase 4: Testing Strategy -1. **Keep simple E2E tests**: HTTP requests to test API endpoints -2. **Keep backend tests**: pytest-based unit tests -3. **Keep frontend tests**: Vitest/Jest tests (no browser required) -4. **Comment out**: All Playwright browser-based tests - -### Phase 5: Local Testing & CI/CD -1. **Test locally** with new Dockerfiles -2. **Fix any Fedora-specific issues** -3. **Commit and push** to trigger GitHub Actions -4. **Monitor CI/CD** and fix failures iteratively - -## Key Changes - -### Package Manager Changes -- `apt-get update && apt-get install -y` → `dnf update -y && dnf install -y` -- `apt-get clean && rm -rf /var/lib/apt/lists/*` → `dnf clean all` - -### System Package Mapping -- `python3` → `python3` (same) -- `python3-pip` → `python3-pip` (same) -- `python3-venv` → `python3-virtualenv` -- `nodejs` → `nodejs` -- `npm` → `npm` -- `curl` → `curl` (same) -- `hostname` → `hostname` (same) -- `sudo` → `sudo` (same) -- `ca-certificates` → `ca-certificates` (same) -- `dos2unix` → `dos2unix` (same) -- `wget` → `wget` (same) - -### Node.js Installation -- Replace NodeSource Ubuntu repo with Fedora approach -- Use either Fedora's built-in Node.js or NodeSource Fedora repo - -### User Management -- `groupadd -r appuser && useradd -r -g appuser appuser` should work the same on Fedora - -### Testing Changes -- Comment out Playwright test execution in `test/e2e_tests.sh` -- Keep `simple_e2e_test.py` as primary E2E testing -- Comment out Playwright dependencies in `frontend/package.json` -- Update test scripts to skip Playwright steps - -## Risk Mitigation -- **Incremental approach**: Test each Dockerfile separately -- **Fallback plan**: Can revert to Ubuntu if Fedora causes major issues -- **Simple tests**: Focus on HTTP-based tests that don't depend on browser automation -- **Preserve Playwright**: Comment out rather than delete for future use - -## Success Criteria -1. Both Dockerfiles build successfully with Fedora base -2. All non-Playwright tests pass locally -3. CI/CD pipeline passes with new configuration -4. Application runs correctly in Fedora container -5. 
API endpoints are accessible and functional - -## Files to Modify -- `Dockerfile` - Production image -- `Dockerfile-test` - Test image -- `test/e2e_tests.sh` - Comment out Playwright execution -- `frontend/package.json` - Comment out Playwright dependencies -- Any other test scripts that reference Playwright - -## Timeline -- Phase 1-3: Update Dockerfiles and dependencies -- Phase 4: Update test configuration -- Phase 5: Local testing and CI/CD validation - -## Notes -- Always use timeouts for network operations -- Test locally before pushing to CI/CD -- Monitor resource usage during Fedora migration -- Keep detailed logs of any issues encountered \ No newline at end of file diff --git a/config/overrides/mcp.json b/config/overrides/mcp.json index b6c6ba6..c3a7bbd 100644 --- a/config/overrides/mcp.json +++ b/config/overrides/mcp.json @@ -21,6 +21,16 @@ "author": "Chat UI Team", "short_description": "PowerPoint presentation generator", "help_email": "support@chatui.example.com" + }, + "pdfbasic": { + "command": ["python", "mcp/pdfbasic/main.py"], + "cwd": "backend", + "groups": ["users"], + "is_exclusive": false, + "description": "Extract and analyze text content from PDF documents, search within PDFs, and summarize content", + "author": "Chat UI Team", + "short_description": "PDF text extraction and analysis", + "help_email": "support@chatui.example.com" } } From d5dbfd3e581ce11891cac7056f2576901980f5fa Mon Sep 17 00:00:00 2001 From: Anthony Date: Thu, 30 Oct 2025 00:50:51 +0000 Subject: [PATCH 2/3] feat: enhance chat file handling and PDF analysis - Refactor chat service to directly manage file references in session context for existing S3 files, bypassing complex file handling utilities and improving efficiency - Modify PDF analysis tool to generate in-memory PDF reports with word frequency summaries, providing better visual output for text analytics --- backend/application/chat/service.py | 33 ++-- backend/mcp/file_size_test/main.py | 292 ++++++++++++++++++++++++++++ backend/mcp/pdfbasic/main.py | 112 ++++++++++- config/overrides/mcp.json | 10 + 4 files changed, 430 insertions(+), 17 deletions(-) create mode 100644 backend/mcp/file_size_test/main.py diff --git a/backend/application/chat/service.py b/backend/application/chat/service.py index 299ae00..1cea4bc 100644 --- a/backend/application/chat/service.py +++ b/backend/application/chat/service.py @@ -341,7 +341,7 @@ async def handle_attach_file( try: # Get file metadata - file_result = await self.file_manager.get_file(user_email, s3_key) + file_result = await self.file_manager.s3_client.get_file(user_email, s3_key) if not file_result: return { "type": "file_attach", @@ -359,25 +359,26 @@ async def handle_attach_file( "error": "Invalid file metadata" } - # Add file to session context - session.context = await file_utils.handle_session_files( - session_context=session.context, - user_email=user_email, - files_map={ - filename: { - "key": s3_key, - "content_type": file_result.get("content_type"), - "size": file_result.get("size"), - "filename": filename - } - }, - file_manager=self.file_manager, - update_callback=update_callback - ) + # Add file reference directly to session context (file already exists in S3) + session.context.setdefault("files", {})[filename] = { + "key": s3_key, + "content_type": file_result.get("content_type"), + "size": file_result.get("size"), + "source": "user", + "last_modified": file_result.get("last_modified"), + } sanitized_s3_key = s3_key.replace('\r', '').replace('\n', '') logger.info(f"Attached file 
({sanitized_s3_key}) to session {session_id}") + # Emit files_update to notify UI + if update_callback: + await file_utils.emit_files_update_from_context( + session_context=session.context, + file_manager=self.file_manager, + update_callback=update_callback + ) + return { "type": "file_attach", "s3_key": s3_key, diff --git a/backend/mcp/file_size_test/main.py b/backend/mcp/file_size_test/main.py new file mode 100644 index 0000000..ac92d0c --- /dev/null +++ b/backend/mcp/file_size_test/main.py @@ -0,0 +1,292 @@ +#!/usr/bin/env python3 +""" +File Size Test MCP Server using FastMCP. +Simple tool for testing file transfer by returning file size. +""" + +import base64 +import io +import os +import logging +from typing import Any, Dict, Annotated + +import requests +from fastmcp import FastMCP + +logger = logging.getLogger(__name__) + +mcp = FastMCP("File_Size_Test") + + +@mcp.tool +def process_file_demo( + filename: Annotated[str, "The file to process (URL or base64)"], + username: Annotated[str, "Username for auditing"] = None +) -> Dict[str, Any]: + """ + Demo tool that processes a file and returns a new transformed file. + + This tool demonstrates the v2 MCP artifacts contract by: + - Accepting a file input + - Processing it (converting text to uppercase for demo) + - Returning a new file as an artifact with proper v2 format + - Including display hints for canvas viewing + + **v2 Artifacts Contract:** + - Uses artifacts array with base64 content + - Includes MIME types and metadata + - Provides display hints for canvas behavior + - Supports username injection for auditing + + **File Processing:** + - For text files: converts content to uppercase + - For binary files: demonstrates file modification capability + - Preserves original file structure where possible + + **Return Format:** + - results: Summary of operation + - artifacts: Array containing the processed file + - display: Canvas hints (open_canvas: true, primary_file, etc.) 
+ - meta_data: Additional processing details + + Args: + filename: File reference (URL or base64 data) to process + username: Injected user identity for auditing + + Returns: + Dictionary with results, artifacts, and display hints per v2 contract + """ + print(f"DEBUG: process_file_demo called with filename: {filename}") + print(f"DEBUG: username: {username}") + try: + # Get the file content (reuse logic from get_file_size) + is_url = ( + filename.startswith("http://") or + filename.startswith("https://") or + filename.startswith("/api/") or + filename.startswith("/") + ) + print(f"DEBUG: is_url determined as: {is_url}") + + if is_url: + if filename.startswith("/"): + backend_url = os.getenv("BACKEND_URL", "http://localhost:8000") + url = f"{backend_url}{filename}" + else: + url = filename + logger.info(f"Downloading file for processing: {url}") + response = requests.get(url) + response.raise_for_status() + file_bytes = response.content + original_filename = filename.split('/')[-1] or "processed_file.txt" + else: + # Assume base64 + logger.info("Decoding base64 for file processing") + file_bytes = base64.b64decode(filename) + original_filename = "processed_file.txt" + + print(f"DEBUG: Original file size: {len(file_bytes)} bytes") + + # Process the file (demo: convert text to uppercase) + try: + # Try to decode as text for processing + original_text = file_bytes.decode('utf-8') + processed_text = original_text.upper() + processed_bytes = processed_text.encode('utf-8') + processed_mime = "text/plain" + description = "Processed text (converted to uppercase)" + except UnicodeDecodeError: + # If not text, do a simple binary modification (demo purpose) + processed_bytes = file_bytes + b"\n[DEMO PROCESSED]" + processed_mime = "application/octet-stream" + description = "Processed binary file (demo modification)" + + # Create artifact + processed_b64 = base64.b64encode(processed_bytes).decode('ascii') + new_filename = f"processed_{original_filename}" + + # Create display hints + display_hints = { + "open_canvas": True, + "primary_file": new_filename, + "mode": "replace", + "viewer_hint": "auto" + } + + result = { + "results": { + "operation": "process_file_demo", + "original_filename": original_filename, + "processed_filename": new_filename, + "original_size": len(file_bytes), + "processed_size": len(processed_bytes), + "processing_type": "text_uppercase" if 'original_text' in locals() else "binary_demo", + "status": "success" + }, + "meta_data": { + "is_error": False, + "processed_by": "process_file_demo_v2", + "username": username, + "mime_type": processed_mime + }, + "artifacts": [ + { + "name": new_filename, + "b64": processed_b64, + "mime": processed_mime, + "size": len(processed_bytes), + "description": description, + "viewer": "auto" + } + ], + "display": display_hints + } + print(f"DEBUG: About to return processed file result: {result['results']}") + return result + + except Exception as e: + print(f"DEBUG: Exception in process_file_demo: {str(e)}") + import traceback + traceback.print_exc() + error_result = { + "results": { + "operation": "process_file_demo", + "error": f"File processing failed: {str(e)}", + "filename": filename + }, + "meta_data": { + "is_error": True, + "error_type": type(e).__name__, + "username": username + } + } + return error_result + + +@mcp.tool +def get_file_size( + filename: Annotated[str, "The file to check (URL or base64)"] +) -> Dict[str, Any]: + """ + Test file transfer by returning the size of the transferred file. 
+ + This simple tool is designed for testing file transfer functionality + between frontend and backend. It accepts a file and returns its size in bytes. + + **File Input Support:** + - URL-based files (http://, https://, or /api/ paths) + - Base64-encoded file data + - Automatic backend URL construction for relative paths + + **Return Information:** + - File size in bytes + - File size in human-readable format (KB, MB) + - Original filename or URL + + **Use Cases:** + - Testing file upload/download workflows + - Validating file transfer infrastructure + - Debugging file handling issues + - Verifying file size limits + + Args: + filename: File reference (URL or base64 data) + + Returns: + Dictionary containing: + - operation: "get_file_size" + - filename: Original filename/URL + - size_bytes: File size in bytes + - size_human: Human-readable size (e.g., "1.5 MB") + Or error message if file cannot be accessed + """ + print(f"DEBUG: get_file_size called with filename: {filename}") + print(f"DEBUG: filename type: {type(filename)}, length: {len(filename) if filename else 0}") + try: + # Check if filename is a URL (absolute or relative) + is_url = ( + filename.startswith("http://") or + filename.startswith("https://") or + filename.startswith("/api/") or + filename.startswith("/") + ) + print(f"DEBUG: is_url determined as: {is_url}") + + if is_url: + # Convert relative URLs to absolute URLs + if filename.startswith("/"): + backend_url = os.getenv("BACKEND_URL", "http://localhost:8000") + url = f"{backend_url}{filename}" + print(f"DEBUG: Constructing URL from relative path: {filename} -> {url}") + else: + url = filename + print(f"DEBUG: Using absolute URL: {url}") + + print(f"DEBUG: About to download from URL: {url}") + logger.info(f"Downloading file from URL: {url}") + response = requests.get(url) + print(f"DEBUG: HTTP response status: {response.status_code}") + response.raise_for_status() + file_bytes = response.content + print(f"DEBUG: Successfully downloaded file content, length: {len(file_bytes)} bytes") + else: + # Assume it's base64-encoded data + print(f"DEBUG: Treating input as base64 data, attempting to decode") + logger.info("Decoding base64 file data") + file_bytes = base64.b64decode(filename) + print(f"DEBUG: Successfully decoded base64 data, length: {len(file_bytes)} bytes") + + # Calculate file size + size_bytes = len(file_bytes) + size_human = _format_size(size_bytes) + print(f"DEBUG: Calculated file size: {size_bytes} bytes ({size_human})") + + result = { + "results": { + "operation": "get_file_size", + "filename": filename, + "size_bytes": size_bytes, + "size_human": size_human, + "status": "success" + }, + "meta_data": { + "is_error": False, + "transfer_method": "url" if is_url else "base64" + } + } + print(f"DEBUG: About to return success result: {result}") + return result + + except Exception as e: + print(f"DEBUG: Exception occurred while processing file: {str(e)}") + print(f"DEBUG: Exception type: {type(e).__name__}") + print(f"DEBUG: Filename that caused error: {filename}") + import traceback + print("DEBUG: Full traceback:") + traceback.print_exc() + error_result = { + "results": { + "operation": "get_file_size", + "error": f"File size check failed: {str(e)}", + "filename": filename + }, + "meta_data": { + "is_error": True, + "error_type": type(e).__name__ + } + } + print(f"DEBUG: About to return error result: {error_result}") + return error_result + + +def _format_size(size_bytes: int) -> str: + """Format file size in human-readable format.""" + for unit in ['B', 'KB', 
'MB', 'GB', 'TB']: + if size_bytes < 1024.0: + return f"{size_bytes:.2f} {unit}" + size_bytes /= 1024.0 + return f"{size_bytes:.2f} PB" + + +if __name__ == "__main__": + mcp.run() diff --git a/backend/mcp/pdfbasic/main.py b/backend/mcp/pdfbasic/main.py index c429fc0..c778791 100644 --- a/backend/mcp/pdfbasic/main.py +++ b/backend/mcp/pdfbasic/main.py @@ -131,7 +131,7 @@ def analyze_pdf( """ Extract and analyze text content from PDF documents with comprehensive word frequency analysis. - This powerful PDF processing tool provides detailed text analytics for PDF documents: + This PDF processing tool provides detailed text analytics for PDF documents: **PDF Text Extraction:** - Extracts text from all pages in PDF documents @@ -274,6 +274,116 @@ def generate_report_about_pdf( if "error" in analysis_result.get("results", {}): return analysis_result + # --- 2. Generate the PDF report --- + try: + results_data = analysis_result["results"] + + # Create PDF report in memory + pdf_buffer = io.BytesIO() + c = canvas.Canvas(pdf_buffer, pagesize=letter) + width, height = letter + + # Title + c.setFont("Helvetica-Bold", 16) + c.drawString(1 * inch, height - 1 * inch, "PDF Analysis Report") + + # Document info + c.setFont("Helvetica-Bold", 12) + c.drawString(1 * inch, height - 1.5 * inch, "Document:") + c.setFont("Helvetica", 10) + c.drawString(1.5 * inch, height - 1.5 * inch, results_data.get("filename", "Unknown")) + + # Total word count + c.setFont("Helvetica-Bold", 12) + c.drawString(1 * inch, height - 2 * inch, "Total Words:") + c.setFont("Helvetica", 10) + c.drawString(1.5 * inch, height - 2 * inch, str(results_data.get("total_word_count", 0))) + + # Top 100 words header + c.setFont("Helvetica-Bold", 12) + c.drawString(1 * inch, height - 2.5 * inch, "Top 100 Most Frequent Words:") + + # Display top words in columns + c.setFont("Helvetica", 9) + y_position = height - 3 * inch + x_col1 = 1 * inch + x_col2 = 3.5 * inch + x_col3 = 6 * inch + + top_100_words = results_data.get("top_100_words", {}) + words_list = list(top_100_words.items()) + + for idx, (word, count) in enumerate(words_list): + # Determine column position + col = idx % 3 + if col == 0: + x_pos = x_col1 + elif col == 1: + x_pos = x_col2 + else: + x_pos = x_col3 + + # Move to next row after every 3 words + if col == 0 and idx > 0: + y_position -= 0.2 * inch + + # Check if we need a new page + if y_position < 1 * inch: + c.showPage() + c.setFont("Helvetica", 9) + y_position = height - 1 * inch + + # Draw word and count + text = f"{word}: {count}" + c.drawString(x_pos, y_position, text) + + c.save() + + # Get PDF bytes and encode to base64 + pdf_bytes = pdf_buffer.getvalue() + pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8') + + # --- 3. 
Return the structured response (v2 MCP compliant) --- + report_name = f"analysis_report_{results_data.get('filename', 'document').replace('.pdf', '')}.pdf" + + return { + "results": { + "operation": "pdf_report_generation", + "status": "Success", + "message": f"Generated analysis report for {results_data.get('filename', 'document')}", + "total_word_count": results_data.get("total_word_count", 0), + "words_analyzed": len(top_100_words) + }, + "artifacts": [ + { + "name": report_name, + "b64": pdf_base64, + "mime": "application/pdf", + "size": len(pdf_bytes), + "description": "PDF analysis report with word frequency statistics" + } + ], + "display": { + "open_canvas": True, + "primary_file": report_name, + "mode": "replace", + "viewer_hint": "pdf" + }, + "meta_data": { + "source_file": results_data.get("filename", "Unknown"), + "total_words": results_data.get("total_word_count", 0) + } + } + + except Exception as e: + import traceback + traceback.print_exc() + return { + "results": { + "error": f"Report generation failed: {str(e)}" + } + } + if __name__ == "__main__": diff --git a/config/overrides/mcp.json b/config/overrides/mcp.json index c3a7bbd..89b5ba6 100644 --- a/config/overrides/mcp.json +++ b/config/overrides/mcp.json @@ -31,6 +31,16 @@ "author": "Chat UI Team", "short_description": "PDF text extraction and analysis", "help_email": "support@chatui.example.com" + }, + "file_size_test": { + "command": ["python", "mcp/file_size_test/main.py"], + "cwd": "backend", + "groups": ["users"], + "is_exclusive": false, + "description": "Simple test tool that accepts a file transfer and returns the file size in bytes", + "author": "Chat UI Team", + "short_description": "File transfer test tool", + "help_email": "support@chatui.example.com" } } From 88cabf267590d2f8694457e0564d908c4fb5cfa7 Mon Sep 17 00:00:00 2001 From: Anthony Date: Thu, 30 Oct 2025 00:55:12 +0000 Subject: [PATCH 3/3] fix: resolve Dependabot configuration issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove non-existent security-team references from reviewers and assignees. The required labels (security, github-actions, python, docker) have been created directly in the repository. This fixes the configuration errors reported in PRs #22 and #23. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- .github/dependabot.yml | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2969475..d0afe24 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,10 +9,6 @@ updates: day: "monday" time: "09:00" open-pull-requests-limit: 10 - reviewers: - - "security-team" - assignees: - - "security-team" commit-message: prefix: "security(deps)" include: "scope" @@ -37,10 +33,6 @@ updates: day: "monday" time: "09:00" open-pull-requests-limit: 10 - reviewers: - - "security-team" - assignees: - - "security-team" commit-message: prefix: "security(deps)" include: "scope" @@ -69,10 +61,6 @@ updates: interval: "weekly" day: "tuesday" time: "09:00" - reviewers: - - "security-team" - assignees: - - "security-team" commit-message: prefix: "security(docker)" labels: @@ -87,10 +75,6 @@ updates: interval: "weekly" day: "wednesday" time: "09:00" - reviewers: - - "security-team" - assignees: - - "security-team" commit-message: prefix: "security(actions)" labels:
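
Note on the shared file-transfer handling: both pdfbasic and file_size_test now repeat the same three-way branch for the `filename` argument — absolute URL, relative `/api/` path (rewritten by the Step 6.1 logic in `prepare_tool_arguments`), or raw base64 content. A minimal sketch of that shared resolution logic, assuming the `BACKEND_URL` fallback the patches use; the helper name and the explicit request timeout are illustrative additions, not part of the diff:

```python
import base64
import os

import requests


def resolve_file_argument(filename: str) -> bytes:
    """Resolve a tool's 'filename' argument to raw bytes.

    The framework may pass an absolute URL, a relative /api/ download
    path, or raw base64-encoded content in the same parameter.
    """
    if filename.startswith(("http://", "https://", "/")):
        if filename.startswith("/"):
            # Relative paths resolve against the backend; the patches
            # default to localhost:8000 for local development.
            backend = os.getenv("BACKEND_URL", "http://localhost:8000")
            filename = f"{backend}{filename}"
        # Assumption: a timeout is added here; the patched tools call
        # requests.get(url) without one.
        response = requests.get(filename, timeout=30)
        response.raise_for_status()
        return response.content
    # Anything that is not URL-shaped is assumed to be base64 content.
    return base64.b64decode(filename)
```

Extracting this into a shared module would also remove the duplicated `/api/` startswith check, which is already covered by the bare `/` prefix test.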
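For reference, the return shape both new tools converge on — the "v2 MCP format" their docstrings cite — distilled from `generate_report_about_pdf` and `process_file_demo`; the wrapper function below is hypothetical, and only the field names and nesting are taken from the diff:

```python
import base64


def v2_tool_response(report_bytes: bytes, report_name: str, source: str) -> dict:
    """Sketch of the v2 contract: results + artifacts + display + meta_data."""
    return {
        # Summary payload returned to the LLM.
        "results": {
            "operation": "pdf_report_generation",
            "status": "Success",
            "message": f"Generated analysis report for {source}",
        },
        # Files surfaced to the UI, base64-encoded with MIME metadata.
        "artifacts": [
            {
                "name": report_name,
                "b64": base64.b64encode(report_bytes).decode("utf-8"),
                "mime": "application/pdf",
                "size": len(report_bytes),
                "description": "PDF analysis report with word frequency statistics",
            }
        ],
        # Canvas hints: open the viewer on the generated file.
        "display": {
            "open_canvas": True,
            "primary_file": report_name,
            "mode": "replace",
            "viewer_hint": "pdf",
        },
        # Auditing / provenance details.
        "meta_data": {"source_file": source},
    }
```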