tale-project · larryro · Dec 12, 2025 · Dec 12, 2025 · coderabbitai · Dec 12, 2025
diff --git a/services/crawler/app/file_parser_service.py b/services/crawler/app/file_parser_service.py
@@ -0,0 +1,147 @@
+"""
+File Parser Service for extracting text content from documents.
+
+Handles:
+- PDF text extraction using PyMuPDF
+- DOCX text extraction using python-docx
+- PPTX text extraction using python-pptx
+"""
+
+import logging
+from io import BytesIO
+from typing import Dict, Any
+
+logger = logging.getLogger(__name__)
+
+
+class FileParserService:
+    """Service for parsing and extracting text from various document formats."""
+
+    def parse_pdf(self, file_bytes: bytes, filename: str = "document.pdf") -> Dict[str, Any]:
+        """Extract text content from a PDF file.
+
+        Args:
+            file_bytes: Raw bytes of the PDF document.
+            filename: Name echoed back in the result dict.
+
+        Returns:
+            On success, a dict with ``success=True``, ``filename``, ``file_type``,
+            ``page_count``, ``pages`` (one ``{"page_number", "text"}`` entry per
+            page, numbered from 1), ``full_text`` (page texts joined by blank
+            lines) and ``metadata`` (title, author, subject).
+            On failure, a dict with ``success=False``, ``filename``,
+            ``file_type`` and ``error``; parsing errors are logged, not raised.
+        """
+        import fitz  # PyMuPDF
+
+        try:
+            # The context manager closes the document even when extraction
+            # fails part-way through, so the handle is never leaked.
+            with fitz.open(stream=file_bytes, filetype="pdf") as doc:
+                pages = []
+                full_text = []
+
+                for page_num, page in enumerate(doc, start=1):
+                    text = page.get_text("text")
+                    pages.append({"page_number": page_num, "text": text.strip()})
+                    full_text.append(text)
+
+                metadata = doc.metadata or {}
+
+            return {
+                "success": True,
+                "filename": filename,
+                "file_type": "application/pdf",
+                "page_count": len(pages),
+                "pages": pages,
+                "full_text": "\n\n".join(full_text).strip(),
+                "metadata": {
+                    # `or ""` also covers keys that are present with a None value.
+                    "title": metadata.get("title") or "",
+                    "author": metadata.get("author") or "",
+                    "subject": metadata.get("subject") or "",
+                },
+            }
+        except Exception as e:
+            # logger.exception keeps the traceback; the caller only gets str(e).
+            logger.exception("Error parsing PDF")
+            return {"success": False, "filename": filename, "file_type": "application/pdf", "error": str(e)}
+
+    def parse_docx(self, file_bytes: bytes, filename: str = "document.docx") -> Dict[str, Any]:
+        """Extract text content from a DOCX file.
+
+        Args:
+            file_bytes: Raw bytes of the DOCX document.
+            filename: Name echoed back in the result dict.
+
+        Returns:
+            On success, a dict with ``success=True``, ``filename``, ``file_type``,
+            ``paragraph_count``, ``table_count``, ``paragraphs`` (non-empty
+            paragraphs as ``{"text", "style"}``), ``tables`` (each table as a
+            list of rows of stripped cell text), ``full_text`` and ``metadata``
+            (title, author).
+            On failure, a dict with ``success=False``, ``filename``,
+            ``file_type`` and ``error``; parsing errors are logged, not raised.
+        """
+        from docx import Document
+
+        docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+
+        try:
+            doc = Document(BytesIO(file_bytes))
+
+            # Keep only paragraphs that still have text after stripping.
+            paragraphs = []
+            for para in doc.paragraphs:
+                text = para.text.strip()
+                if text:
+                    paragraphs.append({"text": text, "style": para.style.name if para.style else None})
+
+            tables = []
+            for table in doc.tables:
+                table_data = [[cell.text.strip() for cell in row.cells] for row in table.rows]
+                if table_data:
+                    tables.append(table_data)
+
+            # NOTE(review): full_text is built from paragraphs only; table cell
+            # text is returned under "tables" but is not part of full_text.
+            full_text = "\n".join(p["text"] for p in paragraphs)
+            core_props = doc.core_properties
+
+            return {
+                "success": True,
+                "filename": filename,
+                "file_type": docx_mime,
+                "paragraph_count": len(paragraphs),
+                "table_count": len(tables),
+                "paragraphs": paragraphs,
+                "tables": tables,
+                "full_text": full_text,
+                "metadata": {"title": core_props.title or "", "author": core_props.author or ""},
+            }
+        except Exception as e:
+            # logger.exception keeps the traceback; the caller only gets str(e).
+            logger.exception("Error parsing DOCX")
+            return {
+                "success": False,
+                "filename": filename,
+                "file_type": docx_mime,
+                "error": str(e),
+            }
+
+    def parse_pptx(self, file_bytes: bytes, filename: str = "presentation.pptx") -> Dict[str, Any]:
+        """Extract text content from a PPTX file.
+
+        Args:
+            file_bytes: Raw bytes of the PPTX presentation.
+            filename: Name echoed back in the result dict.
+
+        Returns:
+            On success, a dict with ``success=True``, ``filename``, ``file_type``,
+            ``slide_count``, ``slides`` (one ``{"slide_number", "text_content",
+            "full_text"}`` entry per slide, numbered from 1), ``full_text`` and
+            ``metadata`` (title, author).
+            On failure, a dict with ``success=False``, ``filename``,
+            ``file_type`` and ``error``; parsing errors are logged, not raised.
+        """
+        from pptx import Presentation
+
+        pptx_mime = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+
+        try:
+            prs = Presentation(BytesIO(file_bytes))
+            slides = []
+            full_text_parts = []
+
+            for slide_num, slide in enumerate(prs.slides, start=1):
+                slide_text = []
+                for shape in slide.shapes:
+                    if shape.has_text_frame:
+                        for paragraph in shape.text_frame.paragraphs:
+                            text = paragraph.text.strip()
+                            if text:
+                                slide_text.append(text)
+                    # Read defensively: a shape type that does not expose
+                    # has_table is treated as having no table instead of
+                    # failing the whole parse.
+                    if getattr(shape, "has_table", False):
+                        for row in shape.table.rows:
+                            for cell in row.cells:
+                                text = cell.text.strip()
+                                if text:
+                                    slide_text.append(text)
+
+                slides.append({"slide_number": slide_num, "text_content": slide_text, "full_text": "\n".join(slide_text)})
+                full_text_parts.extend(slide_text)
+
+            core_props = prs.core_properties
+            return {
+                "success": True,
+                "filename": filename,
+                "file_type": pptx_mime,
+                "slide_count": len(slides),
+                "slides": slides,
+                "full_text": "\n\n".join(full_text_parts),
+                "metadata": {"title": core_props.title or "", "author": core_props.author or ""},
+            }
+        except Exception as e:
+            # logger.exception keeps the traceback; the caller only gets str(e).
+            logger.exception("Error parsing PPTX")
+            # Include file_type so the error payload matches the PDF parser's.
+            return {
+                "success": False,
+                "filename": filename,
+                "file_type": pptx_mime,
+                "error": str(e),
+            }
+
+    def parse_file(self, file_bytes: bytes, filename: str, content_type: str = "") -> Dict[str, Any]:
+        """Route a file to the matching parser by extension or content type.
+
+        Checks run in the order PDF, DOCX, PPTX; a format matches when the
+        lower-cased filename ends with its extension or the lower-cased
+        content type contains its marker. When nothing matches, a failure
+        dict naming the supported formats is returned.
+        """
+        name = filename.lower()
+        mime = content_type.lower() if content_type else ""
+
+        def matches(suffix: str, marker: str) -> bool:
+            # A format is selected by either the extension or the MIME marker.
+            return name.endswith(suffix) or marker in mime
+
+        if matches(".pdf", "pdf"):
+            return self.parse_pdf(file_bytes, filename)
+        if matches(".docx", "wordprocessingml"):
+            return self.parse_docx(file_bytes, filename)
+        if matches(".pptx", "presentationml"):
+            return self.parse_pptx(file_bytes, filename)
+
+        unsupported = f"Unsupported file type: {filename} ({content_type}). Supported: PDF, DOCX, PPTX."
+        return {
+            "success": False,
+            "filename": filename,
+            "error": unsupported,
+        }
diff --git a/services/crawler/app/main.py b/services/crawler/app/main.py
@@ -37,10 +37,24 @@
     GeneratePptxResponse,
     GenerateDocxRequest,
     GenerateDocxResponse,
+    # File parsing models
+    ParseFileResponse,
 )
 from app.crawler_service import get_crawler_service
 from app.converter_service import get_converter_service
 from app.template_service import get_template_service
+from app.file_parser_service import FileParserService
+
+# Global file parser service instance
+_file_parser_service: FileParserService | None = None
+
+
+def get_file_parser_service() -> FileParserService:
+    """Return the module-level FileParserService, creating it on first use."""
+    global _file_parser_service
+    if _file_parser_service is not None:
+        return _file_parser_service
+    # First call: build the instance and remember it for later calls.
+    _file_parser_service = FileParserService()
+    return _file_parser_service
 
 
 # Configure logging
@@ -886,6 +900,53 @@ async def generate_docx_from_template(
         )
 
 
+# ==================== File Parsing Endpoints ====================
+
+
+@app.post("/api/v1/parse/file", response_model=ParseFileResponse)
+async def parse_file_upload(
+    file: UploadFile = File(..., description="File to parse (PDF, DOCX, or PPTX)"),
+):
+    """
+    Parse a document file and extract its text content.
+
+    Supports PDF, DOCX, and PPTX files. Returns the extracted text content
+    along with metadata like page count, paragraph count, or slide count.
+
+    Args:
+        file: The document file to parse
+
+    Returns:
+        Parsed content including full text and metadata. Unexpected errors
+        are reported as a response with ``success=False`` and an ``error``
+        message rather than as an HTTP error.
+
+    Raises:
+        HTTPException: 400 when the uploaded file is empty.
+    """
+    try:
+        file_bytes = await file.read()
+
+        if not file_bytes:
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail="Empty file uploaded",
+            )
+
+        # The upload may arrive without a name or content type.
+        filename = file.filename or "unknown"
+        content_type = file.content_type or ""
+
+        parser = get_file_parser_service()
+        result = parser.parse_file(file_bytes, filename, content_type)
+
+        return ParseFileResponse(**result)
+
+    except HTTPException:
+        # Let deliberate HTTP errors (such as the 400 above) propagate unchanged.
+        raise
+    except Exception as e:
+        # logger.exception records the traceback, which logger.error dropped.
+        logger.exception("Error parsing file")
+        return ParseFileResponse(
+            success=False,
+            filename=file.filename or "unknown",
+            error=f"Failed to parse file: {str(e)}",
+        )
+
+
 if __name__ == "__main__":
     import uvicorn
 

diff --git a/services/crawler/app/models.py b/services/crawler/app/models.py
@@ -277,4 +277,18 @@ class GenerateDocxResponse(BaseModel):
     error: Optional[str] = Field(None, description="Error message if generation failed")
 
 
+# ==================== File Parsing Models ====================
 
+
+class ParseFileResponse(BaseModel):
+    """Response from file parsing.
+
+    Only ``success`` and ``filename`` are required; every other field
+    defaults to ``None``. The count fields are format-specific, as their
+    descriptions indicate (pages for PDF, paragraphs for DOCX, slides for
+    PPTX).
+    """
+
+    success: bool = Field(..., description="Whether parsing was successful")
+    filename: str = Field(..., description="Original filename")
+    file_type: Optional[str] = Field(None, description="Detected file MIME type")
+    full_text: Optional[str] = Field(None, description="Full extracted text content")
+    page_count: Optional[int] = Field(None, description="Number of pages (PDF)")
+    paragraph_count: Optional[int] = Field(None, description="Number of paragraphs (DOCX)")
+    slide_count: Optional[int] = Field(None, description="Number of slides (PPTX)")
+    metadata: Optional[Dict[str, Any]] = Field(None, description="Document metadata")
+    error: Optional[str] = Field(None, description="Error message if parsing failed")
diff --git a/services/crawler/requirements.txt b/services/crawler/requirements.txt
@@ -20,4 +20,5 @@ markdown==3.7
 # Office document generation
 python-pptx==1.0.2      # PPTX parsing and generation
 python-docx==1.1.2      # DOCX generation
+pymupdf==1.25.5         # PDF parsing (PyMuPDF/fitz)
 
diff --git a/services/platform/app/(app)/dashboard/[id]/chat/components/chat-input.tsx b/services/platform/app/(app)/dashboard/[id]/chat/components/chat-input.tsx
@@ -2,7 +2,7 @@
 
 import { Textarea } from '@/components/ui/textarea';
 import { ComponentPropsWithoutRef, useRef, useState } from 'react';
-import { X } from 'lucide-react';
+import { X, Paperclip } from 'lucide-react';
 import { useMutation } from 'convex/react';
 import { api } from '@/convex/_generated/api';
 import { toast } from '@/hooks/use-toast';
@@ -164,6 +164,36 @@ export default function ChatInput({
     }
   };
 
+  // Upload any images found on the clipboard when the user pastes.
+  const handlePaste = (e: React.ClipboardEvent) => {
+    const clipboardItems = e.clipboardData?.items;
+    if (!clipboardItems) return;
+
+    // Collect the images straight into a DataTransfer, which exposes the
+    // FileList that uploadFiles expects.
+    const transfer = new DataTransfer();
+    for (const entry of Array.from(clipboardItems)) {
+      if (!entry.type.startsWith('image/')) continue;
+
+      const pasted = entry.getAsFile();
+      if (!pasted) continue;
+
+      // Give the file a meaningful, timestamped name.
+      const extension = entry.type.split('/')[1] || 'png';
+      const timestamp = new Date().toISOString().replace(/[:.]/g, '-');
+      transfer.items.add(
+        new File([pasted], `pasted-image-${timestamp}.${extension}`, {
+          type: pasted.type,
+        }),
+      );
+    }
+
+    if (transfer.files.length > 0) {
+      uploadFiles(transfer.files);
+    }
+  };
+
   const handleFileInputChange = (e: React.ChangeEvent<HTMLInputElement>) => {
     const files = e.target.files;
     if (files && files.length > 0) {
@@ -311,6 +341,7 @@ export default function ChatInput({
               value={value}
               onChange={(e) => handleInputChange(e.target.value)}
               onKeyDown={handleKeyDown}
+              onPaste={handlePaste}
               className="min-h-[100px] relative border-0 shadow-none resize-none focus-visible:ring-0 focus-visible:ring-offset-0 text-foreground px-0 py-0 bg-transparent placeholder:text-muted-foreground"
               disabled={isLoading}
               placeholder=""
@@ -335,10 +366,25 @@ export default function ChatInput({
                     </svg>
                   </span>
                 </div>
-                to send or drag files here.
+                to send
               </div>
             )}
           </div>
+
+          {/* Action buttons row */}
+          <div className="flex items-center pb-3">
+            {/* Attachment button */}
+            <button
+              type="button"
+              onClick={() => fileInputRef.current?.click()}
+              disabled={isLoading}
+              className="flex items-center gap-1.5 text-muted-foreground hover:text-foreground transition-colors disabled:opacity-50 disabled:cursor-not-allowed"
+              title="Attach files"
+            >
+              <Paperclip className="size-4" />
+              <span className="text-xs">Attach</span>
+            </button>
+          </div>
         </div>
       </div>
     </div>

diff --git a/services/platform/app/(app)/dashboard/[id]/chat/components/chat-interface.tsx b/services/platform/app/(app)/dashboard/[id]/chat/components/chat-interface.tsx
@@ -202,7 +202,15 @@ export default function ChatInterface({
     if (
       optimisticMessage?.content &&
       rawThreadMessages !== undefined &&
-      threadMessages?.some((m) => m.role === 'user' && m.content === optimisticMessage.content)
+      threadMessages?.some((m) => {
+        if (m.role !== 'user') return false;
+        // Check for exact match OR if the message starts with the optimistic content
+        // (handles case where images are appended as markdown)
+        return (
+          m.content === optimisticMessage.content ||
+          m.content.startsWith(optimisticMessage.content)
+        );
+      })
     ) {
       setOptimisticMessage(null);
     }
@@ -283,10 +291,19 @@ export default function ChatInterface({
       }
 
       // Send message and start polling
+      // Convert attachments to the format expected by the mutation
+      const mutationAttachments = attachments?.map((a) => ({
+        fileId: a.fileId,
+        fileName: a.fileName,
+        fileType: a.fileType,
+        fileSize: a.fileSize,
+      }));
+
       const result = await chatWithAgent({
         threadId: currentThreadId,
         organizationId,
         message: userMessage.content,
+        attachments: mutationAttachments,
       });
 
       setCurrentRunId(result.runId);

diff --git a/services/platform/convex/_generated/api.d.ts b/services/platform/convex/_generated/api.d.ts
@@ -24,6 +24,7 @@ import type * as agent_tools_convex_tools_customers_helpers_types from "../agent
 import type * as agent_tools_convex_tools_files_docx_tool from "../agent_tools/convex_tools/files/docx_tool.js";
 import type * as agent_tools_convex_tools_files_generate_excel_tool from "../agent_tools/convex_tools/files/generate_excel_tool.js";
 import type * as agent_tools_convex_tools_files_helpers_check_resource_accessible from "../agent_tools/convex_tools/files/helpers/check_resource_accessible.js";
+import type * as agent_tools_convex_tools_files_helpers_parse_file from "../agent_tools/convex_tools/files/helpers/parse_file.js";
 import type * as agent_tools_convex_tools_files_image_tool from "../agent_tools/convex_tools/files/image_tool.js";
 import type * as agent_tools_convex_tools_files_pdf_tool from "../agent_tools/convex_tools/files/pdf_tool.js";
 import type * as agent_tools_convex_tools_files_pptx_tool from "../agent_tools/convex_tools/files/pptx_tool.js";
@@ -623,6 +624,7 @@ declare const fullApi: ApiFromModules<{
   "agent_tools/convex_tools/files/docx_tool": typeof agent_tools_convex_tools_files_docx_tool;
   "agent_tools/convex_tools/files/generate_excel_tool": typeof agent_tools_convex_tools_files_generate_excel_tool;
   "agent_tools/convex_tools/files/helpers/check_resource_accessible": typeof agent_tools_convex_tools_files_helpers_check_resource_accessible;
+  "agent_tools/convex_tools/files/helpers/parse_file": typeof agent_tools_convex_tools_files_helpers_parse_file;
   "agent_tools/convex_tools/files/image_tool": typeof agent_tools_convex_tools_files_image_tool;
   "agent_tools/convex_tools/files/pdf_tool": typeof agent_tools_convex_tools_files_pdf_tool;
   "agent_tools/convex_tools/files/pptx_tool": typeof agent_tools_convex_tools_files_pptx_tool;