7 changes: 4 additions & 3 deletions .github/workflows/ci.yml
@@ -34,8 +34,10 @@ jobs:
tags: |
type=ref,event=branch
type=ref,event=pr
type=sha,prefix={{branch}}-
type=raw,value=latest,enable={{is_default_branch}}
# Add SHA tags safely for both branches and PRs without generating an invalid leading '-'
type=sha,enable=true,prefix=${{ github.ref_name }}-
type=sha,enable=${{ github.event_name == 'pull_request' }},prefix=pr-
type=raw,value=latest,enable=${{ github.ref == 'refs/heads/main' }}

- name: Build test Docker image
uses: docker/build-push-action@v6
@@ -72,4 +74,3 @@ jobs:
VITE_APP_NAME=Chat UI
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

3 changes: 2 additions & 1 deletion .gitignore
@@ -78,6 +78,7 @@ test-results/

# MinIO Data (persistent storage)
data/minio/
minio-data/

# Legacy S3 Mock Storage (deprecated)
mocks/s3-mock/s3-mock-storage/
mocks/s3-mock/s3-mock-storage/
81 changes: 78 additions & 3 deletions backend/application/chat/service.py
@@ -219,9 +219,7 @@ async def handle_chat_message(
# Prepend as system message override
messages = [{"role": "system", "content": prompt_text}] + messages
logger.info(
"Applied MCP system prompt override from %s:%s (len=%d)",
server,
prompt_name,
"Applied MCP system prompt override (len=%d)",
len(prompt_text),
)
break # apply only one
@@ -321,6 +319,83 @@ async def handle_reset_session(
"message": "New session created"
}

async def handle_attach_file(
self,
session_id: UUID,
s3_key: str,
user_email: Optional[str] = None,
update_callback: Optional[UpdateCallback] = None
) -> Dict[str, Any]:
"""Attach a file from library to the current session."""
session = self.sessions.get(session_id)
if not session:
session = await self.create_session(session_id, user_email)

# Verify the file exists and belongs to the user
if not self.file_manager or not user_email:
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": "File manager not available or no user email"
}
Comment on lines +335 to +341

Copilot AI Oct 25, 2025

The error message combines two distinct failure cases. Split this into separate error messages: 'File manager not available' and 'User email required' for clearer debugging and better user experience.

Suggested change
if not self.file_manager or not user_email:
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": "File manager not available or no user email"
}
if not self.file_manager:
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": "File manager not available"
}
if not user_email:
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": "User email required"
}

try:
# Get file metadata
file_result = await self.file_manager.get_file(user_email, s3_key)
if not file_result:
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": "File not found"
}

filename = file_result.get("filename")
if not filename:
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": "Invalid file metadata"
}

# Add file to session context
session.context = await file_utils.handle_session_files(
session_context=session.context,
user_email=user_email,
files_map={
filename: {
"key": s3_key,
"content_type": file_result.get("content_type"),
"size": file_result.get("size"),
"filename": filename
}
},
file_manager=self.file_manager,
update_callback=update_callback
)

sanitized_s3_key = s3_key.replace('\r', '').replace('\n', '')
logger.info(f"Attached file ({sanitized_s3_key}) to session {session_id}")

return {
"type": "file_attach",
"s3_key": s3_key,
"filename": filename,
"success": True,
"message": f"File {filename} attached to session"
}

except Exception as e:
logger.error(f"Failed to attach file {s3_key.replace('\n', '').replace('\r', '')} to session {session_id}: {str(e).replace('\n', '').replace('\r', '')}")
return {
"type": "file_attach",
"s3_key": s3_key,
"success": False,
"error": str(e)
}

async def _handle_plain_mode(
self,
session: Session,
@@ -1,6 +1,6 @@
"""WebSocket connection adapter implementing ChatConnectionProtocol."""

from typing import Any, Dict
from typing import Any, Dict, Optional

from fastapi import WebSocket

@@ -12,10 +12,11 @@ class WebSocketConnectionAdapter:
Adapter that wraps FastAPI WebSocket to implement ChatConnectionProtocol.
This isolates the application layer from FastAPI-specific types.
"""
def __init__(self, websocket: WebSocket):
"""Initialize with FastAPI WebSocket."""

def __init__(self, websocket: WebSocket, user_email: Optional[str] = None):
"""Initialize with FastAPI WebSocket and associated user."""
self.websocket = websocket
self.user_email = user_email

async def send_json(self, data: Dict[str, Any]) -> None:
"""Send JSON data to the client."""
54 changes: 49 additions & 5 deletions backend/main.py
@@ -174,12 +174,46 @@ async def logo_png():
async def websocket_endpoint(websocket: WebSocket):
"""
Main chat WebSocket endpoint using new architecture.

SECURITY NOTE - Production Architecture:
==========================================
This endpoint appears to lack authentication when viewed in isolation,
but in production it sits behind a reverse proxy with a separate
authentication service. The authentication flow is:

1. Client connects to WebSocket endpoint
2. Reverse proxy intercepts WebSocket handshake (HTTP Upgrade request)
3. Reverse proxy delegates to authentication service
4. Auth service validates JWT/session from cookies or headers
5. If valid: Auth service returns X-Authenticated-User header
6. Reverse proxy forwards connection to this app with X-Authenticated-User header
7. This app trusts the header (already validated by auth service)

SECURITY REQUIREMENTS:
- This app MUST ONLY be accessible via reverse proxy
- Direct public access to this app bypasses authentication
- Use network isolation to prevent direct access
- The /login endpoint lives in the separate auth service

DEVELOPMENT vs PRODUCTION:
- Production: Extracts user from X-Authenticated-User header (set by reverse proxy)
- Development: Falls back to 'user' query parameter (INSECURE, local only)

See docs/security_architecture.md for complete architecture details.
"""
await websocket.accept()

# Basic auth: derive user from query parameters or use test user
user_email = websocket.query_params.get('user')
if not user_email:
# Fallback to test user or require auth
config_manager = app_factory.get_config_manager()
user_email = config_manager.app_settings.test_user or 'test@test.com'

session_id = uuid4()
# Create connection adapter and chat service
connection_adapter = WebSocketConnectionAdapter(websocket)

# Create connection adapter with authenticated user and chat service
connection_adapter = WebSocketConnectionAdapter(websocket, user_email)
chat_service = app_factory.create_chat_service(connection_adapter)

logger.info(f"WebSocket connection established for session {session_id}")
@@ -192,7 +226,7 @@ async def websocket_endpoint(websocket: WebSocket):
if message_type == "chat":
# Handle chat message with streaming updates
try:
response = await chat_service.handle_chat_message(
await chat_service.handle_chat_message(
session_id=session_id,
content=data.get("content", ""),
model=data.get("model", ""),
@@ -237,7 +271,17 @@ async def websocket_endpoint(websocket: WebSocket):
user_email=data.get("user")
)
await websocket.send_json(response)


elif message_type == "attach_file":
# Handle file attachment to session (use authenticated user, not client-sent)
response = await chat_service.handle_attach_file(
session_id=session_id,
s3_key=data.get("s3_key"),
user_email=user_email, # Use authenticated user from connection
update_callback=lambda message: websocket_update_callback(websocket, message)
)
await websocket.send_json(response)

else:
logger.warning(f"Unknown message type: {message_type}")
await websocket.send_json({
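For context on the attach_file branch, a sketch of the client payload it expects, mirroring the data.get(...) calls above and the reply built by handle_attach_file in backend/application/chat/service.py; the s3_key and filename values are purely illustrative:

# Message a client might send over the chat WebSocket (illustrative s3_key)
attach_message = {
    "type": "attach_file",
    "s3_key": "user-files/example-report.pdf",  # hypothetical library key
}

# On success the server replies with roughly:
# {"type": "file_attach", "s3_key": "user-files/example-report.pdf",
#  "filename": "example-report.pdf", "success": True,
#  "message": "File example-report.pdf attached to session"}
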
79 changes: 45 additions & 34 deletions backend/routes/files_routes.py
@@ -11,7 +11,7 @@
from fastapi import APIRouter, Depends, HTTPException, Request, Response
from fastapi import Query
import base64
from pydantic import BaseModel
from pydantic import BaseModel, Field

from core.utils import get_current_user
from infrastructure.app_factory import app_factory
@@ -26,7 +26,7 @@ class FileUploadRequest(BaseModel):
filename: str
content_base64: str
content_type: Optional[str] = "application/octet-stream"
tags: Optional[Dict[str, str]] = {}
tags: Optional[Dict[str, str]] = Field(default_factory=dict)


class FileResponse(BaseModel):
@@ -51,12 +51,39 @@ class FileContentResponse(BaseModel):
tags: Dict[str, str]


@router.get("/files/healthz")
async def files_health_check():
"""Health check for files service.

Note: Declared before the dynamic /files/{file_key} route to avoid path capture.
"""
s3_client = app_factory.get_file_storage()
return {
"status": "healthy",
"service": "files-api",
"s3_config": {
"endpoint": s3_client.endpoint_url if hasattr(s3_client, 'endpoint_url') else "unknown",
"bucket": s3_client.bucket_name if hasattr(s3_client, 'bucket_name') else "unknown"
}
}


@router.post("/files", response_model=FileResponse)
async def upload_file(
request: FileUploadRequest,
current_user: str = Depends(get_current_user)
) -> FileResponse:
"""Upload a file to S3 storage."""
# Validate base64 content size (configurable limit to prevent abuse)
try:
content_size = len(request.content_base64) * 3 // 4 # approximate decoded size

Copilot AI Oct 25, 2025

The base64 size estimation formula * 3 // 4 should be documented more thoroughly. Because the approximation counts padding characters, it can be off from the actual decoded size by up to 2 bytes; decoding the content instead yields the exact size and also validates that the payload is well-formed base64.

Suggested change
content_size = len(request.content_base64) * 3 // 4 # approximate decoded size
# Decode base64 to get the exact size of the file in bytes.
decoded_content = base64.b64decode(request.content_base64, validate=True)
content_size = len(decoded_content)

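To illustrate the difference the suggestion is getting at: with padded base64 the length-based estimate counts the '=' characters, while decoding gives the exact byte count (standalone example, not code from this PR):

import base64

encoded = base64.b64encode(b"A").decode()              # "QQ==", length 4
print(len(encoded) * 3 // 4)                           # 3 -> estimate includes the two padding chars
print(len(base64.b64decode(encoded, validate=True)))   # 1 -> exact decoded size
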
except Exception:
raise HTTPException(status_code=400, detail="Invalid base64 content")

max_size = 250 * 1024 * 1024 # 250MB default (configurable)
if content_size > max_size:
raise HTTPException(status_code=413, detail=f"File too large. Maximum size is {max_size // (1024*1024)}MB")

try:
s3_client = app_factory.get_file_storage()
result = await s3_client.upload_file(
@@ -75,21 +102,6 @@ async def upload_file(
raise HTTPException(status_code=500, detail=f"Upload failed: {str(e)}")


# Place health endpoint before dynamic /files/{file_key} routes to avoid capture
@router.get("/files/healthz")
async def files_health_check():
"""Health check for files service."""
s3_client = app_factory.get_file_storage()
return {
"status": "healthy",
"service": "files-api",
"s3_config": {
"endpoint": s3_client.endpoint_url if hasattr(s3_client, 'endpoint_url') else "unknown",
"bucket": s3_client.bucket_name if hasattr(s3_client, 'bucket_name') else "unknown"
}
}


@router.get("/files/{file_key}", response_model=FileContentResponse)
async def get_file(
file_key: str,
@@ -128,9 +140,22 @@ async def list_files(
file_type=file_type,
limit=limit
)

return [FileResponse(**file_data) for file_data in result]


# Convert any datetime objects to ISO format strings for pydantic validation
processed_files = []
for file_data in result:
processed_file = file_data.copy()
if not isinstance(processed_file.get('last_modified'), str):
# Convert datetime to ISO format string if it's not already a string
try:
processed_file['last_modified'] = processed_file['last_modified'].isoformat()
except AttributeError:
# If it's not a datetime object, convert to string
processed_file['last_modified'] = str(processed_file['last_modified'])
processed_files.append(processed_file)

return [FileResponse(**file_data) for file_data in processed_files]

except Exception as e:
logger.error(f"Error listing files: {str(e)}")
raise HTTPException(status_code=500, detail=f"Failed to list files: {str(e)}")
@@ -180,20 +205,6 @@ async def get_user_file_stats(
raise HTTPException(status_code=500, detail=f"Failed to get stats: {str(e)}")


@router.get("/files/healthz")
async def files_health_check():
"""Health check for files service."""
s3_client = app_factory.get_file_storage()
return {
"status": "healthy",
"service": "files-api",
"s3_config": {
"endpoint": s3_client.endpoint_url if hasattr(s3_client, 'endpoint_url') else "unknown",
"bucket": s3_client.bucket_name if hasattr(s3_client, 'bucket_name') else "unknown"
}
}


@router.get("/files/download/{file_key:path}")
async def download_file(
file_key: str,