Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ logs: ## display app-dev logs (follow mode)
.PHONY: logs

run-backend: ## Start only the backend application and all needed services
@$(COMPOSE) up --force-recreate -d docspec
@$(COMPOSE) up --force-recreate -d celery-dev
@$(COMPOSE) up --force-recreate -d y-provider-development
@$(COMPOSE) up --force-recreate -d nginx
Expand Down
5 changes: 5 additions & 0 deletions compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,8 @@ services:
kc_postgresql:
condition: service_healthy
restart: true

docspec:
image: ghcr.io/docspecio/api:2.0.0
ports:
- "4000:4000"
1 change: 1 addition & 0 deletions docs/env.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ These are the environment variables you can set for the `impress-backend` contai
| USER_OIDC_ESSENTIAL_CLAIMS | Essential claims in OIDC token | [] |
| Y_PROVIDER_API_BASE_URL | Y Provider url | |
| Y_PROVIDER_API_KEY | Y provider API key | |
| DOCSPEC_API_URL | URL of the DocSpec conversion API endpoint | |


## impress-frontend image
Expand Down
4 changes: 3 additions & 1 deletion env.d/development/common
Original file line number Diff line number Diff line change
Expand Up @@ -67,5 +67,7 @@ DJANGO_SERVER_TO_SERVER_API_TOKENS=server-api-token
Y_PROVIDER_API_BASE_URL=http://y-provider-development:4444/api/
Y_PROVIDER_API_KEY=yprovider-api-key

DOCSPEC_API_URL=http://docspec:4000/conversion

# Theme customization
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
THEME_CUSTOMIZATION_CACHE_TIMEOUT=15
2 changes: 1 addition & 1 deletion env.d/development/common.e2e
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ Y_PROVIDER_API_BASE_URL=http://y-provider:4444/api/

# Throttle
API_DOCUMENT_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
API_CONFIG_THROTTLE_RATE=1000/min
11 changes: 9 additions & 2 deletions src/backend/core/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,15 @@
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _

from core.services import mime_types
import magic
from rest_framework import serializers

from core import choices, enums, models, utils, validators
from core.services.ai_services import AI_ACTIONS
from core.services.converter_services import (
ConversionError,
YdocConverter,
Converter,
)


Expand Down Expand Up @@ -188,6 +189,7 @@ class DocumentSerializer(ListDocumentSerializer):

content = serializers.CharField(required=False)
websocket = serializers.BooleanField(required=False, write_only=True)
file = serializers.FileField(required=False, write_only=True, allow_null=True)

class Meta:
model = models.Document
Expand All @@ -204,6 +206,7 @@ class Meta:
"deleted_at",
"depth",
"excerpt",
"file",
"is_favorite",
"link_role",
"link_reach",
Expand Down Expand Up @@ -461,7 +464,11 @@ def create(self, validated_data):
language = user.language or language

try:
document_content = YdocConverter().convert(validated_data["content"])
document_content = Converter().convert(
validated_data["content"],
mime_types.MARKDOWN,
mime_types.YJS
)
except ConversionError as err:
raise serializers.ValidationError(
{"content": ["Could not convert content"]}
Expand Down
40 changes: 30 additions & 10 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,12 @@
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
ConversionError,
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.converter_services import (
ValidationError as YProviderValidationError,
Converter,
)
from core.services.converter_services import (
YdocConverter,
)
from core.services import mime_types
from core.tasks.mail import send_ask_for_access_mail
from core.utils import extract_attachments, filter_descendants

Expand Down Expand Up @@ -504,6 +502,28 @@ def perform_create(self, serializer):
"IN SHARE ROW EXCLUSIVE MODE;"
)

# Remove file from validated_data as it's not a model field
# Process it if present
uploaded_file = serializer.validated_data.pop("file", None)

# If a file is uploaded, convert it to Yjs format and set as content
if uploaded_file:
try:
file_content = uploaded_file.read()

converter = Converter()
converted_content = converter.convert(
file_content,
content_type=uploaded_file.content_type,
accept=mime_types.YJS
)
serializer.validated_data["content"] = converted_content
serializer.validated_data["title"] = uploaded_file.name
except ConversionError as err:
raise drf.exceptions.ValidationError(
{"file": ["Could not convert file content"]}
) from err

obj = models.Document.add_root(
creator=self.request.user,
**serializer.validated_data,
Expand Down Expand Up @@ -1603,14 +1623,14 @@ def content(self, request, pk=None):
if base64_content is not None:
# Convert using the y-provider service
try:
yprovider = YdocConverter()
yprovider = Converter()
result = yprovider.convert(
base64.b64decode(base64_content),
"application/vnd.yjs.doc",
mime_types.YJS,
{
"markdown": "text/markdown",
"html": "text/html",
"json": "application/json",
"markdown": mime_types.MARKDOWN,
"html": mime_types.HTML,
"json": mime_types.JSON,
}[content_format],
)
content = result
Expand Down
71 changes: 65 additions & 6 deletions src/backend/core/services/converter_services.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from django.conf import settings

import requests
import typing

from core.services import mime_types

class ConversionError(Exception):
"""Base exception for conversion-related errors."""
Expand All @@ -19,8 +21,65 @@ class ServiceUnavailableError(ConversionError):
"""Raised when the conversion service is unavailable."""


class ConverterProtocol(typing.Protocol):
    """Structural interface shared by the conversion backends.

    Any object exposing a ``convert(text, content_type, accept)`` method
    satisfies this protocol; no inheritance is required.
    """

    def convert(self, text, content_type, accept):
        """Convert ``text`` from the ``content_type`` format to the ``accept`` format."""


class Converter:
    """Route a conversion request to the microservice able to handle it.

    DOCX input requested as Yjs goes through two hops: DocSpec turns the DOCX
    into BlockNote, then the YDoc converter turns that BlockNote into Yjs.
    Every other ``content_type``/``accept`` pair is handed to the YDoc
    converter unchanged.
    """

    docspec: ConverterProtocol
    ydoc: ConverterProtocol

    def __init__(self):
        self.docspec = DocSpecConverter()
        self.ydoc = YdocConverter()

    def convert(self, input, content_type, accept):  # pylint: disable=redefined-builtin
        """Convert input into other formats using external microservices."""
        needs_docspec = content_type == mime_types.DOCX and accept == mime_types.YJS
        if not needs_docspec:
            return self.ydoc.convert(input, content_type, accept)

        # First hop: DOCX -> BlockNote through DocSpec.
        blocknote = self.docspec.convert(
            input,
            mime_types.DOCX,
            mime_types.BLOCKNOTE,
        )
        # Second hop: BlockNote -> Yjs. BlockNote is not DOCX, so this pair is
        # always served by the YDoc converter.
        return self.ydoc.convert(blocknote, mime_types.BLOCKNOTE, mime_types.YJS)


class DocSpecConverter:
    """Client for the DocSpec microservice (DOCX to BlockNote conversion)."""

    def _request(self, url, data, content_type):
        """POST ``data`` to ``url`` as a multipart file upload and return the response.

        The request always asks for BlockNote output. ``raise_for_status`` is
        called on the response before it is returned, so HTTP error statuses
        surface as ``requests`` exceptions.
        """
        upload = {"file": ("document.docx", data, content_type)}
        response = requests.post(
            url,
            headers={"Accept": mime_types.BLOCKNOTE},
            files=upload,
            timeout=settings.CONVERSION_API_TIMEOUT,
            verify=settings.CONVERSION_API_SECURE,
        )
        response.raise_for_status()
        return response

    def convert(self, data, content_type, accept):
        """Convert a DOCX document to BlockNote and return the raw response body.

        Raises:
            ValidationError: if ``data`` is empty, or if the requested
                conversion is anything other than DOCX to BlockNote.
            ServiceUnavailableError: if the request to DocSpec fails with a
                ``requests.RequestException``.
        """
        if not data:
            raise ValidationError("Input data cannot be empty")

        # Only one conversion pair is supported by this backend.
        supported_pair = (mime_types.DOCX, mime_types.BLOCKNOTE)
        if (content_type, accept) != supported_pair:
            raise ValidationError(
                f"Conversion from {content_type} to {accept} is not supported."
            )

        try:
            response = self._request(settings.DOCSPEC_API_URL, data, content_type)
        except requests.RequestException as err:
            raise ServiceUnavailableError(
                "Failed to connect to DocSpec conversion service",
            ) from err
        else:
            return response.content


class YdocConverter:
"""Service class for conversion-related operations."""
"""Service class for YDoc conversion-related operations."""

@property
def auth_header(self):
Expand All @@ -45,7 +104,7 @@ def _request(self, url, data, content_type, accept):
return response

def convert(
self, text, content_type="text/markdown", accept="application/vnd.yjs.doc"
self, text, content_type=mime_types.MARKDOWN, accept=mime_types.YJS
):
"""Convert a Markdown text into our internal format using an external microservice."""

Expand All @@ -59,14 +118,14 @@ def convert(
content_type,
accept,
)
if accept == "application/vnd.yjs.doc":
if accept == mime_types.YJS:
return b64encode(response.content).decode("utf-8")
if accept in {"text/markdown", "text/html"}:
if accept in {mime_types.MARKDOWN, "text/html"}:
return response.text
if accept == "application/json":
if accept == mime_types.JSON:
return response.json()
raise ValidationError("Unsupported format")
except requests.RequestException as err:
raise ServiceUnavailableError(
"Failed to connect to conversion service",
f"Failed to connect to YDoc conversion service {content_type}, {accept}",
) from err
6 changes: 6 additions & 0 deletions src/backend/core/services/mime_types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""MIME type constants used by the conversion services and their callers."""

# Plain-text formats
HTML = "text/html"
MARKDOWN = "text/markdown"

# JSON-based formats
JSON = "application/json"
BLOCKNOTE = "application/vnd.blocknote+json"

# Document formats
YJS = "application/vnd.yjs.doc"
DOCX = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
6 changes: 6 additions & 0 deletions src/backend/impress/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,12 @@ class Base(Configuration):
environ_prefix=None,
)

# DocSpec API microservice
# URL of the DocSpec conversion endpoint, read from the DOCSPEC_API_URL
# environment variable (no prefix). No default is declared here, so the
# variable has to be provided by the environment for DOCX import to work.
DOCSPEC_API_URL = values.Value(
environ_name="DOCSPEC_API_URL",
environ_prefix=None
)

# Conversion endpoint
CONVERSION_API_ENDPOINT = values.Value(
default="convert",
Expand Down
Binary file not shown.
63 changes: 63 additions & 0 deletions src/frontend/apps/e2e/__tests__/app-impress/assets/test_import.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
![473389927-e4ff1794-69f3-460a-85f8-fec993cd74d6.png](http://localhost:3000/assets/logo-suite-numerique.png)![497094770-53e5f8e2-c93e-4a0b-a82f-cd184fd03f51.svg](http://localhost:3000/assets/assets/icon-docs.svg)

# Lorem Ipsum Markdown Document

## Introduction

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam auctor, nisl eget ultricies tincidunt, nisl nisl aliquam nisl, eget ultricies nisl nisl eget nisl.

### Subsection 1.1

* **Bold text**: Lorem ipsum dolor sit amet.

* *Italic text*: Consectetur adipiscing elit.

* ~~Strikethrough text~~: Nullam auctor, nisl eget ultricies tincidunt.

1. First item in an ordered list.

2. Second item in an ordered list.

* Indented bullet point.

* Another indented bullet point.

3. Third item in an ordered list.

### Subsection 1.2

**Code block:**

```python
def hello_world():
print("Hello, world!")
```

**Blockquote:**

> Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam auctor, nisl eget ultricies tincidunt.
**Horizontal rule:**

***

**Table:**

| Syntax | Description |
| --------- | ----------- |
| Header | Title |
| Paragraph | Text |

**Inline code:**

Use the `printf()` function.

**Link:** [Example](https://www.example.com)

**Image:**

![Alt text](https://via.placeholder.com/150)

## Conclusion

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Nullam auctor, nisl eget ultricies tincidunt, nisl nisl aliquam nisl, eget ultricies nisl nisl eget nisl.
Binary file not shown.
Binary file not shown.
Loading