Skip to content

Commit

Permalink
Merge pull request #10 from ninoseki/add-msg-support
Browse files: browse the repository at this point in the history
feat: add MSG format support
  • Loading branch information
ninoseki committed Jun 30, 2020
2 parents d658b85 + cb33559 commit 9f6ce8f
Show file tree
Hide file tree
Showing 13 changed files with 1,102 additions and 25 deletions.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -33,5 +33,5 @@ Alternatively, you can deploy the application on Heroku.

## ToDo

- [ ] Support MSG format.
- [x] Support MSG format.
- [ ] In-depth attachments analysis by using oletools.
20 changes: 12 additions & 8 deletions app/factories/eml.py
@@ -1,3 +1,4 @@
from io import BytesIO
from typing import Any, Dict, List

import arrow
Expand All @@ -10,6 +11,8 @@
)

from app.schemas.eml import Eml
from app.services import outlookmsgfile
from app.services.validator import is_eml_file


class EmlFactory:
Expand Down Expand Up @@ -64,12 +67,6 @@ def _normalize_bodies(self):
self.parsed["bodies"] = [self._normalize_body(body) for body in bodies]
del self.parsed["body"]

def _normalize_attachment(self, attachment: Dict[str, Any]):
attachment["content_header"] = self._normalize_key_value_header(
attachment.get("content_header", {})
)
return attachment

def _normalize_attachments(self):
# change "attachment" to "attachments"
attachments = self.parsed.get("attachment", [])
Expand All @@ -87,6 +84,13 @@ def to_model(self) -> Eml:
return Eml.parse_obj(self.parsed)

@classmethod
def from_bytes(cls, eml_file: bytes) -> Eml:
obj = cls(eml_file)
def from_bytes(cls, data: bytes) -> Eml:
if is_eml_file(data):
obj = cls(data)
return obj.to_model()

# assume data is a msg file
file = BytesIO(data)
message = outlookmsgfile.load(file)
obj = cls(message.as_bytes())
return obj.to_model()
6 changes: 3 additions & 3 deletions app/schemas/payload.py
@@ -1,7 +1,7 @@
from fastapi_utils.api_model import APIModel
from pydantic import validator

from app.services.validator import is_eml_file
from app.services.validator import is_eml_or_msg_file


class Payload(APIModel):
Expand All @@ -13,6 +13,6 @@ class FilePayload(APIModel):

@validator("file")
def eml_file_must_be_eml(cls, v: bytes):
if is_eml_file(v) is False:
raise ValueError("Invalid EML file.")
if not is_eml_or_msg_file(v):
raise ValueError("Invalid file format.")
return v

0 comments on commit 9f6ce8f

Please sign in to comment.