Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use more robust datetime parsing #14

Merged
merged 1 commit into from Jul 2, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
17 changes: 15 additions & 2 deletions app/factories/eml.py
Expand Up @@ -2,6 +2,7 @@
from typing import Any, Dict, List

import arrow
import dateparser
from eml_parser import EmlParser
from ioc_finder import (
parse_domain_names,
Expand All @@ -21,6 +22,17 @@ def __init__(self, eml_file: bytes):
parser = EmlParser(include_raw_body=True, include_attachment_data=True)
self.parsed = parser.decode_email_bytes(eml_file)

def _normalize_received_date(self, received: Dict):
date = received.get("date", "")
if date != "":
return received

src = received.get("src", "")
parts = src.split(";")
date_ = parts[-1].strip()
received["date"] = dateparser.parse(date_)
return received

def _normalize_received(self, received: List[Dict]) -> List[Dict]:
if len(received) == 0:
return []
Expand All @@ -30,9 +42,10 @@ def _normalize_received(self, received: List[Dict]) -> List[Dict]:
base_date = arrow.get(first.get("date", ""))

for r in received:
date = arrow.get(r.get("date", ""))
normalized = self._normalize_received_date(r)
date = arrow.get(normalized.get("date", ""))
delay = (date - base_date).seconds
r["delay"] = delay
normalized["delay"] = delay
base_date = date

return received
Expand Down
4 changes: 2 additions & 2 deletions app/schemas/eml.py
Expand Up @@ -40,12 +40,12 @@ class Config:


class Received(APIModel):
by: List[str]
by: Optional[List[str]] = None
date: datetime
for_: Optional[List[str]] = None
from_: Optional[List[str]] = None
src: str
with_: str
with_: Optional[str]
delay: int

class Config:
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/types.ts
Expand Up @@ -40,12 +40,12 @@ export interface Body {
}

export interface Received {
by: string[];
by: string[] | undefined;
date: string;
for: string[] | undefined;
from: string[] | undefined;
src: string;
with: string;
with: string | undefined;
delay: number;
}

Expand Down
49 changes: 47 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion pyproject.toml
Expand Up @@ -24,6 +24,7 @@ oletools = "^0.55.1"
pydantic = "^1.4"
python-multipart = "^0.0.5"
uvicorn = "^0.11.3"
dateparser = "^0.7.6"

[tool.poetry.dev-dependencies]
asynctest = "^0.13.0"
Expand All @@ -49,7 +50,7 @@ seed-isort-config = "^2.1.0"
[tool.isort]
force_grid_wrap = 0
include_trailing_comma = true
known_third_party = ["aiospamc", "arrow", "async_timeout", "asynctest", "compoundfiles", "eml_parser", "fastapi", "fastapi_utils", "httpx", "ioc_finder", "loguru", "magic", "olefile", "oletools", "pydantic", "pytest", "respx", "starlette"]
known_third_party = ["aiospamc", "arrow", "async_timeout", "asynctest", "compoundfiles", "dateparser", "eml_parser", "fastapi", "fastapi_utils", "httpx", "ioc_finder", "loguru", "magic", "olefile", "oletools", "pydantic", "pytest", "respx", "starlette"]
line_length = 88
multi_line_output = 3
use_parentheses= true
Expand Down
10 changes: 10 additions & 0 deletions tests/conftest.py
@@ -1,4 +1,6 @@
import glob
from pathlib import Path
from typing import List

import httpx
import pytest
Expand Down Expand Up @@ -43,6 +45,14 @@ def encrypted_docx_eml() -> bytes:
return read_file("encrypted_docx.eml").encode()


@pytest.fixture
def emails() -> List[bytes]:
    """Return the raw bytes of every ``.eml`` file under ``fixtures/emails``.

    The search starts at the ``fixtures/emails`` directory next to this
    file and descends into all subdirectories. Files are read in sorted
    path order so that test runs are reproducible.
    """
    root = Path(__file__).parent.absolute() / "fixtures" / "emails"
    pattern = str(root / "**" / "*.eml")
    # Without recursive=True, "**" behaves like "*" and would only match
    # files exactly one directory below the root.
    paths = sorted(glob.glob(pattern, recursive=True))
    # read_bytes() opens and closes each file, so no handles are leaked.
    return [Path(path).read_bytes() for path in paths]


@pytest.fixture
def emailrep_response() -> str:
return read_file("emailrep.json")
Expand Down
12 changes: 12 additions & 0 deletions tests/factories/test_eml.py
@@ -1,3 +1,5 @@
from typing import List

from app.factories.eml import EmlFactory


Expand Down Expand Up @@ -47,6 +49,16 @@ def test_encrypted_docx(encrypted_docx_eml):
)


def test_emails(emails: List[bytes]):
    """Check that every fixture email can be parsed by ``EmlFactory``.

    All emails are attempted even if some fail, so a single run reports
    every broken fixture. The test fails at the end if any email raised
    during parsing or produced ``None``.
    """
    failures = []
    for index, email in enumerate(emails):
        try:
            eml = EmlFactory.from_bytes(email)
        except Exception as e:
            # Print the error and the offending message for debugging.
            # Fixtures are not guaranteed to be UTF-8, so decode leniently
            # rather than let a UnicodeDecodeError hide the real error.
            print(e)
            print(email.decode(errors="replace"))
            failures.append(f"email #{index}: {e!r}")
            continue

        # Kept outside the try block so the assertion is never swallowed.
        if eml is None:
            failures.append(f"email #{index}: EmlFactory.from_bytes returned None")

    assert not failures, "failed to parse fixture emails:\n" + "\n".join(failures)


def test_complete_msg(complete_msg):
eml = EmlFactory.from_bytes(complete_msg)

Expand Down
2 changes: 2 additions & 0 deletions tests/fixtures/emails/.gitattributes
@@ -0,0 +1,2 @@
# Never autoconvert line endings on fixture emails
*.eml -text
@@ -0,0 +1,29 @@
Mime-Version: 1.0 (Apple Message framework v730)
Content-Type: multipart/mixed; boundary=Apple-Mail-13-196941151
Message-Id: <9169D984-4E0B-45EF-82D4-8F5E53AD7012@example.com>
From: foo@example.com
Subject: testing
Date: Mon, 6 Jun 2005 22:21:22 +0200
To: blah@example.com


--Apple-Mail-13-196941151
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=ISO-8859-1;
delsp=yes;
format=flowed

This is the first part.

--Apple-Mail-13-196941151
Content-Type: text/x-ruby-script; name="hello.rb"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename="api.rb"

puts "Hello, world!"
gets

--Apple-Mail-13-196941151--

@@ -0,0 +1,32 @@
Mime-Version: 1.0 (Apple Message framework v730)
Content-Type: multipart/mixed; boundary=Apple-Mail-13-196941151
Message-Id: <9169D984-4E0B-45EF-82D4-8F5E53AD7012@example.com>
From: foo@example.com
Subject: testing
Date: Mon, 6 Jun 2005 22:21:22 +0200
To: blah@example.com


--Apple-Mail-13-196941151
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=ISO-8859-1;
delsp=yes;
format=flowed

This is the first part.

--Apple-Mail-13-196941151
Content-Type: image/jpeg
Content-Transfer-Encoding: base64
Content-Location: Photo25.jpg
Content-ID: <qbFGyPQAS8>
Content-Disposition: inline

jamisSqGSIb3DQEHAqCAMIjamisxCzAJBgUrDgMCGgUAMIAGCSqGSjamisEHAQAAoIIFSjCCBUYw
ggQujamisQICBD++ukQwDQYJKojamisNAQEFBQAwMTELMAkGA1UEBhMCRjamisAKBgNVBAoTA1RE
QzEUMBIGjamisxMLVERDIE9DRVMgQ0jamisNMDQwMjI5MTE1OTAxWhcNMDYwMjamisIyOTAxWjCB
gDELMAkGA1UEjamisEsxKTAnBgNVBAoTIEjamisuIG9yZ2FuaXNhdG9yaXNrIHRpbjamisRuaW5=

--Apple-Mail-13-196941151--

@@ -0,0 +1,93 @@
Mime-Version: 1.0 (Apple Message framework v730)
Content-Type: multipart/mixed; boundary=Apple-Mail-13-196941151
Message-Id: <9169D984-4E0B-45EF-82D4-8F5E53AD7012@example.com>
From: foo@example.com
Subject: testing
Date: Mon, 6 Jun 2005 22:21:22 +0200
To: blah@example.com


--Apple-Mail-13-196941151
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain;
charset=ISO-8859-1;
delsp=yes;
format=flowed

This is the first part.

--Apple-Mail-13-196941151
Content-Type: message/rfc822;
name="ForwardedMessage.eml";

From xxxx@xxxx.com Tue May 10 11:28:07 2005
Return-Path: <xxxx@xxxx.com>
X-Original-To: xxxx@xxxx.com
Delivered-To: xxxx@xxxx.com
Received: from localhost (localhost [127.0.0.1])
by xxx.xxxxx.com (Postfix) with ESMTP id 50FD3A96F
for <xxxx@xxxx.com>; Tue, 10 May 2005 17:26:50 +0000 (GMT)
from: from xxx.xxxxx.com ([127.0.0.1])
by localhost (xxx.xxxxx.com [127.0.0.1]) (amavisd-new, port 10024)
with LMTP id 70060-03 for <xxxx@xxxx.com>;
Tue, 10 May 2005 17:26:49 +0000 (GMT)
Received: from xxx.xxxxx.com (xxx.xxxxx.com [69.36.39.150])
by xxx.xxxxx.com (Postfix) with ESMTP id 8B957A94B
for <xxxx@xxxx.com>; Tue, 10 May 2005 17:26:48 +0000 (GMT)
Received: from xxx.xxxxx.com (xxx.xxxxx.com [64.233.184.203])
by xxx.xxxxx.com (Postfix) with ESMTP id 9972514824C
for <xxxx@xxxx.com>; Tue, 10 May 2005 12:26:40 -0500 (CDT)
Received: by xxx.xxxxx.com with SMTP id 68so1694448wri
for <xxxx@xxxx.com>; Tue, 10 May 2005 10:26:40 -0700 (PDT)
DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws;
s=beta; d=xxxxx.com;
h=received:message-id:date:from:reply-to:to:subject:mime-version:content-type;
b=g8ZO5ttS6GPEMAz9WxrRk9+9IXBUfQIYsZLL6T88+ECbsXqGIgfGtzJJFn6o9CE3/HMrrIGkN5AisxVFTGXWxWci5YA/7PTVWwPOhJff5BRYQDVNgRKqMl/SMttNrrRElsGJjnD1UyQ/5kQmcBxq2PuZI5Zc47u6CILcuoBcM+A=
Received: by 10.54.96.19 with SMTP id t19mr621017wrb;
Tue, 10 May 2005 10:26:39 -0700 (PDT)
Received: by 10.54.110.5 with HTTP; Tue, 10 May 2005 10:26:39 -0700 (PDT)
Message-ID: <xxxx@xxxx.com>
Date: Tue, 10 May 2005 11:26:39 -0600
From: Test Tester <xxxx@xxxx.com>
Reply-To: Test Tester <xxxx@xxxx.com>
To: xxxx@xxxx.com, xxxx@xxxx.com
Subject: Another PDF
Mime-Version: 1.0
Content-Type: multipart/mixed;
boundary="----=_Part_2192_32400445.1115745999735"
X-Virus-Scanned: amavisd-new at textdrive.com

------=_Part_2192_32400445.1115745999735
Content-Type: text/plain; charset=ISO-8859-1
Content-Transfer-Encoding: quoted-printable
Content-Disposition: inline

Just attaching another PDF, here, to see what the message looks like,
and to see if I can figure out what is going wrong here.

------=_Part_2192_32400445.1115745999735
Content-Type: application/pdf; name="broken.pdf"
Content-Transfer-Encoding: base64
Content-Disposition: attachment; filename="broken.pdf"

JVBERi0xLjQNCiXk9tzfDQoxIDAgb2JqDQo8PCAvTGVuZ3RoIDIgMCBSDQogICAvRmlsdGVyIC9G
bGF0ZURlY29kZQ0KPj4NCnN0cmVhbQ0KeJy9Wt2KJbkNvm/od6jrhZxYln9hWEh2p+8HBvICySaE
ycLuTV4/1ifJ9qnq09NpSBimu76yLUuy/qzqcPz7+em3Ixx/CDc6CsXxs3b5+fvfjr/8cPz6/BRu
rbfAx/n3739/fuJylJ5u5fjX81OuDr4deK4Bz3z/aDP+8fz0yw8g0Ofq7ktr1Mn+u28rvhy/jVeD
QSa+9YNKHP/pxjvDNfVAx/m3MFz54FhvTbaseaxiDoN2LeMVMw+yA7RbHSCDzxZuaYB2E1Yay7QU
x89vz0+tyFDKMlAHK5yqLmnjF+c4RjEiQIUeKwblXMe+AsZjN1J5yGQL5DHpDHksurM81rF6PKab
gK6zAarIDzIiUY23rJsN9iorAE816aIu6lsgAdQFsuhhkHOUFgVjp2GjMqSewITXNQ27jrMeamkg
1rPI3iLWG2CIaSBB+V1245YVRICGbbpYKHc2USFDl6M09acQVQYhlwIrkBNLISvXhGlF1wi5FHCw
wxZkoGNJlVeJCEsqKA+3YAV5AMb6KkeaqEJQmFKKQU8T1pRi2ihE1Y4CDrqoYFFXYjJJOatsyzuI
8SIlykuxKTMibWK8H1PgEvqYgs4GmQSrEjJAalgGirIhik+p4ZQN9E3ETFPAHE1b8pp1l/0Rc1gl
fQs0ABWvyoZZzU8VnPXwVVcO9BEsyjEJaO6eBoZRyKGlrKoYoOygA8BGIzgwN3RQ15ouigG5idZQ
fx2U4Db2CqiLO0WHAZoylGiCAqhniNQjFjQPSkmjwfNTgQ6M1Ih+eWo36wFmjIxDJZiGUBiWsAyR
xX3EekGOizkGI96Ol9zVZTAivikURhRsHh2E3JhWMpSTZCnnonrLhMCodgrNcgo4uyJUJc6qnVss
nrGd1Ptr0YwisCOYyIbUwVjV4xBUNLbguSO2YHujonAMJkMdSI7bIw91Akq2AUlMUWGFTMAOamjU
OvZQCxIkY2pCpMFo/IwLdVLHs6nddwTRrgoVbvLU9eB0G4EMndV0TNoxHbt3JBWwK6hhv3iHfDtF
yokB302IpEBTnWICde4uYc/1khDbSIkQopO6lcqamGBu1OSE3N5IPSsZX00CkSHRiiyx6HQIShsS
HSVNswdVsaOUSAWq9aYhDtGDaoG5a3lBGkYt/lFlBFt1UqrYnzVtUpUQnLiZeouKgf1KhRBViRRk
ExepJCzTwEmFDalIRbLEGtw0gfpESOpIAF/NnpPzcVCG86s0g2DuSyd41uhNGbEgaSrWEXORErbw
------=_Part_2192_32400445.1115745999735--

--Apple-Mail-13-196941151--