Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bugfix: parse email with no text content #15

Merged
merged 3 commits into from
Aug 21, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 27 additions & 5 deletions decoders.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,34 +129,56 @@ func decodeInlineFile(part *multipart.Part, cte ContentTransferEncoding) (Inline
return ifl, nil
}

func decodeAttachedFile(part *multipart.Part, cte ContentTransferEncoding) (AttachedFile, error) {
// decodeAttachmentFileFromBody builds an AttachedFile from a message body
// rather than from a multipart part.
//
// body is passed through decodeContent together with cte; the resulting
// stream is read to the end and stored in Data. ContentType and
// ContentDisposition are copied from the supplied headers instead of being
// parsed again.
//
// On a decodeContent failure the zero AttachedFile is returned. On a read
// failure the returned AttachedFile still carries the copied headers and
// whatever bytes were read before the error; callers should rely only on the
// error in that case.
func decodeAttachmentFileFromBody(body io.Reader, headers Headers, cte ContentTransferEncoding) (AttachedFile, error) {
	reader, err := decodeContent(body, nil, cte)
	if err != nil {
		return AttachedFile{}, fmt.Errorf(
			"letters.decoders.decodeAttachmentFileFromBody: cannot decode attached file content: %w",
			err)
	}

	// The metadata comes straight from the already-parsed headers.
	attached := AttachedFile{
		ContentType:        headers.ContentType,
		ContentDisposition: headers.ContentDisposition,
	}

	data, err := ioutil.ReadAll(reader)
	attached.Data = data
	if err != nil {
		return attached, fmt.Errorf(
			"letters.decoders.decodeAttachmentFileFromBody: cannot read attached file data: %w",
			err)
	}

	return attached, nil
}

func decodeAttachedFileFromPart(part *multipart.Part, cte ContentTransferEncoding) (AttachedFile, error) {
var afl AttachedFile

decoded, err := decodeContent(part, nil, cte)
if err != nil {
return afl, fmt.Errorf(
"letters.decoders.decodeAttachedFile: cannot decode attached file content: %w",
"letters.decoders.decodeAttachedFileFromPart: cannot decode attached file content: %w",
err)
}

afl.ContentType, err = parseContentTypeHeader(part.Header.Get("Content-Type"))
if err != nil {
return afl, fmt.Errorf(
"letters.decoders.decodeAttachedFile: cannot parse Content-Type of attached file: %w",
"letters.decoders.decodeAttachedFileFromPart: cannot parse Content-Type of attached file: %w",
err)
}

afl.ContentDisposition, err = parseContentDisposition(part.Header.Get("Content-Disposition"))
if err != nil {
return afl, fmt.Errorf(
"letters.decoders.decodeAttachedFile: cannot parse Content-Disposition of attached file: %w",
"letters.decoders.decodeAttachedFileFromPart: cannot parse Content-Disposition of attached file: %w",
err)
}

afl.Data, err = ioutil.ReadAll(decoded)
if err != nil {
return afl, fmt.Errorf(
"letters.decoders.decodeAttachedFile: cannot read attached file data: %w",
"letters.decoders.decodeAttachedFileFromPart: cannot read attached file data: %w",
err)
}

Expand Down
12 changes: 7 additions & 5 deletions letters.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,13 @@ func ParseEmail(r io.Reader) (Email, error) {
email.AttachedFiles = emailBodies.AttachedFiles

} else {
return email, fmt.Errorf(
"letters.ParseEmail: cannot parse unknown Content-Type %q: %w",
email.Headers.ContentType.ContentType,
&UnknownContentTypeError{contentType: email.Headers.ContentType.ContentType},
)
afl, err := decodeAttachmentFileFromBody(msg.Body, email.Headers, cte)
if err != nil {
return email, fmt.Errorf(
"letters.decoders.ParseEmail: cannot decode attached file content from body: %w",
err)
}
email.AttachedFiles = append(email.AttachedFiles, afl)
}

email.Text = normalizeMultilineString(email.Text)
Expand Down
105 changes: 105 additions & 0 deletions letters_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,111 @@ defaults (e.g. text/plain Content-Type).`,
testEmailFromFile(t, fp, expectedEmail)
}

// TestParseEmailEnglishNoTextContent checks a message that has no text body:
// the fixture's top-level Content-Type is application/pdf with an attachment
// Content-Disposition and a base64-encoded body. The expected Email has empty
// Text, EnrichedText and HTML, the top-level Content-Type and
// Content-Disposition exposed on Headers, and the decoded body as the single
// entry in AttachedFiles.
func TestParseEmailEnglishNoTextContent(t *testing.T) {
	fp := "tests/test_english_no_text_content.txt"

	// Fail with a clear message when the tz database is unavailable; a nil
	// *time.Location would otherwise make time.Date panic below.
	tz, err := time.LoadLocation("Europe/London")
	if err != nil {
		t.Fatalf("cannot load Europe/London location: %v", err)
	}

	// The date is formatted and parsed back so that it carries the location
	// time.Parse assigns to a zone-offset timestamp rather than the loaded tz.
	// NOTE(review): presumably this makes it compare equal to the value the
	// parser produces from the Date header; confirm against testEmailFromFile.
	expectedDate, err := time.Parse(
		time.RFC1123Z+" (MST)",
		time.Date(2019, time.April, 1, 7, 55, 0, 0, tz).Format(time.RFC1123Z+" (MST)"))
	if err != nil {
		t.Fatalf("cannot build expected date: %v", err)
	}

	expectedEmail := Email{
		Headers: Headers{
			Date:    expectedDate,
			Subject: "Test No Text Content, Attachment Only",
			ReplyTo: []*mail.Address{
				{
					Name:    "Alice Sender",
					Address: "alice.sender@example.net",
				},
			},
			Sender: &mail.Address{
				Name:    "Alice Sender",
				Address: "alice.sender@example.com",
			},
			From: []*mail.Address{
				{
					Name:    "Alice Sender",
					Address: "alice.sender@example.com",
				},
				{
					Name:    "Alice Sender",
					Address: "alice.sender@example.net",
				},
			},
			To: []*mail.Address{
				{
					Name:    "Bob Recipient",
					Address: "bob.recipient@example.com",
				},
				{
					Name:    "Carol Recipient",
					Address: "carol.recipient@example.com",
				},
			},
			Cc: []*mail.Address{
				{
					Name:    "Dan Recipient",
					Address: "dan.recipient@example.com",
				},
				{
					Name:    "Eve Recipient",
					Address: "eve.recipient@example.com",
				},
			},
			Bcc: []*mail.Address{
				{
					Name:    "Frank Recipient",
					Address: "frank.recipient@example.com",
				},
				{
					Name:    "Grace Recipient",
					Address: "grace.recipient@example.com",
				},
			},
			MessageID: "Message-Id-1@example.com",
			// Top-level headers of the fixture; the same values are expected
			// on the attached file below.
			ContentType: ContentTypeHeader{
				ContentType: "application/pdf",
				Params: map[string]string{
					"name": "attached-pdf-name.pdf",
				},
			},
			ContentDisposition: ContentDispositionHeader{
				ContentDisposition: attachment,
				Params: map[string]string{
					"filename": "attached-pdf-filename.pdf",
				},
			},
			ExtraHeaders: map[string][]string{
				"X-Clacks-Overhead": {"GNU Terry Pratchett"},
			},
		},
		Text:         "",
		EnrichedText: "",
		HTML:         "",
		AttachedFiles: []AttachedFile{
			{
				ContentType: ContentTypeHeader{
					ContentType: "application/pdf",
					Params: map[string]string{
						"name": "attached-pdf-name.pdf",
					},
				},
				ContentDisposition: ContentDispositionHeader{
					ContentDisposition: attachment,
					Params: map[string]string{
						"filename": "attached-pdf-filename.pdf",
					},
				},
				// Raw bytes of the base64-decoded fixture body (a tiny PDF).
				Data: []byte{37, 80, 68, 70, 45, 49, 46, 13, 116, 114, 97, 105, 108, 101, 114, 60, 60,
					47, 82, 111, 111, 116, 60, 60, 47, 80, 97, 103, 101, 115, 60, 60, 47, 75, 105, 100, 115, 91, 60,
					60, 47, 77, 101, 100, 105, 97, 66, 111, 120, 91, 48, 32, 48, 32, 51, 32, 51, 93, 62, 62, 93, 62,
					62, 62, 62, 62, 62},
			},
		},
	}

	testEmailFromFile(t, fp, expectedEmail)
}

func TestParseEmailEnglishPlaintextAsciiOver7bit(t *testing.T) {
fp := "tests/test_english_plaintext_ascii_over_7bit.txt"
tz, _ := time.LoadLocation("Europe/London")
Expand Down
49 changes: 26 additions & 23 deletions parsers.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,8 @@ func parseHeaders(header mail.Header) (Headers, error) {
err)
}

contentDisposition, _ := parseContentDisposition(header.Get("Content-Disposition"))

var extraHeaders = make(map[string][]string)
for key, value := range header {
_, isKnownHeader := knownHeaders[key]
Expand Down Expand Up @@ -322,28 +324,29 @@ func parseHeaders(header mail.Header) (Headers, error) {
}

return Headers{
Date: parseDateHeader(header.Get("Date")),
Sender: sender,
From: from,
ReplyTo: replyTo,
To: to,
Cc: cc,
Bcc: bcc,
MessageID: parseMessageIdHeader(header.Get("Message-ID")),
InReplyTo: parseCommaSeparatedMessageIdHeader(header.Get("In-Reply-To")),
References: parseCommaSeparatedMessageIdHeader(header.Get("References")),
Subject: parseStringHeader(header.Get("Subject")),
Comments: parseStringHeader(header.Get("Comments")),
Keywords: parseCommaSeparatedStringHeader(header.Get("Keywords")),
ResentDate: parseDateHeader(header.Get("Resent-Date")),
ResentFrom: resentFrom,
ResentSender: resentSender,
ResentTo: resentTo,
ResentCc: resentCc,
ResentBcc: resentBcc,
ResentMessageID: parseMessageIdHeader(header.Get("Resent-Message-ID")),
ContentType: contentType,
ExtraHeaders: extraHeaders,
Date: parseDateHeader(header.Get("Date")),
Sender: sender,
From: from,
ReplyTo: replyTo,
To: to,
Cc: cc,
Bcc: bcc,
MessageID: parseMessageIdHeader(header.Get("Message-ID")),
InReplyTo: parseCommaSeparatedMessageIdHeader(header.Get("In-Reply-To")),
References: parseCommaSeparatedMessageIdHeader(header.Get("References")),
Subject: parseStringHeader(header.Get("Subject")),
Comments: parseStringHeader(header.Get("Comments")),
Keywords: parseCommaSeparatedStringHeader(header.Get("Keywords")),
ResentDate: parseDateHeader(header.Get("Resent-Date")),
ResentFrom: resentFrom,
ResentSender: resentSender,
ResentTo: resentTo,
ResentCc: resentCc,
ResentBcc: resentBcc,
ResentMessageID: parseMessageIdHeader(header.Get("Resent-Message-ID")),
ContentType: contentType,
ContentDisposition: contentDisposition,
ExtraHeaders: extraHeaders,
}, nil
}

Expand Down Expand Up @@ -500,7 +503,7 @@ func parsePart(msg io.Reader, parentContentType ContentTypeHeader, boundary stri
}
emailBodies.InlineFiles = append(emailBodies.InlineFiles, inlineFile)
} else if isAttFile {
attachedFile, err := decodeAttachedFile(part, cte)
attachedFile, err := decodeAttachedFileFromPart(part, cte)
if err != nil {
return emailBodies, fmt.Errorf(
"letters.parsers.parsePart: cannot decode attached file: %w",
Expand Down
6 changes: 4 additions & 2 deletions structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ var knownHeaders = map[string]void{
"Resent-Message-Id": member,
"Content-Transfer-Encoding": member,
"Content-Type": member,
"Content-Disposition": member,
}

type ContentDisposition string
Expand Down Expand Up @@ -525,8 +526,9 @@ type Headers struct {
// If another top-level type is to be used for any reason, it must be
// given a name starting with "X-" to indicate its non-standard status
// and to avoid a potential conflict with a future official name.
ContentType ContentTypeHeader
ExtraHeaders map[string][]string
ContentType ContentTypeHeader
ContentDisposition ContentDispositionHeader
ExtraHeaders map[string][]string
}

type emailBodies struct {
Expand Down
20 changes: 20 additions & 0 deletions tests/test_english_no_text_content.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Date: Mon, 01 Apr 2019 07:55:00 +0100 (BST)
From: =?uTf-8?b?QWxpY2UgU2VuZGVy?= <alice.sender@example.com>,
=?uTf-8?b?QWxpY2UgU2VuZGVy?= <alice.sender@example.net>
Sender: =?uTf-8?b?QWxpY2UgU2VuZGVy?= <alice.sender@example.com>
Reply-To: =?uTf-8?b?QWxpY2UgU2VuZGVy?= <alice.sender@example.net>
To: =?utf-8?b?Qm9iIFJlY2lwaWVudA==?= <bob.recipient@example.com>,
=?UTF-8?B?Q2Fyb2wgUmVjaXBpZW50?= <carol.recipient@example.com>
Cc: =?Utf-8?B?RGFuIFJlY2lwaWVudA==?= <dan.recipient@example.com>,
=?uTF-8?b?RXZlIFJlY2lwaWVudA==?= <eve.recipient@example.com>
Bcc: =?utf-8?b?RnJhbmsgUmVjaXBpZW50?= <frank.recipient@example.com>,
=?Utf-8?B?R3JhY2UgUmVjaXBpZW50?= <grace.recipient@example.com>
Message-ID: <Message-Id-1@example.com>
Subject: Test No Text Content, Attachment Only
Content-Type: applicaTION/PDF; NAME="attached-pdf-name.pdf"
Content-Disposition: AttachmenT; FILENAMe="attached-pdf-filename.pdf"
Content-Transfer-Encoding: BASE64
X-Clacks-Overhead: GNU Terry Pratchett

JVBERi0xLg10cmFpbGVyPDwvUm9vdDw8L1BhZ2VzPDwvS2lkc1s8PC9NZWRpYUJveFswIDAgMyAz
XT4+XT4+Pj4+Pg==