From 50c6558cd8f7bebca65afd39cece4ad0ce47bbcc Mon Sep 17 00:00:00 2001 From: Rohit Sehgal Date: Fri, 5 Jun 2020 16:22:04 +0530 Subject: [PATCH] Update MailParsing Logic (#26) --- .../github/trashemail/utils/MailParser.java | 77 +++++++++++++++---- 1 file changed, 63 insertions(+), 14 deletions(-) diff --git a/src/main/java/io/github/trashemail/utils/MailParser.java b/src/main/java/io/github/trashemail/utils/MailParser.java index 84ab865..9118c05 100644 --- a/src/main/java/io/github/trashemail/utils/MailParser.java +++ b/src/main/java/io/github/trashemail/utils/MailParser.java @@ -1,18 +1,32 @@ package io.github.trashemail.utils; -import io.github.trashemail.Telegram.ForwardMailsToTelegram; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; + import org.apache.commons.mail.util.MimeMessageParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import javax.mail.Address; import javax.mail.Message; import javax.mail.internet.MimeMessage; import java.util.Date; +@Getter +@Setter +@NoArgsConstructor public class MailParser { private String to; private String from; private String subject; private String content; private Date date; + private String htmlContent; + private Boolean htmlContentSet; + + private static final Logger log = LoggerFactory.getLogger( + MailParser.class); public MailParser(Message message) throws Exception { this.content=""; @@ -20,27 +34,62 @@ public MailParser(Message message) throws Exception { this.to=""; this.subject=""; + this.htmlContentSet = false; + this.htmlContent = null; + for(Address a : message.getFrom()) this.from += a.toString() + ", "; for(Address a : message.getAllRecipients()) this.to += a.toString() + ", "; this.subject = message.getSubject(); - MimeMessageParser messageParser = new MimeMessageParser((MimeMessage) message); + MimeMessageParser messageParser = new MimeMessageParser( + (MimeMessage) message + ); messageParser.parse(); - // This block is to check whether mail contains plain text or html entities - // In case of html, with hasHtmlContent(), hasPlainContent() is also giving true, hence two conditions - // Also, getPlainContent().isEmpty() is to confirm that mail has html entities, - if (messageParser.hasPlainContent() && !messageParser.hasHtmlContent()){ - this.content = messageParser.getPlainContent(); - } else { - if (messageParser.getPlainContent().isEmpty()) { - this.content = org.jsoup.Jsoup.parse(messageParser.getHtmlContent()).text(); - } else{ - this.content = messageParser.getPlainContent(); + /* + Multi-part mime refers to sending both an HTML and TEXT part of + an email message in a single email. When a subscriber's email client + receives a multipart message, it accepts the HTML version if it can + render HTML, otherwise it presents the plain text version. + + So in multipart mails, the content is present in both HTML and plain + text and its completely on the client capabilities to process which one. + + */ + + if(messageParser.isMultipart()){ + /* + This is the best bet. + We have both plain content and html content. + */ + this.htmlContent = messageParser.getHtmlContent(); + this.htmlContentSet = true; + if(messageParser.getPlainContent().isEmpty()){ + /* + Plain text segment is present but content is blank. + */ + this.content = org.jsoup.Jsoup.parse( + messageParser.getHtmlContent()).text(); } + else + this.content = messageParser.getPlainContent(); + } + else if (messageParser.hasHtmlContent()) { + this.content = org.jsoup.Jsoup.parse( + messageParser.getHtmlContent()).text(); + this.htmlContentSet = true; + this.htmlContent = messageParser.getHtmlContent(); } + else{ + /* + Incoming mail is just plain text content. + */ + this.htmlContentSet = false; + this.content = messageParser.getPlainContent(); + } + this.date = message.getSentDate(); } @@ -51,9 +100,9 @@ public String toString() { "To : %s\n" + "From : %s\n" + "Date : %s\n" + - "=========================================\n" + + "===========================\n" + "Subject : %s\n" + - "=========================================\n\n" + + "===========================\n\n" + "%s", this.to, this.from,