Skip to content

Commit

Permalink
Update MailParsing Logic (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
rosehgal committed Jun 5, 2020
1 parent b67de8b commit 50c6558
Showing 1 changed file with 63 additions and 14 deletions.
77 changes: 63 additions & 14 deletions src/main/java/io/github/trashemail/utils/MailParser.java
Original file line number Diff line number Diff line change
@@ -1,46 +1,95 @@
package io.github.trashemail.utils;

import io.github.trashemail.Telegram.ForwardMailsToTelegram;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;

import org.apache.commons.mail.util.MimeMessageParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.mail.Address;
import javax.mail.Message;
import javax.mail.internet.MimeMessage;
import java.util.Date;

@Getter
@Setter
@NoArgsConstructor
public class MailParser {
// Recipient addresses of the message; the constructor appends ", " after each one.
private String to;
// Sender addresses of the message, built the same way as the recipient list.
private String from;
// Subject line as returned by the message.
private String subject;
// Text body: the plain-text content, or text extracted from the HTML content.
private String content;
// Sent date as reported by the message.
private Date date;
// HTML content taken from the parsed message; initialised to null.
private String htmlContent;
// Flag maintained by the constructor together with htmlContent; starts as false.
private Boolean htmlContentSet;

// Class-wide SLF4J logger.
private static final Logger log = LoggerFactory.getLogger(
MailParser.class);

/**
 * Builds a parsed view of a mail message: sender and recipient lists,
 * subject, sent date, a text body and, when available, the raw HTML body.
 *
 * <p>Multipart mails may carry the same content both as plain text and as
 * HTML. The plain text is preferred for {@code content}; when it is missing
 * or blank the text is extracted from the HTML instead. {@code htmlContent}
 * and {@code htmlContentSet} are populated only when the message really
 * contains an HTML part.
 *
 * @param message the mail to parse; it is cast to {@link MimeMessage}
 * @throws Exception if the message cannot be read or parsed
 */
public MailParser(Message message) throws Exception {
    // getFrom()/getAllRecipients() may return null when the header is absent.
    this.from = joinAddresses(message.getFrom());
    this.to = joinAddresses(message.getAllRecipients());
    this.subject = message.getSubject();

    MimeMessageParser messageParser = new MimeMessageParser(
            (MimeMessage) message);
    messageParser.parse();

    String plainText = messageParser.getPlainContent();

    if (messageParser.hasHtmlContent()) {
        // HTML part present (HTML-only mail or multipart alternative).
        String html = messageParser.getHtmlContent();
        this.htmlContent = html;
        this.htmlContentSet = true;

        if (plainText == null || plainText.isEmpty()) {
            // No usable plain-text segment: derive the text from the HTML.
            this.content = org.jsoup.Jsoup.parse(html).text();
        } else {
            this.content = plainText;
        }
    } else {
        // Incoming mail carries no HTML part, only (possibly empty) plain text.
        this.htmlContent = null;
        this.htmlContentSet = false;
        this.content = (plainText == null) ? "" : plainText;
    }

    this.date = message.getSentDate();
}

/**
 * Concatenates the addresses, appending ", " after every entry (including
 * the last one, which keeps the historical output format).
 * Returns an empty string for a null or empty array.
 */
private static String joinAddresses(Address[] addresses) {
    if (addresses == null) {
        return "";
    }
    StringBuilder joined = new StringBuilder();
    for (Address address : addresses) {
        joined.append(address.toString()).append(", ");
    }
    return joined.toString();
}
Expand All @@ -51,9 +100,9 @@ public String toString() {
"To : %s\n" +
"From : %s\n" +
"Date : %s\n" +
"=========================================\n" +
"===========================\n" +
"Subject : %s\n" +
"=========================================\n\n" +
"===========================\n\n" +
"%s",
this.to,
this.from,
Expand Down

0 comments on commit 50c6558

Please sign in to comment.