Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix email separators from mobile and web clients #109

Merged
merged 2 commits into from Mar 20, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
29 changes: 19 additions & 10 deletions Mail2Bug/Email/EmailBodyProcessingUtils.cs
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
Expand Down Expand Up @@ -33,21 +33,30 @@ public static string GetLastMessageText_Html(string rawBody)
{
CQ dom = rawBody;

const string messageSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
const string outlookDesktopSeparatorStyle = "border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in";
const string outlookMobileSeparatorStyle = "display:inline-block;width:98%";

// There's no well-defined way to parse the latest email from a thread
// We have to use heuristics to cover different email clients
foreach (IDomObject element in dom["*"])
{
// Lots of email clients insert html elements as message delimiters which have styling but no inner text
// This block checks for some of these patterns
if (element.NodeName == "DIV")
if (string.Equals(element.NodeName, "div", StringComparison.OrdinalIgnoreCase) &&
outlookDesktopSeparatorStyle.Equals(element.GetAttribute("style")))
{
if (element.Id == "divRplyFwdMsg" || element.Id == "x_divRplyFwdMsg" || messageSeparatorStyle.Equals(element.GetAttribute("style")))
EduardoRomo marked this conversation as resolved.
Show resolved Hide resolved
{
IDomContainer parent = element.ParentNode;
element.Remove();
RemoveSubsequent(parent);
break;
}
IDomContainer parent = element.ParentNode;
RemoveSubsequent(parent);
parent.Remove();
break;
}

if (string.Equals(element.NodeName, "hr", StringComparison.OrdinalIgnoreCase) &&
outlookMobileSeparatorStyle.Equals(element.GetAttribute("style")))
EduardoRomo marked this conversation as resolved.
Show resolved Hide resolved
{
RemoveSubsequent(element);
element.Remove();
break;
}

if (!element.ChildElements.Any() && !string.IsNullOrWhiteSpace(element.InnerText))
Expand Down
57 changes: 21 additions & 36 deletions Mail2BugUnitTests/EmailBodyProcessingUtilsUnitTest.cs
@@ -1,4 +1,4 @@
using System;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
Expand Down Expand Up @@ -45,7 +45,7 @@ public void TestConvertHtmlMessageToPlainTextBasic()

// Can't have '<' or '>' chars in the content, since it breaks the HTML processing. This is OK, since real HTML should never have these
// chracters either (they will be escaped as &lt; and &gt;)
var expectedText =
var expectedText =
StringFactory.GenerateRandomString(properties, _rand.Next()).Trim().Replace("<", "").Replace(">", "");
var htmlText = string.Format("<html><head></head><body><p>{0}</p></body></html>", expectedText);
var plainText = EmailBodyProcessingUtils.ConvertHtmlMessageToPlainText(htmlText);
Expand Down Expand Up @@ -118,42 +118,27 @@ public void TestGetLastMessageText_NoPrevious()
}

[TestMethod]
public void TestGetLastMessageText_Previous_FromColon()
public void TestGetLastMessageText_EmailClientsSchemas()
{
string original = @"<html>
<body>
<div class=""wrapppingBothMessageAndReply"">
<p class=""customStyling"">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style=""border:none;border-top:solid"">
<p class=""customStyling""><b>From:</b> someAddress;<br><b>Subject:</b> RE: Build error<o:p></o:p>
</p>
</div>
</div>
<p class=""customStyling"">
<o:p>&nbsp;</o:p>
</p>
<p class=""customStyling"">text of the reply
</p>
</div>
<div>Something after the containing div</div>
</body>
</html>";

// Note: it's acceptable to not preserve whitespace because it's
// manipulating HTML, not plain text. As long as the rendered page isn't impacted, all is well
// Note that we expect that both
// 1. Elements following the latest message are removed
// 2. Anything in the same element as the latest message but after the start of the previous should be cleared out
string expected = @"<html><head></head><body>
<div class=""wrapppingBothMessageAndReply"">
<p class=""customStyling"">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style=""border:none;border-top:solid"">
<p class=""customStyling""><b></b></p></div></div></div></body></html>";
const string schemasFolder = "LastMessageSchemas";
foreach (var originalFilename in Directory.GetFiles(schemasFolder, "*.orig"))
{
Trace.WriteLine(string.Format("Processing email schema file {0}", originalFilename));

string actual = EmailBodyProcessingUtils.GetLastMessageText_Html(original);
Assert.AreEqual(Normalize(expected), Normalize(actual));
var baseFilename = Path.GetFileNameWithoutExtension(originalFilename);
var expectedFilename = Path.Combine(schemasFolder, baseFilename + ".expected");

var original = File.ReadAllText(originalFilename);
var expected = Normalize(File.ReadAllText(expectedFilename));
var actual = Normalize(EmailBodyProcessingUtils.GetLastMessageText_Html(original));

// Note: it's acceptable to not preserve whitespace because it's
// manipulating HTML, not plain text. As long as the rendered page isn't impacted, all is well
// Note that we expect that both
// 1. Elements following the latest message are removed
// 2. Anything in the same element as the latest message but after the start of the previous should be cleared out
Assert.AreEqual(expected, actual);
}
}

private static string Normalize(string text)
Expand Down
6 changes: 6 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/FromColon.expected
@@ -0,0 +1,6 @@
<html><head></head><body>
<div class="wrapppingBothMessageAndReply">
<p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style="border:none;border-top:solid">
<p class="customStyling"><b></b></p></div></div></div></body></html>
19 changes: 19 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/FromColon.orig
@@ -0,0 +1,19 @@
<html>
<body>
<div class="wrapppingBothMessageAndReply">
<p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p>
<div>
<div style="border:none;border-top:solid">
<p class="customStyling"><b>From:</b> someAddress;<br><b>Subject:</b> RE: Build error<o:p></o:p>
</p>
</div>
</div>
<p class="customStyling">
<o:p>&nbsp;</o:p>
</p>
<p class="customStyling">text of the reply
</p>
</div>
<div>Something after the containing div</div>
</body>
</html>
@@ -0,0 +1,4 @@
<html><head></head><body>
<div class="wrappingBothMessageAndReply">
<b><p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p></b>
</div></body></html>
18 changes: 18 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/OutlookDesktopSeparator.orig
@@ -0,0 +1,18 @@
<html>
<body>
<div class="wrappingBothMessageAndReply">
<b><p class="customStyling">This is random text with some custom styling and outlook-generated elements.<o:p></o:p></p></b>
<div>
<div style="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0in 0in 0in">
<p class="customStyling">
<b>Old emails from: field and subject</b>
</p>
</div>
</div>
<p class="customStyling"><o:p>&nbsp;</o:p></p>
<div>
Something after the containing div
</div>
</div>
</body>
</html>
@@ -0,0 +1,10 @@
<html><head></head>
<body>
<div>
<div>
<div style="customStyle">Message content from iPhone</div>
</div>
<div><br></div>
<div class="ms-outlook-ios-signature"></div>
</div>
</body></html>
15 changes: 15 additions & 0 deletions Mail2BugUnitTests/LastMessageSchemas/OutlookMobileSeparator.orig
@@ -0,0 +1,15 @@
<html><head></head>
<body>
<div>
<div>
<div style="customStyle">Message content from iPhone</div>
</div>
<div><br /></div>
<div class="ms-outlook-ios-signature"></div>
</div>
<hr style="display:inline-block;width:98%" tabindex="-1" />
<div>
Something after the containing div
</div>
</body>
</html>
18 changes: 18 additions & 0 deletions Mail2BugUnitTests/Mail2BugUnitTests.csproj
Expand Up @@ -82,6 +82,24 @@
</ItemGroup>
<ItemGroup>
<None Include="app.config" />
<None Include="LastMessageSchemas\FromColon.expected">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\FromColon.orig">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookDesktopSeparator.orig">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookMobileSeparator.expected">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookMobileSeparator.orig">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="LastMessageSchemas\OutlookDesktopSeparator.expected">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Include="packages.config" />
<None Include="RegressionMessages\Basic.expected">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
Expand Down