From 8872e22c3aff7da7765c83ae3e4b252eb51e532e Mon Sep 17 00:00:00 2001 From: Immortal Date: Wed, 29 May 2013 13:34:18 +0200 Subject: [PATCH] Updated SgmlDomBuilder so that it respects significant whitespaces. --- .gitignore | 1 + .../NReadability.Tests/SgmlDomBuilderTests.cs | 14 +++++++++++++- Src/NReadability/NReadability/SgmlDomBuilder.cs | 2 +- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index a4ce609..4b25b62 100644 --- a/.gitignore +++ b/.gitignore @@ -36,3 +36,4 @@ /Src/NReadability/NReadability.6.1.ReSharper.user /Src/NReadability/NReadability.sln.DotSettings.user *.suo +Src/NReadability/NReadability.sln.DotSettings diff --git a/Src/NReadability/NReadability.Tests/SgmlDomBuilderTests.cs b/Src/NReadability/NReadability.Tests/SgmlDomBuilderTests.cs index d78e5fa..0543f0f 100644 --- a/Src/NReadability/NReadability.Tests/SgmlDomBuilderTests.cs +++ b/Src/NReadability/NReadability.Tests/SgmlDomBuilderTests.cs @@ -1,4 +1,5 @@ -using System.Xml.Linq; +using System; +using System.Xml.Linq; using NUnit.Framework; namespace NReadability.Tests @@ -54,6 +55,17 @@ public void Builder_handles_invalid_entity_references() Assert.DoesNotThrow(() => _sgmlDomSerializer.SerializeDocument(document)); } + [Test] + public void Builder_respects_significant_whitespaces() + { + XDocument document = + _sgmlDomBuilder.BuildDocument("Link1 Link2"); + + string html = _sgmlDomSerializer.SerializeDocument(document); + + Assert.IsTrue(html.Contains("Link1 Link2")); + } + #endregion } } diff --git a/Src/NReadability/NReadability/SgmlDomBuilder.cs b/Src/NReadability/NReadability/SgmlDomBuilder.cs index 44d918d..68cecc4 100644 --- a/Src/NReadability/NReadability/SgmlDomBuilder.cs +++ b/Src/NReadability/NReadability/SgmlDomBuilder.cs @@ -96,7 +96,7 @@ private static XDocument LoadDocument(string htmlContent) { sgmlReader.CaseFolding = CaseFolding.ToLower; sgmlReader.DocType = "HTML"; - sgmlReader.WhitespaceHandling = WhitespaceHandling.None; + sgmlReader.WhitespaceHandling = WhitespaceHandling.Significant; using (var sr = new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(htmlContent)))) {