-
Notifications
You must be signed in to change notification settings - Fork 727
/
XmlExtensions.cs
144 lines (128 loc) · 5 KB
/
XmlExtensions.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
// Copyright (c) Charlie Poole, Rob Prouse and Contributors. MIT License - see LICENSE.txt
using System;
using System.Diagnostics.CodeAnalysis;
using System.Text;
using System.Xml;
namespace NUnit.Framework
{
/// <summary>
/// Extension methods and helpers for writing text through an <see cref="XmlWriter"/>
/// without emitting characters that would make the resulting XML unsafe.
/// These extension methods do not require a special <c>using</c> directive.
/// </summary>
internal static class XmlExtensions
{
    // we want to write just the main element without XML declarations
    internal static readonly XmlWriterSettings FragmentWriterSettings = new()
    {
        ConformanceLevel = ConformanceLevel.Fragment
    };

    /// <summary>
    /// Writes an attribute after escaping any unsafe characters in its value.
    /// </summary>
    /// <param name="writer">The writer that receives the attribute.</param>
    /// <param name="name">The attribute name, written as-is.</param>
    /// <param name="value">The attribute value; unsafe characters are replaced by <c>\uXXXX</c> sequences.</param>
    internal static void WriteAttributeStringSafe(this XmlWriter writer, string name, string value)
    {
        writer.WriteAttributeString(name, EscapeInvalidXmlCharacters(value));
    }

    /// <summary>
    /// Writes <paramref name="text"/> as one or more CDATA sections, escaping unsafe
    /// characters and splitting the text wherever it contains the CDATA terminator <c>]]&gt;</c>.
    /// </summary>
    /// <param name="writer">The writer that receives the CDATA section(s).</param>
    /// <param name="text">The text to write. An empty string produces a single empty CDATA section.</param>
    /// <exception cref="ArgumentNullException"><paramref name="text"/> is <see langword="null"/>.</exception>
    internal static void WriteCDataSafe(this XmlWriter writer, string text)
    {
        if (text is null)
            throw new ArgumentNullException(nameof(text));

        text = EscapeInvalidXmlCharacters(text);

        int start = 0;
        int terminator;
        while ((terminator = text.IndexOf("]]>", start, StringComparison.Ordinal)) >= 0)
        {
            // End the current section right after "]]" so that the ">" becomes the
            // first character of the next section and the terminator never appears whole.
            int splitAt = terminator + 2;
            writer.WriteCData(text.Substring(start, splitAt - start));
            start = splitAt;
        }

        // The remainder always holds at least the ">" of the last terminator,
        // or the whole text when no terminator was found.
        writer.WriteCData(text.Substring(start));
    }

    /// <summary>
    /// Replaces every character that is not considered safe for XML output with a
    /// literal <c>\uXXXX</c> sequence (backslash, <c>u</c>, four lowercase hex digits).
    /// </summary>
    /// <param name="str">The text to check; may be <see langword="null"/>.</param>
    /// <returns>
    /// <see langword="null"/> when <paramref name="str"/> is <see langword="null"/>;
    /// the same string instance when nothing needs escaping; otherwise a new escaped string.
    /// </returns>
    [return: NotNullIfNotNull("str")]
    internal static string? EscapeInvalidXmlCharacters(string? str)
    {
        if (str is null)
            return null;

        // Quick check when we expect valid input: only printable ASCII (0x20-0x7E) is
        // accepted here. DEL (0x7F) must take the full scan because that scan escapes
        // it; letting it through would make the result depend on the other characters.
        foreach (char c in str)
        {
            if (c < 0x20 || c > 0x7E)
                return EscapeInvalidXmlCharactersUnlikely(str);
        }

        return str;
    }

    /// <summary>
    /// Full scan used when the quick check found a character outside printable ASCII.
    /// </summary>
    /// <param name="str">The text to scan.</param>
    /// <returns>
    /// <paramref name="str"/> itself when every character is safe, otherwise a copy in
    /// which each unsafe character is replaced by its <c>\uXXXX</c> sequence.
    /// </returns>
    private static string EscapeInvalidXmlCharactersUnlikely(string str)
    {
        // Stays null until the first character that needs escaping, so that input
        // which turns out to be fully valid is returned without any allocation.
        StringBuilder? builder = null;

        for (int i = 0; i < str.Length; i++)
        {
            char c = str[i];

            if (IsSafeXmlChar(c))
            {
                builder?.Append(c);
            }
            else if (char.IsHighSurrogate(c) &&
                     i + 1 < str.Length &&
                     char.IsLowSurrogate(str[i + 1]))
            {
                // A well-formed surrogate pair encodes a code point in
                // [#x10000-#x10FFFF], which is valid XML; copy both halves.
                builder?.Append(c).Append(str[i + 1]);
                i++;
            }
            else
            {
                // First unsafe character: start the builder with everything that was
                // accepted so far, then append the escape sequence.
                builder ??= new StringBuilder(str.Length + 5).Append(str, 0, i);
                builder.Append(CharToUnicodeSequence(c));
            }
        }

        return builder?.ToString() ?? str;
    }

    /// <summary>
    /// Tells whether a single UTF-16 code unit may be written unchanged.
    /// </summary>
    /// <remarks>
    /// From the XML specification: https://www.w3.org/TR/xml/#charsets
    /// Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]
    /// In addition to that production, this method rejects #x7F-#x84 and #x86-#x9F.
    /// Surrogate code units are rejected here; valid pairs are handled by the caller.
    /// </remarks>
    private static bool IsSafeXmlChar(char c)
    {
        return c == 0x9 || c == 0xA || c == 0xD
            || (c >= 0x20 && c <= 0x7E)
            || c == 0x85
            || (c >= 0xA0 && c <= 0xD7FF)
            || (c >= 0xE000 && c <= 0xFFFD);
    }

    /// <summary>
    /// Formats a character as a backslash, <c>u</c>, and its code unit in four lowercase hex digits.
    /// </summary>
    private static string CharToUnicodeSequence(char symbol)
    {
        return $"\\u{(int)symbol:x4}";
    }
}
}