Skip to content

Commit

Permalink
Fix bug #687444, null-character handling is now closer and as illogic…
Browse files Browse the repository at this point in the history
…al as .NET.
  • Loading branch information
Atsushi Eno committed Apr 14, 2011
1 parent 0b9c06b commit 8277f4a
Show file tree
Hide file tree
Showing 5 changed files with 67 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ public static bool IsIgnorable (int cp)
public static bool IsIgnorable (int cp, byte flag)
{
if (cp == 0)
return false;
return true;
if ((flag & 1) != 0) {
UnicodeCategory uc = Char.GetUnicodeCategory ((char) cp);
// This check eliminates some extraneous code areas
Expand Down
16 changes: 10 additions & 6 deletions mcs/class/corlib/Mono.Globalization.Unicode/SimpleCollator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1431,15 +1431,17 @@ int QuickIndexOf (string s, string target, int start, int length, out bool testW
bool no = false;
for (int j = 0; j < target.Length; j++) {
if (testedTargetPos < j) {
if (target [j] >= 0x80) {
char c = target [j];
if (c == 0 || c >= 0x80) {
testWasUnable = true;
return -1;
}
else
testedTargetPos = j;
}
if (testedSourcePos < i + j) {
if (s [i + j] >= 0x80) {
char c = s [i + j];
if (c == 0 || c >= 0x80) {
testWasUnable = true;
return -1;
}
Expand Down Expand Up @@ -1631,7 +1633,8 @@ unsafe int IndexOf (string s, string target, int start, int length, byte* target
if (!IsIgnorable (target [tidx], opt))
break;
if (tidx == target.Length)
return start;
// FIXME: this is likely a hack. A string that consists of \0 differs from one that consists of other ignorable characters.
return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? IndexOfOrdinal (s, target, start, length) : start;
Contraction ct = GetContraction (target, tidx, target.Length - tidx);
string replace = ct != null ? ct.Replacement : null;
byte* sk = replace == null ? targetSortKey : null;
Expand Down Expand Up @@ -1723,7 +1726,7 @@ public unsafe int LastIndexOf (string s, string target, int start, int length, C
int LastIndexOfOrdinal (string s, string target, int start, int length)
{
if (target.Length == 0)
return 0;
return start;
if (s.Length < target.Length || target.Length > length)
return -1;
int end = start - length + target.Length -1;
Expand Down Expand Up @@ -1751,7 +1754,7 @@ int LastIndexOfOrdinal (string s, string target, int start, int length)
int LastIndexOfOrdinalIgnoreCase (string s, string target, int start, int length)
{
if (target.Length == 0)
return 0;
return start;
if (s.Length < length || target.Length > length)
return -1;
int end = start - length + target.Length - 1;
Expand Down Expand Up @@ -1879,7 +1882,8 @@ unsafe int LastIndexOf (string s, string target, int start, int length, byte* ta
if (!IsIgnorable (target [tidx], opt))
break;
if (tidx == target.Length)
return start;
// FIXME: this is likely a hack. A string that consists of \0 differs from one that consists of other ignorable characters.
return IndexOfOrdinal (target, '\0', 0, target.Length) >= 0 ? LastIndexOfOrdinal (s, target, start, length) : start;
Contraction ct = GetContraction (target, tidx, target.Length - tidx);
string replace = ct != null ? ct.Replacement : null;
byte* sk = replace == null ? targetSortKey : null;
Expand Down
3 changes: 3 additions & 0 deletions mcs/class/corlib/Mono.Globalization.Unicode/SortKeyBuffer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,9 @@ private int GetOptimizedLength (byte [] data, int len, byte defaultValue)

public SortKey GetResult ()
{
if (source.Length == 0)
return new SortKey (lcid, source, new byte [0], options, 0, 0, 0, 0, 0, 0, 0, 0);

if (frenchSort && !frenchSorted && l2b != null) {
int i = 0;
for (; i < l2b.Length; i++)
Expand Down
2 changes: 1 addition & 1 deletion mcs/class/corlib/System.Globalization/CompareInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -824,7 +824,7 @@ public virtual int LastIndexOf(string source, string value,

int valuelen=value.Length;
if(valuelen==0) {
return(0);
return(startIndex);
}

return(internal_index_switch (source, startIndex, count,
Expand Down
75 changes: 52 additions & 23 deletions mcs/class/corlib/Test/System.Globalization/CompareInfoTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ public void Compare()
Assert.AreEqual (0, CultureInfo.InvariantCulture.CompareInfo.Compare (null, null), "Compare two null references");
Assert.AreEqual (1, CultureInfo.InvariantCulture.CompareInfo.Compare ("", null), "Compare a string to a null reference");
Assert.AreEqual (-1, CultureInfo.InvariantCulture.CompareInfo.Compare (null, ""), "Compare a null reference to a string");

}

// Culture-sensitive collation tests
Expand Down Expand Up @@ -977,9 +976,6 @@ public void IsSuffixMSBug ()
}

[Test]
#if NET_2_0
[Category ("NotDotNet")]
#endif
public void IndexOfString ()
{
if (!doTest)
Expand All @@ -996,26 +992,39 @@ public void IndexOfString ()
AssertIndexOf ("#8", 0, "-ABC", "-", CompareOptions.None);
AssertIndexOf ("#9", 0, "--ABC", "--", CompareOptions.None);
AssertIndexOf ("#10", -1, "--ABC", "--", 1, 2, CompareOptions.None, invariant);
// BUG in .NET 2.0 : see GetSortKey() test (mentioned above).
AssertIndexOf ("#11", 0, "AE", "\u00C6", CompareOptions.None);

// U+3007 is a completely ignored character.
AssertIndexOf ("#12", 0, "\uff21\uff21", "\uff21", CompareOptions.None);
// BUG in .NET 2.0 : see \u3007 issue (mentioned above).
AssertIndexOf ("#13", 0, "\uff21\uff21", "\u3007\uff21", CompareOptions.None);

AssertIndexOf ("#14", 0, "\uff21\uff21", "\uff21\u3007", CompareOptions.None);
AssertIndexOf ("#15", 0, "\uff21\uff21", "\u3007", CompareOptions.None);
AssertIndexOf ("#15-2", 1, "\u3007\uff21", "\uff21", CompareOptions.None);
// target is "empty" (in culture-sensitive context).
AssertIndexOf ("#16", -1, String.Empty, "\u3007");
// BUG in .NET 2.0 : see \u3007 issue (mentioned above).
AssertIndexOf ("#17", 0, "A", "\u3007");

AssertIndexOf ("#18", 0, "ABC", "\u3007");

AssertIndexOf ("#19", 0, "\\b\\a a", "\\b\\a a");
Assert.AreEqual (0, new CultureInfo ("en").CompareInfo.IndexOf ("\\b\\a a", "\\b\\a a"), "#19en");
Assert.AreEqual (0, new CultureInfo ("ja").CompareInfo.IndexOf ("\\b\\a a", "\\b\\a a"), "#19ja");
}

// IndexOf expectations that the per-assertion notes flag as "BUG in .NET 2.0".
// On NET_2_0 builds the test is categorized as NotDotNet.
[Test]
#if NET_2_0
[Category ("NotDotNet")]
#endif
public void IndexOfStringWeird ()
{
	// Same guard the sibling IndexOfString / LastIndexOfString tests use:
	// skip the culture-sensitive checks when doTest is not set.
	if (!doTest)
		return;

	// BUG in .NET 2.0 : see GetSortKey() test (mentioned above).
	// Searching "AE" for U+00C6 is expected to match at index 0.
	AssertIndexOf ("#11", 0, "AE", "\u00C6", CompareOptions.None);

	// BUG in .NET 2.0 : see \u3007 issue (mentioned above).
	// A U+3007 in front of the target is expected not to affect the match position.
	AssertIndexOf ("#13", 0, "\uff21\uff21", "\u3007\uff21", CompareOptions.None);

	// BUG in .NET 2.0 : see \u3007 issue (mentioned above).
	// A target made only of U+3007 is expected to be found at index 0.
	AssertIndexOf ("#17", 0, "A", "\u3007");
}

[Test]
public void IndexOfSpecialWeight ()
{
Expand All @@ -1042,9 +1051,6 @@ public void IndexOfSpecialWeight ()
}

[Test]
#if NET_2_0
[Category ("NotDotNet")]
#endif
public void LastIndexOfString ()
{
if (!doTest)
Expand All @@ -1057,18 +1063,15 @@ public void LastIndexOfString ()
AssertLastIndexOf ("#5", 4, "ABCABC", "BC", CompareOptions.IgnoreCase);
AssertLastIndexOf ("#6", 4, "BBCBBC", "BC", CompareOptions.IgnoreCase);
AssertLastIndexOf ("#7", 1, "original", "rig", CompareOptions.None);
// BUG in .NET 2.0 : see GetSortKey() test (mentioned above).
AssertLastIndexOf ("#8", 0, "\u00E6", "ae", CompareOptions.None);

AssertLastIndexOf ("#9", 0, "-ABC", "-", CompareOptions.None);
AssertLastIndexOf ("#10", 0, "--ABC", "--", CompareOptions.None);
AssertLastIndexOf ("#11", -1, "--ABC", "--", 2, 2, CompareOptions.None, invariant);
AssertLastIndexOf ("#12", -1, "--ABC", "--", 4, 2, CompareOptions.None, invariant);
// BUG in .NET 2.0 : see GetSortKey() test (mentioned above).
AssertLastIndexOf ("#13", 0, "AE", "\u00C6", CompareOptions.None);

// U+3007 is a completely ignored character.
AssertLastIndexOf ("#14", 1, "\uff21\uff21", "\uff21", CompareOptions.None);
// BUG in .NET 2.0 : see \u3007 issue (mentioned above).
AssertLastIndexOf ("#15", 1, "\uff21\uff21", "\u3007\uff21", CompareOptions.None);

AssertLastIndexOf ("#16", 1, "\uff21\uff21", "\uff21\u3007", CompareOptions.None);
AssertLastIndexOf ("#17", 1, "\uff21\uff21", "\u3007", CompareOptions.None);
AssertLastIndexOf ("#18", 1, "\u3007\uff21", "\uff21", CompareOptions.None);
Expand All @@ -1078,6 +1081,25 @@ public void LastIndexOfString ()
// bug #80612
AssertLastIndexOf ("#20", 8, "/system/web", "w");
Assert.AreEqual (8, new CultureInfo ("sv").CompareInfo.LastIndexOf ("/system/web", "w"), "#20sv");

AssertLastIndexOf ("#21", 2, "foo", String.Empty);
}

// LastIndexOf expectations that the per-assertion notes flag as "BUG in .NET 2.0";
// the test is categorized as NotDotNet.
[Test]
[Category ("NotDotNet")]
public void LastIndexOfStringDotnetWeird ()
{
	const CompareOptions none = CompareOptions.None;

	// Nothing is asserted unless doTest is set.
	if (doTest) {
		// BUG in .NET 2.0 : see GetSortKey() test (mentioned above).
		// "ae" is expected to be found at index 0 of the single character U+00E6.
		AssertLastIndexOf ("#8", 0, "\u00E6", "ae", none);

		// BUG in .NET 2.0 : see GetSortKey() test (mentioned above).
		// U+00C6 is expected to be found at index 0 of "AE".
		AssertLastIndexOf ("#13", 0, "AE", "\u00C6", none);

		// BUG in .NET 2.0 : see \u3007 issue (mentioned above).
		// The last match is expected at index 1 despite the leading U+3007 in the target.
		AssertLastIndexOf ("#15", 1, "\uff21\uff21", "\u3007\uff21", none);
	}
}

[Test]
Expand Down Expand Up @@ -1125,20 +1147,27 @@ public void LastIndexOfOrdinalString ()
}

// Expectations for strings that contain U+0000 in culture-sensitive
// IndexOf / LastIndexOf / CompareTo.
[Test]
public void NullCharacter ()
{
	const string mono = "MONO";
	const string oneNull = "\0";
	const string twoNulls = "\0\0";
	const string threeNulls = "\0\0\0";

	// for bug #76702: a run of null characters is not found inside "MONO",
	// and "MONO" sorts after it.
	Assert.AreEqual (-1, mono.IndexOf (threeNulls), "#1");
	Assert.AreEqual (-1, mono.LastIndexOf (threeNulls), "#2");
	Assert.AreEqual (1, mono.CompareTo (threeNulls), "#3");

	// The original author noted not understanding why the results below
	// are what they are; they are recorded here as the expected values.
	AssertIndexOf ("#4", 0, twoNulls, oneNull);
	AssertIndexOf ("#5", -1, oneNull, twoNulls);
	AssertIndexOf ("#6", -1, "foo", oneNull);
	AssertLastIndexOf ("#7", 1, twoNulls, oneNull);
	AssertLastIndexOf ("#8", -1, oneNull, twoNulls);
	AssertLastIndexOf ("#9", -1, "foo", oneNull);
}

// LAMESPEC: MS.NET treats it as equivalent, while in IndexOf() it does not match.
[Test]
[Category ("NotDotNet")]
public void NullCharacterWeird ()
{
	// Only one expectation can hold for this comparison. Trailing null
	// characters are expected to be treated as equivalent, so CompareTo
	// must return 0; the contradictory -1 assertion on the same expression
	// has been dropped.
	Assert.AreEqual (0, "MONO".CompareTo ("MONO\0\0\0"), "#4");
}

#if NET_2_0
Expand Down

0 comments on commit 8277f4a

Please sign in to comment.