Skip to content

Commit

Permalink
Ordinal Ignore Case Optimization (dotnet#40910)
Browse files Browse the repository at this point in the history
  • Loading branch information
tarekgh committed Aug 18, 2020
1 parent 600eaa2 commit 1c1dc8b
Show file tree
Hide file tree
Showing 21 changed files with 1,113 additions and 752 deletions.
3 changes: 3 additions & 0 deletions src/libraries/Common/src/Interop/Interop.Casing.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,8 @@ internal static partial class Globalization

// P/Invoke declaration bound to the native export GlobalizationNative_ChangeCaseTurkish.
// src/srcLen describe the input buffer and dstBuffer/dstBufferCapacity the output buffer.
// NOTE(review): the native implementation is not visible here; presumably bToUpper selects
// upper-casing when true and lower-casing when false - confirm against pal_casing.c.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_ChangeCaseTurkish")]
internal static extern unsafe void ChangeCaseTurkish(char* src, int srcLen, char* dstBuffer, int dstBufferCapacity, bool bToUpper);

// P/Invoke declaration bound to the native export GlobalizationNative_InitOrdinalCasingPage.
// The native function writes 256 entries to pTarget: the upper-case mapping of each UTF-16 code unit
// in the page selected by pageNumber (code units (pageNumber << 8) through (pageNumber << 8) + 255).
// pTarget must therefore point to a buffer with room for at least 256 chars.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_InitOrdinalCasingPage")]
internal static extern unsafe void InitOrdinalCasingPage(int pageNumber, char* pTarget);
}
}
9 changes: 0 additions & 9 deletions src/libraries/Common/src/Interop/Interop.Collation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@ internal static partial class Globalization
// P/Invoke declaration bound to the native export GlobalizationNative_LastIndexOf.
// Takes a sort handle, the target and source buffers with their lengths in chars, and the compare options.
// NOTE(review): presumably returns the index of the last match (or a negative value when none) and
// writes the matched length through matchLengthPtr - the native implementation is not visible here; confirm.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_LastIndexOf")]
internal static extern unsafe int LastIndexOf(IntPtr sortHandle, char* target, int cwTargetLength, char* pSource, int cwSourceLength, CompareOptions options, int* matchLengthPtr);

// P/Invoke declaration bound to the native export GlobalizationNative_IndexOfOrdinalIgnoreCase.
// This overload passes the target as a managed string; a char* overload binds to the same entry point.
// The native function returns the index within pSource of the first case-insensitive match of target,
// or of the last match when findLast is true, and -1 when there is no match.
// The native side asserts that cwSourceLength >= cwTargetLength, so callers must check that first.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IndexOfOrdinalIgnoreCase")]
internal static extern unsafe int IndexOfOrdinalIgnoreCase(string target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast);

// P/Invoke declaration bound to the native export GlobalizationNative_IndexOfOrdinalIgnoreCase.
// This overload passes the target as a raw char pointer; a string overload binds to the same entry point.
// The native function returns the index within pSource of the first case-insensitive match of target,
// or of the last match when findLast is true, and -1 when there is no match.
// The native side asserts that cwSourceLength >= cwTargetLength, so callers must check that first.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_IndexOfOrdinalIgnoreCase")]
internal static extern unsafe int IndexOfOrdinalIgnoreCase(char* target, int cwTargetLength, char* pSource, int cwSourceLength, bool findLast);

// P/Invoke declaration bound to the native export GlobalizationNative_StartsWith.
// The native return value is marshalled as a 4-byte boolean (UnmanagedType.Bool).
// NOTE(review): presumably reports whether source starts with target under the given options and
// writes the matched length through matchedLength - the native implementation is not visible here; confirm.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_StartsWith")]
[return: MarshalAs(UnmanagedType.Bool)]
internal static extern unsafe bool StartsWith(IntPtr sortHandle, char* target, int cwTargetLength, char* source, int cwSourceLength, CompareOptions options, int* matchedLength);
Expand All @@ -49,9 +43,6 @@ internal static partial class Globalization
// P/Invoke declaration bound to the native export GlobalizationNative_GetSortKey.
// Takes a sort handle, the input string buffer with its length, an output byte buffer with its length,
// and the compare options.
// NOTE(review): the meaning of the int return value (presumably the sort key length) is not visible here; confirm.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_GetSortKey")]
internal static extern unsafe int GetSortKey(IntPtr sortHandle, char* str, int strLength, byte* sortKey, int sortKeyLength, CompareOptions options);

// P/Invoke declaration bound to the native export GlobalizationNative_CompareStringOrdinalIgnoreCase.
// The native function compares the two buffers code point by code point, treating code points with the
// same upper-case mapping as equal. It returns 0 when the strings are equal ignoring case, otherwise -1 or 1.
// The native side asserts that both pointers are non-null and both lengths are non-negative.
[DllImport(Libraries.GlobalizationNative, CharSet = CharSet.Unicode, EntryPoint = "GlobalizationNative_CompareStringOrdinalIgnoreCase")]
internal static extern unsafe int CompareStringOrdinalIgnoreCase(char* lpStr1, int cwStr1Len, char* lpStr2, int cwStr2Len);

// P/Invoke declaration bound to the native export GlobalizationNative_GetSortVersion.
// NOTE(review): presumably returns a version number for the collation data behind sortHandle -
// the native implementation is not visible here; confirm.
[DllImport(Libraries.GlobalizationNative, EntryPoint = "GlobalizationNative_GetSortVersion")]
internal static extern int GetSortVersion(IntPtr sortHandle);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ FCFuncStart(gPalGlobalizationNative)
QCFuncElement("ChangeCaseTurkish", GlobalizationNative_ChangeCaseTurkish)
QCFuncElement("CloseSortHandle", GlobalizationNative_CloseSortHandle)
QCFuncElement("CompareString", GlobalizationNative_CompareString)
QCFuncElement("CompareStringOrdinalIgnoreCase", GlobalizationNative_CompareStringOrdinalIgnoreCase)
QCFuncElement("EndsWith", GlobalizationNative_EndsWith)
QCFuncElement("EnumCalendarInfo", GlobalizationNative_EnumCalendarInfo)
QCFuncElement("GetCalendarInfo", GlobalizationNative_GetCalendarInfo)
Expand All @@ -49,8 +48,8 @@ FCFuncStart(gPalGlobalizationNative)
QCFuncElement("GetSortVersion", GlobalizationNative_GetSortVersion)
QCFuncElement("GetTimeZoneDisplayName", GlobalizationNative_GetTimeZoneDisplayName)
QCFuncElement("IndexOf", GlobalizationNative_IndexOf)
QCFuncElement("IndexOfOrdinalIgnoreCase", GlobalizationNative_IndexOfOrdinalIgnoreCase)
QCFuncElement("InitICUFunctions", GlobalizationNative_InitICUFunctions)
QCFuncElement("InitOrdinalCasingPage", GlobalizationNative_InitOrdinalCasingPage)
QCFuncElement("IsNormalized", GlobalizationNative_IsNormalized)
QCFuncElement("IsPredefinedLocale", GlobalizationNative_IsPredefinedLocale)
QCFuncElement("LastIndexOf", GlobalizationNative_LastIndexOf)
Expand Down
18 changes: 18 additions & 0 deletions src/libraries/Native/Unix/System.Globalization.Native/pal_casing.c
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,24 @@ void GlobalizationNative_ChangeCaseTurkish(
}
}

// Fills pTarget[0..255] with the upper-case mapping of every UTF-16 code unit in one 256-entry page.
// pageNumber selects the page: entry k describes code unit (pageNumber << 8) + k.
// pTarget must have room for 256 UChar values.
void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget)
{
    const int32_t pageStart = pageNumber << 8;

    // Each character is mapped individually with u_toupper because that is the only way to get a
    // strict one-to-one simple mapping; the ICU string casing APIs cannot give such results even
    // when a NULL locale is used to force root behavior.
    for (int offset = 0; offset < 256; ++offset)
    {
        pTarget[offset] = (UChar) u_toupper((UChar32)(pageStart + offset));
    }

    if (pageStart != 0x0100)
    {
        return;
    }

    // Page 0x01 only: disable Turkish I behavior on Ordinal operations by mapping these
    // two code units to themselves instead of to their u_toupper results.
    pTarget[0x31] = (UChar)0x0131; // U+0131, Turkish lowercase (dotless) i
    pTarget[0x7F] = (UChar)0x017F; // U+017F, LATIN SMALL LETTER LONG S
}

#ifdef __clang__
#pragma clang diagnostic pop
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,5 @@ PALEXPORT void GlobalizationNative_ChangeCaseTurkish(const UChar* lpSrc,
UChar* lpDst,
int32_t cwDstLength,
int32_t bToUpper);

// Fills pTarget[0..255] with the upper-case mapping of the UTF-16 code units
// (pageNumber << 8) through (pageNumber << 8) + 255. pTarget must hold at least 256 UChar values.
PALEXPORT void GlobalizationNative_InitOrdinalCasingPage(int32_t pageNumber, UChar* pTarget);
103 changes: 2 additions & 101 deletions src/libraries/Native/Unix/System.Globalization.Native/pal_collation.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ int32_t GlobalizationNative_CompareString(
}
if (lpStr2 == NULL)
{
lpStr2 = &dummyChar;
lpStr2 = &dummyChar;
}

result = ucol_strcoll(pColl, lpStr1, cwStr1Length, lpStr2, cwStr2Length);
Expand Down Expand Up @@ -497,7 +497,7 @@ int32_t GlobalizationNative_IndexOf(

return (result == UCOL_EQUAL) ? 0 : -1;
}

UErrorCode err = U_ZERO_ERROR;
const UCollator* pColl = GetCollatorFromSortHandle(pSortHandle, options, &err);

Expand Down Expand Up @@ -605,61 +605,6 @@ static int AreEqualOrdinalIgnoreCase(UChar32 one, UChar32 two)
return u_toupper(one) == u_toupper(two);
}

/*
Function:
IndexOfOrdinalIgnoreCase

Searches lpSource for lpTarget, comparing the two code point by code point with
AreEqualOrdinalIgnoreCase. Candidate start positions advance through lpSource one
code point at a time.

Returns the UTF-16 index in lpSource of the first match, or of the last match when
findLast is non-zero. Returns -1 when there is no match.
The caller must guarantee cwSourceLength >= cwTargetLength.
*/
int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase(
    const UChar* lpTarget, int32_t cwTargetLength, const UChar* lpSource, int32_t cwSourceLength, int32_t findLast)
{
    // Highest source index at which a full-length target could still begin.
    const int32_t lastStart = cwSourceLength - cwTargetLength;
    assert(lastStart >= 0);

    int32_t found = -1;

    for (int32_t start = 0; start <= lastStart; )
    {
        int32_t sourcePos = start;
        int32_t targetPos = 0;
        int32_t isMatch = TRUE;

        // Walk both strings in lockstep until the target is exhausted or a code point differs.
        while (isMatch && targetPos < cwTargetLength)
        {
            UChar32 sourceCodepoint, targetCodepoint;

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsign-conversion"
#endif
            U16_NEXT(lpSource, sourcePos, cwSourceLength, sourceCodepoint);
            U16_NEXT(lpTarget, targetPos, cwTargetLength, targetCodepoint);
#ifdef __clang__
#pragma clang diagnostic pop
#endif

            isMatch = AreEqualOrdinalIgnoreCase(sourceCodepoint, targetCodepoint);
        }

        if (isMatch)
        {
            if (!findLast)
            {
                // First match requested: no need to look any further.
                return start;
            }

            // Last match requested: remember this one and keep scanning.
            found = start;
        }

        U16_FWD_1(lpSource, start, cwSourceLength);
    }

    return found;
}

/*
collation element is an int used for sorting. It consists of 3 components:
* primary - first 16 bits, representing the base letter
Expand Down Expand Up @@ -934,47 +879,3 @@ int32_t GlobalizationNative_GetSortKey(

return result;
}

/*
Function:
CompareStringOrdinalIgnoreCase

Compares two UTF-16 strings code point by code point, ignoring case by applying the
simple upper-case mapping (u_toupper) to each code point.

Returns 0 when the strings are equal ignoring case, -1 when lpStr1 sorts before lpStr2
and 1 when it sorts after. When one string is a case-insensitive prefix of the other,
the shorter string sorts first.

Both pointers must be non-NULL and both lengths non-negative.
*/
int32_t GlobalizationNative_CompareStringOrdinalIgnoreCase(
    const UChar* lpStr1, int32_t cwStr1Length, const UChar* lpStr2, int32_t cwStr2Length)
{
    assert(lpStr1 != NULL);
    assert(cwStr1Length >= 0);
    assert(lpStr2 != NULL);
    assert(cwStr2Length >= 0);

    int32_t str1Idx = 0;
    int32_t str2Idx = 0;

    while (str1Idx < cwStr1Length && str2Idx < cwStr2Length)
    {
        UChar32 str1Codepoint, str2Codepoint;

#ifdef __clang__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wsign-conversion"
#endif
        U16_NEXT(lpStr1, str1Idx, cwStr1Length, str1Codepoint);
        U16_NEXT(lpStr2, str2Idx, cwStr2Length, str2Codepoint);
#ifdef __clang__
#pragma clang diagnostic pop
#endif

        // Identical code points need no case mapping.
        if (str1Codepoint == str2Codepoint)
        {
            continue;
        }

        UChar32 upper1 = u_toupper(str1Codepoint);
        UChar32 upper2 = u_toupper(str2Codepoint);

        if (upper1 != upper2)
        {
            // The sign must come from the upper-cased values, not from the raw code points.
            // Otherwise the ordering depends on the case of the inputs: U+00E9 vs U+00CA
            // would sort opposite to the case-equivalent pair U+00C9 vs U+00EA.
            return upper1 < upper2 ? -1 : 1;
        }
    }

    // Every compared code point matched: the shorter string (if any) sorts first.
    if (cwStr1Length != cwStr2Length)
    {
        return cwStr1Length < cwStr2Length ? -1 : 1;
    }

    return 0;
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,6 @@ PALEXPORT int32_t GlobalizationNative_LastIndexOf(SortHandle* pSortHandle,
int32_t options,
int32_t* pMatchedLength);

// Case-insensitive ordinal search of lpTarget within lpSource.
// Returns the UTF-16 index in lpSource of the first match, or of the last match when findLast
// is non-zero; returns -1 when there is no match. Requires cwSourceLength >= cwTargetLength.
PALEXPORT int32_t GlobalizationNative_IndexOfOrdinalIgnoreCase(const UChar* lpTarget,
int32_t cwTargetLength,
const UChar* lpSource,
int32_t cwSourceLength,
int32_t findLast);

PALEXPORT int32_t GlobalizationNative_StartsWith(SortHandle* pSortHandle,
const UChar* lpTarget,
int32_t cwTargetLength,
Expand All @@ -67,8 +61,3 @@ PALEXPORT int32_t GlobalizationNative_GetSortKey(SortHandle* pSortHandle,
uint8_t* sortKey,
int32_t cbSortKeyLength,
int32_t options);

// Case-insensitive ordinal comparison of two UTF-16 strings; code points with the same
// upper-case mapping are treated as equal. Returns 0 when equal ignoring case, otherwise -1 or 1.
// Both pointers must be non-NULL and both lengths non-negative.
PALEXPORT int32_t GlobalizationNative_CompareStringOrdinalIgnoreCase(const UChar* lpStr1,
int32_t cwStr1Length,
const UChar* lpStr2,
int32_t cwStr2Length);
Original file line number Diff line number Diff line change
Expand Up @@ -339,6 +339,8 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Normalization.Nls.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\NumberFormatInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\NumberStyles.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\Ordinal.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\OrdinalCasing.Icu.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\PersianCalendar.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\RegionInfo.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Globalization\SortKey.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,135 +35,6 @@ private void IcuInitSortHandle()
}
}

// Ordinal search for a non-empty value inside source when ICU is the globalization backend.
// Returns the index of the first occurrence (fromBeginning == true) or of the last occurrence
// (fromBeginning == false), or -1 when value does not occur in source.
// The case-insensitive search is delegated to the native layer; the case-sensitive one is done here.
private static unsafe int IcuIndexOfOrdinalCore(ReadOnlySpan<char> source, ReadOnlySpan<char> value, bool ignoreCase, bool fromBeginning)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);
    Debug.Assert(!value.IsEmpty);

    // An ordinal match can never be longer than the search space. Because the caller has already
    // rejected empty values, this check also covers an empty source.
    if (value.Length > source.Length)
    {
        return -1;
    }

    Debug.Assert(!source.IsEmpty);

    if (ignoreCase)
    {
        fixed (char* pSource = &MemoryMarshal.GetReference(source))
        fixed (char* pValue = &MemoryMarshal.GetReference(value))
        {
            return Interop.Globalization.IndexOfOrdinalIgnoreCase(pValue, value.Length, pSource, source.Length, findLast: !fromBeginning);
        }
    }

    // Candidate start positions run from 0 to lastCandidate (forward) or from lastCandidate
    // down to 0 (backward); 'stop' is the first position past the end in the chosen direction.
    int lastCandidate = source.Length - value.Length;
    int candidate = fromBeginning ? 0 : lastCandidate;
    int stop = fromBeginning ? lastCandidate + 1 : -1;
    int step = fromBeginning ? 1 : -1;

    while (candidate != stop)
    {
        // Count how many leading chars of value match source at this candidate position.
        int matched = 0;
        while (matched < value.Length && source[candidate + matched] == value[matched])
        {
            matched++;
        }

        if (matched == value.Length)
        {
            return candidate;
        }

        candidate += step;
    }

    return -1;
}

// Ordinal backwards search for value inside source when ICU is the globalization backend.
// The search window is the 'count' chars of source that end at startIndex (inclusive).
// Returns the index in source of the last occurrence inside that window, startIndex when
// value is empty, or -1 when there is no occurrence.
private static unsafe int IcuLastIndexOfOrdinalCore(string source, string value, int startIndex, int count, bool ignoreCase)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);

    Debug.Assert(source != null);
    Debug.Assert(value != null);

    int valueLength = value.Length;

    if (valueLength == 0)
    {
        return startIndex;
    }

    if (valueLength > count)
    {
        return -1;
    }

    // First index of the search window: 'count' chars back from startIndex, inclusive.
    int windowStart = startIndex - count + 1;

    if (ignoreCase)
    {
        fixed (char* pSource = source)
        {
            // The native call reports an index relative to the window it was handed.
            int relativeIndex = Interop.Globalization.IndexOfOrdinalIgnoreCase(value, value.Length, pSource + windowStart, count, findLast: true);
            if (relativeIndex == -1)
            {
                return -1;
            }

            return windowStart + relativeIndex;
        }
    }

    // Case-sensitive path: try each start position from the right-most possible one down to windowStart.
    int candidate = startIndex - valueLength + 1;
    while (candidate >= windowStart)
    {
        int matched = 0;
        while (matched < valueLength && source[candidate + matched] == value[matched])
        {
            matched++;
        }

        if (matched == valueLength)
        {
            return candidate;
        }

        candidate--;
    }

    return -1;
}

// Case-insensitive ordinal comparison of two non-empty char sequences when ICU is the
// globalization backend. Pins both sequences and forwards them to the native comparison,
// returning its result unchanged.
private static unsafe int IcuCompareStringOrdinalIgnoreCase(ref char string1, int count1, ref char string2, int count2)
{
    Debug.Assert(!GlobalizationMode.Invariant);
    Debug.Assert(!GlobalizationMode.UseNls);

    Debug.Assert(count1 > 0);
    Debug.Assert(count2 > 0);

    fixed (char* pFirst = &string1)
    fixed (char* pSecond = &string2)
    {
        Debug.Assert(pFirst != null);
        Debug.Assert(pSecond != null);

        int comparison = Interop.Globalization.CompareStringOrdinalIgnoreCase(pFirst, count1, pSecond, count2);
        return comparison;
    }
}

private unsafe int IcuCompareString(ReadOnlySpan<char> string1, ReadOnlySpan<char> string2, CompareOptions options)
{
Debug.Assert(!GlobalizationMode.Invariant);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ internal static unsafe int InvariantIndexOf(ReadOnlySpan<char> source, ReadOnlyS
}
}

private static unsafe int InvariantLastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase)
internal static unsafe int InvariantLastIndexOf(string source, string value, int startIndex, int count, bool ignoreCase)
{
Debug.Assert(!string.IsNullOrEmpty(source));
Debug.Assert(value != null);
Expand Down

0 comments on commit 1c1dc8b

Please sign in to comment.