Skip to content

Commit

Permalink
new TMatch.PrepareContains method and several optimizations to TMatch…
Browse files Browse the repository at this point in the history
… search
  • Loading branch information
Arnaud Bouchez committed Jan 8, 2019
1 parent dd0f631 commit 97ed96c
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 16 deletions.
22 changes: 17 additions & 5 deletions SynCommons.pas
Expand Up @@ -2787,7 +2787,11 @@ function ConvertCaseUTF8(P: PUTF8Char; const Table: TNormTableByte): PtrInt;

/// check if the supplied text has some case-insensitive 'a'..'z','A'..'Z' chars
// - will therefore be correct with true UTF-8 content, but only for 7 bit
function IsCaseSensitive(const S: RawUTF8): boolean;
function IsCaseSensitive(const S: RawUTF8): boolean; overload;

/// check if the supplied text has some case-insensitive 'a'..'z','A'..'Z' chars
// - will therefore be correct with true UTF-8 content, but only for 7 bit
function IsCaseSensitive(P: PUTF8Char; PLen: integer): boolean; overload;

/// fast conversion of the supplied text into uppercase
// - this will only convert 'a'..'z' into 'A'..'Z' (no NormToUpper use), and
Expand Down Expand Up @@ -28766,12 +28770,20 @@ function LowerCaseUnicode(const S: RawUTF8): RawUTF8;
end;

// RawUTF8 overload: returns TRUE if S contains at least one 7-bit letter
// ('a'..'z' or 'A'..'Z'), i.e. if a case-insensitive comparison could differ
// from a case-sensitive one for this text
function IsCaseSensitive(const S: RawUTF8): boolean;
begin
  // delegate to the (P,PLen) overload, which handles nil/empty input
  result := IsCaseSensitive(pointer(S),length(S));
end;

// buffer overload: returns TRUE if P[0..PLen-1] contains at least one 7-bit
// letter ('a'..'z' or 'A'..'Z'), FALSE otherwise
// - a nil buffer or a length <= 0 returns FALSE
function IsCaseSensitive(P: PUTF8Char; PLen: integer): boolean;
begin
  result := true; // value returned by the early exit below
  if (P<>nil) and (PLen>0) then
    repeat
      if ord(P^) in [ord('a')..ord('z'), ord('A')..ord('Z')] then
        exit; // found a letter
      inc(P);
      dec(PLen);
    until PLen=0;
  result := false; // no letter found in the whole buffer
end;

Expand Down
62 changes: 52 additions & 10 deletions SynTable.pas
Expand Up @@ -110,7 +110,7 @@ function IsMatchString(const Pattern, Text: string; CaseInsensitive: boolean=fal
// - some common patterns ('exactmatch', 'startwith*', '*endwith', '*contained*')
// are handled with dedicated code, optionally with case-insensitive search
// - consider using TMatchs (or SetMatchs/TMatchDynArray) if you expect to
// search for several patterns
// search for several patterns, or even TExprParserMatch for expression search
{$ifdef UNICODE}TMatch = record{$else}TMatch = object{$endif}
private
Pattern, Text: PUTF8Char;
Expand All @@ -128,9 +128,14 @@ {$ifdef UNICODE}TMatch = record{$else}TMatch = object{$endif}
/// initialize the internal fields for a given glob search pattern
// - note that the aPattern buffer should remain in memory, since it will
// be pointed to by the Pattern private field of this object
procedure Prepare(aPattern: PUTF8Char; aPatternLen: integer; aCaseInsensitive, aReuse: boolean); overload;
procedure Prepare(aPattern: PUTF8Char; aPatternLen: integer;
aCaseInsensitive, aReuse: boolean); overload;
/// initialize low-level internal fields for a '*Pattern*' search
procedure PrepareContains(aPattern: PUTF8Char; aPatternLen: integer;
aCaseInsensitive: boolean); overload;
/// initialize low-level internal fields for a custom search algorithm
procedure PrepareRaw(aPattern: PUTF8Char; aPatternLen: integer; aSearch: TMatchSearchFunction);
procedure PrepareRaw(aPattern: PUTF8Char; aPatternLen: integer;
aSearch: TMatchSearchFunction);
/// returns TRUE if the supplied content matches the prepared glob pattern
// - this method is not thread-safe
function Match(const aText: RawUTF8): boolean; overload;
Expand Down Expand Up @@ -5399,7 +5404,7 @@ function SimpleContains8(t, tend, p: PUTF8Char; pmax: PtrInt): boolean; inline;
break;
end;
for i := 8 to pmax do
if (t + i >= tend + 7) or (t[i] <> p[i]) then
if t[i] <> p[i] then
goto next;
result := true;
exit;
Expand Down Expand Up @@ -5429,7 +5434,7 @@ function SimpleContains4(t, tend, p: PUTF8Char; pmax: PtrInt): boolean;
break;
end;
for i := 4 to pmax do
if (t + i >= tend + 3) or (t[i] <> p[i]) then
if t[i] <> p[i] then
goto next;
result := true;
exit;
Expand Down Expand Up @@ -5458,7 +5463,7 @@ function SimpleContains1(t, tend, p: PUTF8Char; pmax: PtrInt): boolean;
break;
end;
for i := 1 to pmax do
if (t + i >= tend) or (t[i] <> p[i]) then
if t[i] <> p[i] then
goto next;
result := true;
exit;
Expand Down Expand Up @@ -5500,20 +5505,32 @@ function SearchContainsU(aMatch: PMatch; aText: PUTF8Char; aTextLen: PtrInt): bo

// TMatchSearchFunction for a '*pattern*' search, calling SimpleContains1
// - returns FALSE without searching when aTextLen <= aMatch.PMax
function SearchContains1(aMatch: PMatch; aText: PUTF8Char; aTextLen: PtrInt): boolean;
begin
  // reduce the length by PMax so that the end pointer given to
  // SimpleContains1 is aText + aTextLen - PMax
  dec(aTextLen, aMatch.PMax);
  if aTextLen > 0 then
    result := SimpleContains1(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax)
  else
    result := false;
end;

// TMatchSearchFunction for a '*pattern*' search, calling SimpleContains4
// - returns FALSE without searching when aTextLen <= aMatch.PMax
function SearchContains4(aMatch: PMatch; aText: PUTF8Char; aTextLen: PtrInt): boolean;
begin
  // reduce the length by PMax so that the end pointer given to
  // SimpleContains4 is aText + aTextLen - PMax
  dec(aTextLen, aMatch.PMax);
  if aTextLen > 0 then
    result := SimpleContains4(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax)
  else
    result := false;
end;

{$ifdef CPU64}
// TMatchSearchFunction for a '*pattern*' search, calling SimpleContains8
// - only compiled on 64-bit targets
// - returns FALSE without searching when aTextLen <= aMatch.PMax
function SearchContains8(aMatch: PMatch; aText: PUTF8Char; aTextLen: PtrInt): boolean;
begin // optimized e.g. to search an IP address as '*12.34.56.78*' in logs
  // reduce the length by PMax so that the end pointer given to
  // SimpleContains8 is aText + aTextLen - PMax
  dec(aTextLen, aMatch.PMax);
  if aTextLen > 0 then
    result := SimpleContains8(aText, aText + aTextLen, aMatch.Pattern, aMatch.PMax)
  else
    result := false;
end;
{$endif CPU64}

function SearchStartWith(aMatch: PMatch; aText: PUTF8Char; aTextLen: PtrInt): boolean;
begin
Expand Down Expand Up @@ -5546,6 +5563,8 @@ procedure TMatch.Prepare(aPattern: PUTF8Char; aPatternLen: integer;
aCaseInsensitive, aReuse: boolean);
const SPECIALS: PUTF8Char = '*?[';
begin
if aCaseInsensitive and not IsCaseSensitive(aPattern,aPatternLen) then
aCaseInsensitive := false; // don't slow down e.g. number or IP search
Pattern := aPattern;
PMax := aPatternLen - 1; // search in Pattern[0..PMax]
if aCaseInsensitive then
Expand Down Expand Up @@ -5616,6 +5635,29 @@ procedure TMatch.Prepare(aPattern: PUTF8Char; aPatternLen: integer;
end;
end;

// set the Pattern/PMax/Search fields for a '*Pattern*' search, selecting the
// Search function from the pattern length and the requested case sensitivity
procedure TMatch.PrepareContains(aPattern: PUTF8Char; aPatternLen: integer;
  aCaseInsensitive: boolean);
var
  foldCase: boolean;
begin
  // case-insensitive search is used only if the pattern has some 7-bit letters
  foldCase := aCaseInsensitive and IsCaseSensitive(aPattern, aPatternLen);
  Pattern := aPattern;
  PMax := aPatternLen - 1; // last index in Pattern[0..PMax]
  if PMax < 0 then begin
    // empty pattern
    Search := SearchContainsValid;
    exit;
  end;
  if foldCase then begin
    Upper := @NormToUpperAnsi7;
    Search := SearchContainsU;
    exit;
  end;
  {$ifdef CPU64}
  if PMax >= 7 then begin
    // pattern of at least 8 bytes, on 64-bit targets only
    Search := SearchContains8;
    exit;
  end;
  {$endif CPU64}
  if PMax >= 3 then
    Search := SearchContains4 // pattern of at least 4 bytes
  else
    Search := SearchContains1;
end;

procedure TMatch.PrepareRaw(aPattern: PUTF8Char; aPatternLen: integer;
aSearch: TMatchSearchFunction);
begin
Expand Down
2 changes: 1 addition & 1 deletion SynopseCommit.inc
@@ -1 +1 @@
'1.18.4947'
'1.18.4948'

0 comments on commit 97ed96c

Please sign in to comment.