Skip to content

Commit

Permalink
Correctly implement relaxed comparison for UsfmTextUpdater
Browse files Browse the repository at this point in the history
  • Loading branch information
ddaspit committed May 31, 2024
1 parent 29134bf commit a69afa9
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 106 deletions.
28 changes: 14 additions & 14 deletions src/SIL.Machine/Corpora/ScriptureElement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,28 +13,28 @@ public ScriptureElement(int position, string name)
public int Position { get; }
public string Name { get; }

int IComparable<ScriptureElement>.CompareTo(ScriptureElement other)
public ScriptureElement ToRelaxed()
{
return CompareTo(other, strict: true);
return new ScriptureElement(0, Name);
}

public int CompareTo(ScriptureElement other, bool strict = true)
public int CompareTo(ScriptureElement other)
{
if (strict)
if (Position == 0 || other.Position == 0)
{
int res = Position.CompareTo(other.Position);
if (res != 0)
return res;
if (Name == other.Name)
return 0;
// position 0 is always greater than any other position
if (Position == 0 && other.Position != 0)
return 1;
if (other.Position == 0 && Position != 0)
return -1;
return Name.CompareTo(other.Name);
}

if (Name == other.Name)
return 0;
// position 0 is always greater than any other position
if (Position == 0 && other.Position != 0)
return 1;
if (other.Position == 0 && Position != 0)
return -1;
int res = Position.CompareTo(other.Position);
if (res != 0)
return res;
return Name.CompareTo(other.Name);
}

Expand Down
11 changes: 8 additions & 3 deletions src/SIL.Machine/Corpora/ScriptureRef.cs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,11 @@ public ScriptureRef(VerseRef verseRef = default, IEnumerable<ScriptureElement> p
public bool IsEmpty => VerseRef.IsDefault;
public bool IsVerse => VerseRef.VerseNum != 0 && Path.Count == 0;

public ScriptureRef ToRelaxed()
{
return new ScriptureRef(VerseRef, Path.Select(pe => pe.ToRelaxed()));
}

public ScriptureRef ChangeVersification(ScrVers versification)
{
VerseRef vr = VerseRef.Clone();
Expand All @@ -73,7 +78,7 @@ int IComparable<ScriptureRef>.CompareTo(ScriptureRef other)
return CompareTo(other, compareSegments: true);
}

public int CompareTo(ScriptureRef other, bool compareSegments = true, bool strict = true)
public int CompareTo(ScriptureRef other, bool compareSegments = true)
{
IComparer<VerseRef> comparer = compareSegments ? VerseRefComparer.Default : VerseRefComparer.IgnoreSegments;
int res = comparer.Compare(VerseRef, other.VerseRef);
Expand All @@ -82,12 +87,12 @@ public int CompareTo(ScriptureRef other, bool compareSegments = true, bool stric

foreach ((ScriptureElement se1, ScriptureElement se2) in Path.Zip(other.Path))
{
res = se1.CompareTo(se2, strict);
res = se1.CompareTo(se2);
if (res != 0)
return res;
}

return Path.Count - other.Path.Count;
return Path.Count.CompareTo(other.Path.Count);
}

public int CompareTo(object obj)
Expand Down
9 changes: 1 addition & 8 deletions src/SIL.Machine/Corpora/UsfmTextUpdater.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ public class UsfmTextUpdater : ScriptureRefUsfmParserHandlerBase
private readonly List<UsfmToken> _newTokens;
private readonly string _idText;
private readonly bool _stripAllText;
private readonly bool _strictComparison;
private readonly bool _preferExistingText;
private readonly Stack<bool> _replace;
private int _rowIndex;
Expand All @@ -25,7 +24,6 @@ public class UsfmTextUpdater : ScriptureRefUsfmParserHandlerBase
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)> rows = null,
string idText = null,
bool stripAllText = false,
bool strictComparison = true,
bool preferExistingText = false
)
{
Expand All @@ -34,7 +32,6 @@ public class UsfmTextUpdater : ScriptureRefUsfmParserHandlerBase
_newTokens = new List<UsfmToken>();
_idText = idText;
_stripAllText = stripAllText;
_strictComparison = strictComparison;
_replace = new Stack<bool>();
_preferExistingText = preferExistingText;
}
Expand Down Expand Up @@ -315,11 +312,7 @@ private IReadOnlyList<string> AdvanceRows(IReadOnlyList<ScriptureRef> segScrRefs
{
while (sourceIndex < segScrRefs.Count)
{
compare = rowScrRef.CompareTo(
segScrRefs[sourceIndex],
compareSegments: false,
_strictComparison
);
compare = rowScrRef.CompareTo(segScrRefs[sourceIndex], compareSegments: false);
if (compare > 0)
// source is ahead of row, increment source
sourceIndex++;
Expand Down
44 changes: 9 additions & 35 deletions tests/SIL.Machine.Tests/Corpora/ScriptureRefTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,42 +17,16 @@ public class ScriptureRefTests
[TestCase("MAT 1:0/2:p", "MAT 1:0/1:p", ExpectedResult = 1, Description = "NonVerseGreaterThan")]
[TestCase("MAT 1:0/1:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "NonVerseParentChild")]
[TestCase("MAT 1:0/2:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = 1, Description = "NonVerseParentOtherChild")]
public int CompareTo_Strict(string ref1Str, string ref2Str)
[TestCase("MAT 1:0/p", "MAT 1:0/2:p", ExpectedResult = 0, Description = "RelaxedSameMarker")]
[TestCase("MAT 1:0/p", "MAT 1:0/2:esb", ExpectedResult = 1, Description = "RelaxedSameLevel")]
[TestCase("MAT 1:0/esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "RelaxedParentChild")]
[TestCase("MAT 1:0/2:esb", "MAT 1:0/esb/p", ExpectedResult = -1, Description = "ParentRelaxedChild")]
public int CompareTo(string ref1Str, string ref2Str)
{
var ref1 = ScriptureRef.Parse(ref1Str);
var ref2 = ScriptureRef.Parse(ref2Str);

int result = ref1.CompareTo(ref2);

// this tests the IComparable<ScriptureRef>.CompareTo method responds the same as the ScriptureRef.CompareTo method.
int result2 = ((IComparable<ScriptureRef>)ref1).CompareTo(ref2);
Assert.That(result, Is.EqualTo(result2));

if (result < 0)
result = -1;
else if (result > 0)
result = 1;
return result;
}

[TestCase("MAT 1:1", "MAT 1:2", ExpectedResult = -1, Description = "VerseLessThan")]
[TestCase("MAT 1:1", "MAT 1:1", ExpectedResult = 0, Description = "VerseEqualTo")]
[TestCase("MAT 1:2", "MAT 1:1", ExpectedResult = 1, Description = "VerseGreaterThan")]
[TestCase("MAT 1:0/1:p", "MAT 1:0/2:p", ExpectedResult = 0, Description = "NonVerseSameMarkerDifferentPosition")]
[TestCase("MAT 1:0/1:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "NonVerseParentChild")]
[TestCase("MAT 1:0/2:esb", "MAT 1:0/1:esb/1:p", ExpectedResult = -1, Description = "NonVerseParentOtherChild")]
public int CompareTo_Relaxed(string ref1Str, string ref2Str)
{
var ref1 = ScriptureRef.Parse(ref1Str);
var ref2 = ScriptureRef.Parse(ref2Str);

int result = ref1.CompareTo(ref2, strict: false);

if (result < 0)
result = -1;
else if (result > 0)
result = 1;
return result;
return ref1.CompareTo(ref2);
}

[TestCase]
Expand All @@ -64,9 +38,9 @@ public void IsEqualTo()
var obj1 = "A different type";
Assert.Multiple(() =>
{
Assert.That(ref1.Equals(ref1dup));
Assert.That(!ref1.Equals(ref2));
Assert.That(!ref1.Equals(obj1));
Assert.That(ref1.Equals(ref1dup), Is.True);
Assert.That(ref1.Equals(ref2), Is.False);
Assert.That(ref1.Equals(obj1), Is.False);
});
}

Expand Down
68 changes: 25 additions & 43 deletions tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,31 +10,21 @@ public class UsfmManualTests
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public void ParseParallelCorpus()
{
var tCorpus = new ParatextTextCorpus(
projectDir: CorporaTestHelpers.UsfmTargetProjectPath,
includeAllText: true,
includeMarkers: true
);
ParatextTextCorpus tCorpus =
new(projectDir: CorporaTestHelpers.UsfmTargetProjectPath, includeAllText: true, includeMarkers: true);

var sCorpus = new ParatextTextCorpus(
projectDir: CorporaTestHelpers.UsfmSourceProjectPath,
includeAllText: true,
includeMarkers: true
);
ParatextTextCorpus sCorpus =
new(projectDir: CorporaTestHelpers.UsfmSourceProjectPath, includeAllText: true, includeMarkers: true);

ParallelTextCorpus pCorpus = new ParallelTextCorpus(
sCorpus,
tCorpus,
alignmentCorpus: null,
rowRefComparer: null
)
{
AllSourceRows = true,
AllTargetRows = false
};
ParallelTextCorpus pCorpus =
new(sCorpus, tCorpus, alignmentCorpus: null, rowRefComparer: null)
{
AllSourceRows = true,
AllTargetRows = false
};

var rows = pCorpus.GetRows().ToList();
Assert.That(rows.Count, Is.Not.Zero);
List<ParallelTextRow> rows = pCorpus.GetRows().ToList();
Assert.That(rows, Has.Count.GreaterThan(0));
}

public record PretranslationDto
Expand All @@ -54,28 +44,25 @@ public record PretranslationDto
[Ignore("This is for manual testing only. Remove this tag to run the test.")]
public async Task CreateUsfmFile()
{
var parser = new FileParatextProjectSettingsParser(ParatextProjectPath);
FileParatextProjectSettingsParser parser = new(ParatextProjectPath);
ParatextProjectSettings settings = parser.Parse();

// Read text from pretranslations file
Stream pretranslationStream = File.OpenRead(PretranslationPath);
IAsyncEnumerable<PretranslationDto?>? pretranslations =
JsonSerializer.DeserializeAsyncEnumerable<PretranslationDto>(
using Stream pretranslationStream = File.OpenRead(PretranslationPath);
(IReadOnlyList<ScriptureRef>, string)[] pretranslations = await JsonSerializer
.DeserializeAsyncEnumerable<PretranslationDto>(
pretranslationStream,
new JsonSerializerOptions { PropertyNamingPolicy = JsonNamingPolicy.CamelCase }
);

var pretranslationsList = (await pretranslations.ToListAsync())
.Where(p => p is not null)
)
.Select(p =>
(
(IReadOnlyList<ScriptureRef>)
p!.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification)).ToList(),
p.Translation
(IReadOnlyList<ScriptureRef>)(
p?.Refs.Select(r => ScriptureRef.Parse(r, settings.Versification).ToRelaxed()).ToArray() ?? []
),
p?.Translation ?? ""
)
)
.OrderBy(p => p.Item1[0])
.ToList();
.ToArrayAsync();

foreach (
string sfmFileName in Directory.EnumerateFiles(
Expand All @@ -84,15 +71,10 @@ public async Task CreateUsfmFile()
)
)
{
var updater = new UsfmTextUpdater(
pretranslationsList,
stripAllText: true,
strictComparison: false,
preferExistingText: true
);
var usfm = await File.ReadAllTextAsync(sfmFileName);
var updater = new UsfmTextUpdater(pretranslations, stripAllText: true, preferExistingText: true);
string usfm = await File.ReadAllTextAsync(sfmFileName);
UsfmParser.Parse(usfm, updater, settings.Stylesheet, settings.Versification);
var newUsfm = updater.GetUsfm(settings.Stylesheet);
string newUsfm = updater.GetUsfm(settings.Stylesheet);
Assert.That(newUsfm, Is.Not.Null);
}
}
Expand Down
4 changes: 1 addition & 3 deletions tests/SIL.Machine.Tests/Corpora/UsfmTextUpdaterTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ public void GetUsfm_NonVerse_Relaxed()
(ScrRef("MAT 2:0/tr/tc1"), "The third cell of the table.")
};

string target = UpdateUsfm(rows, strictComparison: false);
string target = UpdateUsfm(rows);
Assert.That(target, Contains.Substring("\\s The first chapter.\r\n"));
Assert.That(target, Contains.Substring("\\v 1 First verse of the first chapter.\r\n"));
Assert.That(
Expand Down Expand Up @@ -401,7 +401,6 @@ private static ScriptureRef[] ScrRef(params string[] refs)
IReadOnlyList<(IReadOnlyList<ScriptureRef>, string)>? rows = null,
string? idText = null,
bool stripAllText = false,
bool strictComparison = true,
bool preferExistingText = false
)
{
Expand All @@ -410,7 +409,6 @@ private static ScriptureRef[] ScrRef(params string[] refs)
rows,
idText,
stripAllText: stripAllText,
strictComparison: strictComparison,
preferExistingText: preferExistingText
);
UsfmParser.Parse(source, updater);
Expand Down

0 comments on commit a69afa9

Please sign in to comment.