diff --git a/mongodb/Projects/InvalidChaptersInProjects.mongodb b/mongodb/Projects/InvalidChaptersInProjects.mongodb new file mode 100644 index 00000000000..5b3c057310f --- /dev/null +++ b/mongodb/Projects/InvalidChaptersInProjects.mongodb @@ -0,0 +1,58 @@ +// This script gets a list of invalid chapters in all projects that are not resources and lists them as CSV. +// To see the invalid ops, run the script ../Texts/InvalidOps.mongodb +use("xforge"); + +const projects = db.sf_projects + .aggregate([ + { $match: { resourceConfig: null } }, + { + $project: { + _id: 1, + paratextId: 1, + texts: 1 + } + }, + { $unwind: { path: "$texts" } }, + { $unwind: { path: "$texts.chapters" } }, + { + $match: { + "texts.chapters.isValid": false, + $and: [ + { + "texts.bookNum": { + $not: { $gte: 93, $lte: 102 } + } + }, + { + "texts.bookNum": { + $not: { $gte: 107, $lte: 111 } + } + } + ] + } + }, + { + $project: { + projectId: "$_id", + paratextId: 1, + bookNumber: "$texts.bookNum", + chapterNumber: "$texts.chapters.number" + } + } + ]) + .toArray(); + +if (projects.length > 0) { + console.log("projectId,paratextId,bookNumber,chapterNumber"); + console.log( + projects + .map( + project => project.projectId + "," + project.paratextId + "," + project.bookNumber + "," + project.chapterNumber + ) + .join("\n") + ); +} else { + console.log("none"); +} + +projects; diff --git a/mongodb/Projects/ProjectsWithInvalidDocs.mongodb b/mongodb/Projects/ProjectsWithInvalidDocs.mongodb index 95a8a1e3ee3..04ee358a3b2 100644 --- a/mongodb/Projects/ProjectsWithInvalidDocs.mongodb +++ b/mongodb/Projects/ProjectsWithInvalidDocs.mongodb @@ -1,4 +1,5 @@ // Find projects that have a text doc that is "invalid" (not the same as corrupted) +// For greater granularity, see the script InvalidChaptersInProjects.mongodb use("xforge"); const invalidProjects = db.sf_projects.countDocuments({ diff --git a/mongodb/Texts/InvalidOps.mongodb b/mongodb/Texts/InvalidOps.mongodb new file mode 100644 index 
00000000000..4ad83dc5f25 --- /dev/null +++ b/mongodb/Texts/InvalidOps.mongodb @@ -0,0 +1,39 @@ +// This script searches for ops in texts that are invalid because Scripture Forge does not yet support them. +// +// The list generated will be an excellent starting point for missing USFM tags to implement in the XSD and Quill. +// For speed reasons, invalid ops inside footnotes are not matched. +use("xforge"); + +const texts = db.texts + .aggregate([ + { + $match: { + ops: { + $elemMatch: { + $or: [{ "attributes.invalid-inline": true }, { "attributes.invalid-block": true }] + } + } + } + }, + { $unwind: "$ops" }, + { + $match: { + $or: [{ "ops.attributes.invalid-inline": true }, { "ops.attributes.invalid-block": true }] + } + }, + { + $project: { + _id: 1, + op: "$ops" + } + } + ]) + .toArray(); + +if (texts.length > 0) { + console.log(texts.map(text => JSON.stringify(text)).join("\n")); +} else { + console.log("none"); +} + +texts; diff --git a/mongodb/Texts/InvalidTexts.mongodb b/mongodb/Texts/InvalidTexts.mongodb new file mode 100644 index 00000000000..01f80669874 --- /dev/null +++ b/mongodb/Texts/InvalidTexts.mongodb @@ -0,0 +1,30 @@ +// This script searches for ops in texts that are invalid because Scripture Forge does not yet support them. +// +// NOTE: Only the first op will be returned for each text! +// +// The list generated will be an excellent starting point for missing USFM tags to implement in the XSD and Quill. +// For speed reasons, invalid ops inside footnotes are not matched. 
+use("xforge"); + +const texts = db.texts + .find( + { + ops: { + $elemMatch: { + $or: [{ "attributes.invalid-inline": true }, { "attributes.invalid-block": true }] + } + } + }, + { + "ops.$": 1 + } + ) + .toArray(); + +if (texts.length > 0) { + console.log(texts.map(text => JSON.stringify(text)).join("\n")); +} else { + console.log("none"); +} + +texts; diff --git a/tools/CommitGenerator/CommitGenerator.csproj b/tools/CommitGenerator/CommitGenerator.csproj index 8b0fa7e5fce..12fd6eaf040 100644 --- a/tools/CommitGenerator/CommitGenerator.csproj +++ b/tools/CommitGenerator/CommitGenerator.csproj @@ -29,6 +29,9 @@ PreserveNewest + + PreserveNewest + PreserveNewest diff --git a/tools/CommitGenerator/Program.cs b/tools/CommitGenerator/Program.cs index 37debb05608..feb95eba047 100644 --- a/tools/CommitGenerator/Program.cs +++ b/tools/CommitGenerator/Program.cs @@ -6,6 +6,7 @@ using Paratext.Data; using Paratext.Data.Languages; using Paratext.Data.Repository; +using PtxUtils; using SIL.Scripture; #nullable enable @@ -63,8 +64,10 @@ Hg.Default = new Hg(customHgPath, hgMerge, assemblyDirectory); // Setup Paratext -ICUDllLocator.Initialize(); +ICUDllLocator.Initialize(confineICUVersion: false); WritingSystemRepository.Initialize(); +Alert.Implementation = new TestAlert(); +RegistryU.Implementation = new TestRegistryU(); Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); ScrTextCollection.Initialize(Environment.GetEnvironmentVariable("PARATEXT_PROJECTS")); ScrTextCollection.RefreshScrTexts(allowMigration: false); diff --git a/tools/Roundtrip/Program.cs b/tools/Roundtrip/Program.cs index d5ec51f4916..4085efb5feb 100644 --- a/tools/Roundtrip/Program.cs +++ b/tools/Roundtrip/Program.cs @@ -1,5 +1,6 @@ using System.Xml; using System.Xml.Linq; +using System.Xml.Schema; using System.Xml.XPath; using ICSharpCode.SharpZipLib.Zip; using Microsoft.Extensions.Configuration; @@ -8,6 +9,7 @@ using Paratext.Data; using Paratext.Data.Languages; using Paratext.Data.Users; +using 
PtxUtils; using Roundtrip; using SIL.Converters.Usj; using SIL.XForge.Configuration; @@ -30,11 +32,19 @@ Directory.CreateDirectory("output"); } +// See if we are validating the USX and outputting any error +bool validateUsx = args.Length > 1 && args[1] == "--validate-usx"; +var schemas = new XmlSchemaSet(); +schemas.Add(string.Empty, "usx-sf.xsd"); +schemas.Compile(); + // Setup Paratext RegistrationInfo.Implementation = new TestRegistrationInfo(); -ICUDllLocator.Initialize(); +ICUDllLocator.Initialize(confineICUVersion: false); WritingSystemRepository.Initialize(); -ScrTextCollection.Initialize(); +Alert.Implementation = new TestAlert(); +RegistryU.Implementation = new TestRegistryU(); +ScrTextCollection.Initialize(Environment.GetEnvironmentVariable("PARATEXT_PROJECTS")); using var scrText = new DummyScrText(useFakeStylesheet: false); ScrTextCollection.Add(scrText); ILoggerFactory loggerFactory = LoggerFactory.Create(builder => builder.AddConsole()); @@ -231,6 +241,23 @@ void Roundtrip(string usfm, string fileName, string path, RoundtripMethod roundt using XmlNodeReader nodeReader = new XmlNodeReader(usx); nodeReader.MoveToContent(); actualUsx = XDocument.Load(nodeReader); + + // Validate the USX if requested + if (validateUsx) + { + actualUsx.Validate( + schemas, + (o, e) => + { + Console.WriteLine("============================================================"); + Console.WriteLine($"Validation Error in: {Path.Combine(path, fileName)}"); + XNode? node = o is XAttribute attr ? 
attr.Parent : o as XNode; + Console.WriteLine(node); + Console.WriteLine(e.Exception); + }, + true + ); + } } else { diff --git a/tools/Roundtrip/README.md b/tools/Roundtrip/README.md index ffef300553e..ec763392fed 100644 --- a/tools/Roundtrip/README.md +++ b/tools/Roundtrip/README.md @@ -28,7 +28,14 @@ Or, to export all of the files that the tool roundtrips: dotnet run /var/lib/scriptureforge/sync/ --output-all ``` +You can also validate the USX to ensure that Scripture Forge supports the USFM files: + +```sh +dotnet run /var/lib/scriptureforge/sync/ --validate-usx +``` + ## Notes +- If on Linux, **you must** set the `PARATEXT_PROJECTS` environment variable to your Paratext project directory. - Unlike **ServalDownloader**, this tool does not utilize the user secrets you have configured for Scripture Forge. - To view a time series graph of the builds, select all of the data on the Summary sheet, and create a 2-D Line Graph. diff --git a/tools/Roundtrip/Roundtrip.csproj b/tools/Roundtrip/Roundtrip.csproj index c112bd3a73d..31b95b45a5a 100644 --- a/tools/Roundtrip/Roundtrip.csproj +++ b/tools/Roundtrip/Roundtrip.csproj @@ -14,7 +14,7 @@ - +