diff --git a/mongodb/Projects/InvalidChaptersInProjects.mongodb b/mongodb/Projects/InvalidChaptersInProjects.mongodb
new file mode 100644
index 00000000000..5b3c057310f
--- /dev/null
+++ b/mongodb/Projects/InvalidChaptersInProjects.mongodb
@@ -0,0 +1,64 @@
+// This script gets a list of invalid chapters in all projects that are not resources and lists them as CSV.
+// To see the invalid ops, run the script ../Texts/InvalidOps.mongodb
+use("xforge");
+
+const projects = db.sf_projects
+  .aggregate([
+    // Only load projects that are not resources and that contain at least one invalid chapter, so that
+    // the $unwind stages below do not expand every chapter of every project.
+    { $match: { resourceConfig: null, "texts.chapters.isValid": false } },
+    {
+      $project: {
+        _id: 1,
+        paratextId: 1,
+        texts: 1
+      }
+    },
+    // Produce one document per chapter
+    { $unwind: { path: "$texts" } },
+    { $unwind: { path: "$texts.chapters" } },
+    // Keep the invalid chapters, except those in book numbers 93-102 and 107-111 (inclusive).
+    // NOTE(review): these ranges look like the non-scripture books - confirm against the canon.
+    {
+      $match: {
+        "texts.chapters.isValid": false,
+        $and: [
+          {
+            "texts.bookNum": {
+              $not: { $gte: 93, $lte: 102 }
+            }
+          },
+          {
+            "texts.bookNum": {
+              $not: { $gte: 107, $lte: 111 }
+            }
+          }
+        ]
+      }
+    },
+    // Flatten each chapter into a CSV-ready row
+    {
+      $project: {
+        projectId: "$_id",
+        paratextId: 1,
+        bookNumber: "$texts.bookNum",
+        chapterNumber: "$texts.chapters.number"
+      }
+    }
+  ])
+  .toArray();
+
+if (projects.length > 0) {
+  console.log("projectId,paratextId,bookNumber,chapterNumber");
+  console.log(
+    projects
+      .map(
+        project => project.projectId + "," + project.paratextId + "," + project.bookNumber + "," + project.chapterNumber
+      )
+      .join("\n")
+  );
+} else {
+  console.log("none");
+}
+
+projects;
diff --git a/mongodb/Projects/ProjectsWithInvalidDocs.mongodb b/mongodb/Projects/ProjectsWithInvalidDocs.mongodb
index 95a8a1e3ee3..04ee358a3b2 100644
--- a/mongodb/Projects/ProjectsWithInvalidDocs.mongodb
+++ b/mongodb/Projects/ProjectsWithInvalidDocs.mongodb
@@ -1,4 +1,5 @@
// Find projects that have a text doc that is "invalid" (not the same as corrupted)
+// For greater granularity (a list of the invalid chapters), see the script InvalidChaptersInProjects.mongodb
use("xforge");
const invalidProjects = db.sf_projects.countDocuments({
diff --git a/mongodb/Texts/InvalidOps.mongodb b/mongodb/Texts/InvalidOps.mongodb
new file mode 100644
index 00000000000..4ad83dc5f25
--- /dev/null
+++ b/mongodb/Texts/InvalidOps.mongodb
@@ -0,0 +1,32 @@
+// Lists the ops in texts that are invalid because Scripture Forge does not yet support them.
+//
+// The output is a good starting point for finding the USFM tags that still need implementing in the XSD and Quill.
+// For speed reasons, invalid ops inside footnotes are not matched.
+use("xforge");
+
+// Applied to the elements of the ops array, to select the texts that have at least one invalid op
+const invalidOpInArray = {
+  $or: [{ "attributes.invalid-inline": true }, { "attributes.invalid-block": true }]
+};
+
+// Applied after $unwind, when each document holds a single op in its ops field
+const invalidUnwoundOp = {
+  $or: [{ "ops.attributes.invalid-inline": true }, { "ops.attributes.invalid-block": true }]
+};
+
+const pipeline = [
+  { $match: { ops: { $elemMatch: invalidOpInArray } } },
+  { $unwind: "$ops" },
+  { $match: invalidUnwoundOp },
+  { $project: { _id: 1, op: "$ops" } }
+];
+const texts = db.texts.aggregate(pipeline).toArray();
+
+// Print one JSON document per line
+if (texts.length === 0) {
+  console.log("none");
+} else {
+  console.log(texts.map(text => JSON.stringify(text)).join("\n"));
+}
+
+texts;
diff --git a/mongodb/Texts/InvalidTexts.mongodb b/mongodb/Texts/InvalidTexts.mongodb
new file mode 100644
index 00000000000..01f80669874
--- /dev/null
+++ b/mongodb/Texts/InvalidTexts.mongodb
@@ -0,0 +1,30 @@
+// This script finds texts containing ops that are invalid because Scripture Forge does not yet support them.
+//
+// NOTE: Only the first invalid op is returned for each text!
+//
+// The output is a good starting point for finding the USFM tags that still need implementing in the XSD and Quill.
+// For speed reasons, invalid ops inside footnotes are not matched.
+use("xforge");
+
+// Selects the texts that have at least one op flagged as invalid
+const filter = {
+  ops: {
+    $elemMatch: {
+      $or: [{ "attributes.invalid-inline": true }, { "attributes.invalid-block": true }]
+    }
+  }
+};
+
+// Positional projection, which limits the ops array to the first element that matched the filter
+const projection = { "ops.$": 1 };
+
+const texts = db.texts.find(filter, projection).toArray();
+
+// Print one JSON document per line
+if (texts.length === 0) {
+  console.log("none");
+} else {
+  console.log(texts.map(text => JSON.stringify(text)).join("\n"));
+}
+
+texts;
diff --git a/tools/CommitGenerator/CommitGenerator.csproj b/tools/CommitGenerator/CommitGenerator.csproj
index 8b0fa7e5fce..12fd6eaf040 100644
--- a/tools/CommitGenerator/CommitGenerator.csproj
+++ b/tools/CommitGenerator/CommitGenerator.csproj
@@ -29,6 +29,9 @@
PreserveNewest
+
+ PreserveNewest
+
PreserveNewest
diff --git a/tools/CommitGenerator/Program.cs b/tools/CommitGenerator/Program.cs
index 37debb05608..feb95eba047 100644
--- a/tools/CommitGenerator/Program.cs
+++ b/tools/CommitGenerator/Program.cs
@@ -6,6 +6,7 @@
using Paratext.Data;
using Paratext.Data.Languages;
using Paratext.Data.Repository;
+using PtxUtils;
using SIL.Scripture;
#nullable enable
@@ -63,8 +64,10 @@
Hg.Default = new Hg(customHgPath, hgMerge, assemblyDirectory);
// Setup Paratext
-ICUDllLocator.Initialize();
+ICUDllLocator.Initialize(confineICUVersion: false);
WritingSystemRepository.Initialize();
+Alert.Implementation = new TestAlert();
+RegistryU.Implementation = new TestRegistryU();
Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
ScrTextCollection.Initialize(Environment.GetEnvironmentVariable("PARATEXT_PROJECTS"));
ScrTextCollection.RefreshScrTexts(allowMigration: false);
diff --git a/tools/Roundtrip/Program.cs b/tools/Roundtrip/Program.cs
index d5ec51f4916..4085efb5feb 100644
--- a/tools/Roundtrip/Program.cs
+++ b/tools/Roundtrip/Program.cs
@@ -1,5 +1,6 @@
using System.Xml;
using System.Xml.Linq;
+using System.Xml.Schema;
using System.Xml.XPath;
using ICSharpCode.SharpZipLib.Zip;
using Microsoft.Extensions.Configuration;
@@ -8,6 +9,7 @@
using Paratext.Data;
using Paratext.Data.Languages;
using Paratext.Data.Users;
+using PtxUtils;
using Roundtrip;
using SIL.Converters.Usj;
using SIL.XForge.Configuration;
@@ -30,11 +32,24 @@
Directory.CreateDirectory("output");
}
+// See if we are validating the USX and outputting any errors.
+// The schema is only loaded and compiled when validation is requested, so that runs without
+// --validate-usx neither require usx-sf.xsd to be present nor pay the cost of compiling it.
+bool validateUsx = args.Length > 1 && args[1] == "--validate-usx";
+var schemas = new XmlSchemaSet();
+if (validateUsx)
+{
+    schemas.Add(string.Empty, "usx-sf.xsd");
+    schemas.Compile();
+}
+
// Setup Paratext
RegistrationInfo.Implementation = new TestRegistrationInfo();
-ICUDllLocator.Initialize();
+ICUDllLocator.Initialize(confineICUVersion: false);
WritingSystemRepository.Initialize();
-ScrTextCollection.Initialize();
+Alert.Implementation = new TestAlert();
+RegistryU.Implementation = new TestRegistryU();
+ScrTextCollection.Initialize(Environment.GetEnvironmentVariable("PARATEXT_PROJECTS"));
using var scrText = new DummyScrText(useFakeStylesheet: false);
ScrTextCollection.Add(scrText);
ILoggerFactory loggerFactory = LoggerFactory.Create(builder => builder.AddConsole());
@@ -231,6 +246,24 @@ void Roundtrip(string usfm, string fileName, string path, RoundtripMethod roundt
using XmlNodeReader nodeReader = new XmlNodeReader(usx);
nodeReader.MoveToContent();
actualUsx = XDocument.Load(nodeReader);
+
+ // Validate the USX if requested
+ if (validateUsx)
+ {
+ actualUsx.Validate(
+ schemas,
+ (o, e) =>
+ {
+ Console.WriteLine("============================================================");
+ Console.WriteLine($"Validation Error in: {Path.Combine(path, fileName)}");
+ // An attribute is not an XNode, so print the element that owns it instead
+ XNode? node = o is XAttribute attr ? attr.Parent : o as XNode;
+ Console.WriteLine(node);
+ Console.WriteLine(e.Exception);
+ },
+ addSchemaInfo: true
+ );
+ }
}
else
{
diff --git a/tools/Roundtrip/README.md b/tools/Roundtrip/README.md
index ffef300553e..ec763392fed 100644
--- a/tools/Roundtrip/README.md
+++ b/tools/Roundtrip/README.md
@@ -28,7 +28,14 @@ Or, to export all of the files that the tool roundtrips:
dotnet run /var/lib/scriptureforge/sync/ --output-all
```
+You can also validate the USX to ensure that Scripture Forge supports the USFM files:
+
+```sh
+dotnet run /var/lib/scriptureforge/sync/ --validate-usx
+```
+
## Notes
+- If on Linux, **you must** set the `PARATEXT_PROJECTS` environment variable to your Paratext project directory.
- Unlike **ServalDownloader**, this tool does not utilize the user secrets you have configured for Scripture Forge.
- To view a time series graph of the builds, select all of the data on the Summary sheet, and create a 2-D Line Graph.
diff --git a/tools/Roundtrip/Roundtrip.csproj b/tools/Roundtrip/Roundtrip.csproj
index c112bd3a73d..31b95b45a5a 100644
--- a/tools/Roundtrip/Roundtrip.csproj
+++ b/tools/Roundtrip/Roundtrip.csproj
@@ -14,7 +14,7 @@
-
+