Skip to content

Commit

Permalink
Adding support to validate against XSD and SCH files
Browse files Browse the repository at this point in the history
  • Loading branch information
seanmcilvenna committed Jul 13, 2020
1 parent a245d5c commit 6455a76
Show file tree
Hide file tree
Showing 19 changed files with 1,074 additions and 27 deletions.
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ Help can be provided by the CLI tool itself:
| -i, --input | Required. The directory that contains the input XML files. |
| -o, --output | Required. The directory where output (XLSX) files should go. |
| -m, --move | The directory to move input files to once they are done being processed. |
| -x, --xsd | The path to an XML Schema (XSD) that should be used to validate the structure of each XML document processed. |
| -s, --sch | The path to an ISO Schematron (SCH) file that should be used to validate the content of each XML document processed. |

### Command: mdb

Expand All @@ -67,6 +69,8 @@ Help can be provided by the CLI tool itself:
| -i, --input | Required. The directory that contains the input XML files. |
| -o, --output | Required. The directory where output (MDB) files should go. |
| -m, --move | The directory to move input files to once they are done being processed. |
| -x, --xsd | The path to an XML Schema (XSD) that should be used to validate the structure of each XML document processed. |
| -s, --sch | The path to an ISO Schematron (SCH) file that should be used to validate the content of each XML document processed. |

### Command: db2

Expand All @@ -78,6 +82,8 @@ Help can be provided by the CLI tool itself:
| -p, --password | Required. The authenticated password to access the DB. |
| -d, --database | (Default: xdc) The name of the database to convert/output to. |
| -m, --move | The directory to move input files to once they are done being processed. |
| -x, --xsd | The path to an XML Schema (XSD) that should be used to validate the structure of each XML document processed. |
| -s, --sch | The path to an ISO Schematron (SCH) file that should be used to validate the content of each XML document processed. |

## DB2 Conversion

Expand Down
6 changes: 6 additions & 0 deletions cli/DB2Options.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,11 @@ internal class DB2Options

[Option('p', "password", Required = true, HelpText = "The authenticated password to access the DB.")]
public string Password { get; set; }

[Option('x', "xsd", Required = false, HelpText = "The path to an XML Schema (XSD) that should be used to validate the structure of each XMl document processed.")]
public string SchemaPath { get; set; }

[Option('s', "sch", Required = false, HelpText = "The path to an ISO Schematron (SCH) file that should be used to validate the content of each XMl document processed.")]
public string SchematronPath { get; set; }
}
}
6 changes: 6 additions & 0 deletions cli/MDBOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,11 @@ internal class MDBOptions

[Option('m', "move", Required = false, HelpText = "The directory to move input files to once they are done being processed.")]
public string MoveDirectory { get; set; }

[Option('x', "xsd", Required = false, HelpText = "The path to an XML Schema (XSD) that should be used to validate the structure of each XMl document processed.")]
public string SchemaPath { get; set; }

[Option('s', "sch", Required = false, HelpText = "The path to an ISO Schematron (SCH) file that should be used to validate the content of each XMl document processed.")]
public string SchematronPath { get; set; }
}
}
6 changes: 3 additions & 3 deletions cli/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ static void Main(string[] args)
var result = Parser.Default.ParseArguments<XLSXOptions, MDBOptions, DB2Options>(args)
.WithParsed<XLSXOptions>(o =>
{
XlsxConverter xlsxConverter = new XlsxConverter(o.MappingConfig, o.InputDirectory, o.OutputDirectory, o.MoveDirectory);
XlsxConverter xlsxConverter = new XlsxConverter(o.MappingConfig, o.InputDirectory, o.OutputDirectory, o.MoveDirectory, o.SchemaPath, o.SchematronPath);
xlsxConverter.LogEvent += delegate (string logText)
{
Console.WriteLine(logText);
Expand All @@ -24,7 +24,7 @@ static void Main(string[] args)
})
.WithParsed<MDBOptions>(o =>
{
MSAccessConverter mdbConverter = new MSAccessConverter(o.MappingConfig, o.InputDirectory, o.OutputDirectory, o.MoveDirectory);
MSAccessConverter mdbConverter = new MSAccessConverter(o.MappingConfig, o.InputDirectory, o.OutputDirectory, o.MoveDirectory, o.SchemaPath, o.SchematronPath);
mdbConverter.LogEvent += delegate (string logText)
{
Console.WriteLine(logText);
Expand All @@ -33,7 +33,7 @@ static void Main(string[] args)
})
.WithParsed<DB2Options>(o =>
{
DB2Converter db2Converter = new DB2Converter(o.MappingConfig, o.InputDirectory, o.Database, o.Username, o.Password, o.MoveDirectory);
DB2Converter db2Converter = new DB2Converter(o.MappingConfig, o.InputDirectory, o.Database, o.Username, o.Password, o.MoveDirectory, o.SchemaPath, o.SchematronPath);
db2Converter.LogEvent += delegate (string logText)
{
Console.WriteLine(logText);
Expand Down
6 changes: 6 additions & 0 deletions cli/XLSXOptions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,11 @@ internal class XLSXOptions

[Option('m', "move", Required = false, HelpText = "The directory to move input files to once they are done being processed.")]
public string MoveDirectory { get; set; }

[Option('x', "xsd", Required = false, HelpText = "The path to an XML Schema (XSD) that should be used to validate the structure of each XMl document processed.")]
public string SchemaPath { get; set; }

[Option('s', "sch", Required = false, HelpText = "The path to an ISO Schematron (SCH) file that should be used to validate the content of each XMl document processed.")]
public string SchematronPath { get; set; }
}
}
45 changes: 42 additions & 3 deletions lib/BaseConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ public abstract class BaseConverter
protected DocumentBuilder builder;
protected XPathCompiler compiler;

protected BaseConverter(string configFileName, string inputDirectory, string moveDirectory)
private Validator validator;

protected BaseConverter(string configFileName, string inputDirectory, string moveDirectory, string schemaPath, string schematronPath)
{
this.config = MappingConfig.LoadFromFileWithParents(configFileName);
this.inputDirectory = inputDirectory;
Expand All @@ -36,6 +38,8 @@ protected BaseConverter(string configFileName, string inputDirectory, string mov

foreach (var theNs in this.config.Namespace)
this.compiler.DeclareNamespace(theNs.Prefix, theNs.Uri);

this.validator = new Validator(schemaPath, schematronPath);
}

protected abstract int InsertData(string tableName, Dictionary<MappingColumn, object> columns);
Expand Down Expand Up @@ -189,7 +193,7 @@ public void Convert()
FileInfo fileInfo = new FileInfo(xmlFile);
XmlDocument xmlDoc = null;

this.Log("---------------------------------------------\r\nReading XML file: " + fileInfo.Name + "\r\n");
this.Log("---------------------------------------------\r\nReading XML file: " + fileInfo.Name);

try
{
Expand All @@ -212,16 +216,51 @@ public void Convert()
try
{
this.ProcessFile(xmlDoc, nsManager, fileInfo);
}
catch (Exception ex)
{
this.Log(String.Format("Failed to process file {0} data due to: {1}", fileInfo.Name, ex.Message));
break;
}

try
{
// Always run Validate(). If not configured with validation schema and/or schematron, it will just return "valid"
bool isSchemaValid = this.validator.ValidateSchema(xmlFile);
bool isSchematronValid = this.validator.ValidateSchematron(xmlFile);

if (this.validator.WillValidate)
{
this.Log("Validation results:");

if (this.validator.WillValidateSchema)
this.Log(string.Format("Schema (XSD): {0}", isSchemaValid ? "valid" : "not valid"));
else
this.Log("Schema (XSD): n/a");

if (this.validator.WillValidateSchematron)
this.Log(string.Format("Schematron (SCH): {0}", isSchematronValid ? "valid" : "not valid"));
else
this.Log("Schematron (SCH): n/a");
}

if (!String.IsNullOrEmpty(this.moveDirectory))
{
string destinationFilePath = Path.Combine(this.moveDirectory, fileInfo.Name);

// If configured to validate, move the file to a subdirectory "valid" or "invalid" depending on the validation results
if (this.validator.WillValidate)
destinationFilePath = Path.Combine(destinationFilePath, isSchemaValid ? "valid" : "invalid");

fileInfo.MoveTo(destinationFilePath);
}

this.Log(string.Format("Done processing file {0}", fileInfo.Name));
}
catch (Exception ex)
{
this.Log(String.Format("Failed to process file {0} data due to: {1}", fileInfo.Name, ex.Message));
this.Log(String.Format("Failed to validate and/or move file {0} data due to: {1}", fileInfo.Name, ex.Message));
break;
}
}
}
Expand Down
4 changes: 2 additions & 2 deletions lib/DB2Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ public class DB2Converter : BaseConverter

private DbConnection conn;

public DB2Converter(string configFileName, string inputDirectory, string database, string username, string password, string moveDirectory) :
base(configFileName, inputDirectory, moveDirectory)
public DB2Converter(string configFileName, string inputDirectory, string database, string username, string password, string moveDirectory, string schemaPath, string schematronPath) :
base(configFileName, inputDirectory, moveDirectory, schemaPath, schematronPath)
{
this.database = database;
this.username = username;
Expand Down
4 changes: 2 additions & 2 deletions lib/MSAccessConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ public class MSAccessConverter : BaseConverter
private string outputDirectory;
private OleDbConnection conn;

public MSAccessConverter(string configFileName, string inputDirectory, string outputDirectory, string moveDirectory) :
base(configFileName, inputDirectory, moveDirectory)
public MSAccessConverter(string configFileName, string inputDirectory, string outputDirectory, string moveDirectory, string schemaPath, string schematronPath) :
base(configFileName, inputDirectory, moveDirectory, schemaPath, schematronPath)
{
this.outputDirectory = outputDirectory;
}
Expand Down
150 changes: 150 additions & 0 deletions lib/Validator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
using Saxon.Api;
using System;
using System.IO;
using System.Reflection;
using System.Text;
using System.Xml;
using System.Xml.Schema;

namespace LantanaGroup.XmlDocumentConverter
{
/// <summary>
/// Validates XML files against an optional XML Schema (XSD) and an optional
/// Schematron (SCH) file. Either path may be null or empty, in which case the
/// corresponding validation is skipped and reported as valid.
/// </summary>
public class Validator
{
    // Manifest resource name of the embedded stylesheet that turns the SCH file into a validating stylesheet.
    private const string ConformanceStylesheetResource = "LantanaGroup.XmlDocumentConverter.iso-sch-conformance1-5.xsl";

    private readonly string schemaPath;
    private readonly string schematronPath;
    private readonly Processor processor = new Processor();
    private readonly XmlReaderSettings readerSettings;
    private readonly DocumentBuilder builder;

    // Stylesheet produced from the SCH file; computed on first use and reused for every document.
    private string phase1;

    /// <summary>True when at least one of the schema or schematron paths was supplied.</summary>
    public bool WillValidate
    {
        get
        {
            return this.WillValidateSchema || this.WillValidateSchematron;
        }
    }

    /// <summary>True when a schema (XSD) path was supplied.</summary>
    public bool WillValidateSchema
    {
        get
        {
            return !string.IsNullOrEmpty(this.schemaPath);
        }
    }

    /// <summary>True when a schematron (SCH) path was supplied.</summary>
    public bool WillValidateSchematron
    {
        get
        {
            return !string.IsNullOrEmpty(this.schematronPath);
        }
    }

    /// <summary>
    /// Creates a validator.
    /// </summary>
    /// <param name="schemaPath">Path to the XSD file, or null/empty to skip schema validation.</param>
    /// <param name="schematronPath">Path to the SCH file, or null/empty to skip schematron validation.</param>
    public Validator(string schemaPath, string schematronPath)
    {
        this.schemaPath = schemaPath;
        this.schematronPath = schematronPath;

        // The resolver is rooted at the SCH file's directory, so it can only be built
        // when a schematron path was actually given (FileInfo rejects null/empty paths).
        if (this.WillValidateSchematron)
            this.processor.XmlResolver = new ValidatorResolver(new FileInfo(this.schematronPath).DirectoryName);

        this.builder = this.processor.NewDocumentBuilder();
        this.builder.BaseUri = new Uri("file://");

        if (this.WillValidateSchema)
        {
            this.readerSettings = new XmlReaderSettings();
            // Passing null lets the schema set take the namespace from the XSD's own
            // targetNamespace, which also covers schemas that declare no target namespace.
            this.readerSettings.Schemas.Add(null, this.schemaPath);
            this.readerSettings.ValidationType = ValidationType.Schema;
        }
    }

    /// <summary>
    /// Reads the given XML file with schema validation enabled.
    /// </summary>
    /// <param name="filePath">Path of the XML file to validate.</param>
    /// <returns>
    /// True when no schema is configured or no validation event of severity
    /// <see cref="XmlSeverityType.Error"/> was raised; otherwise false.
    /// </returns>
    public bool ValidateSchema(string filePath)
    {
        if (this.readerSettings == null) return true;

        bool isValid = true;

        // Attach the handler to a per-call copy so handlers do not pile up on the shared settings.
        XmlReaderSettings settings = this.readerSettings.Clone();
        settings.ValidationEventHandler += delegate (object sender, ValidationEventArgs e)
        {
            if (e.Severity == XmlSeverityType.Error) isValid = false;
        };

        // Dispose the reader so the file handle is released before the caller moves the file.
        using (XmlReader reader = XmlReader.Create(filePath, settings))
        {
            while (reader.Read()) { }
        }

        return isValid;
    }

    /// <summary>
    /// Parses XML text into a Saxon node using this validator's document builder.
    /// </summary>
    private XdmNode ParseXml(string xmlContent)
    {
        using (MemoryStream stream = new MemoryStream(Encoding.UTF8.GetBytes(xmlContent)))
        using (StreamReader reader = new StreamReader(stream))
        {
            return this.builder.Build(reader);
        }
    }

    /// <summary>
    /// Compiles <paramref name="stylesheetContent"/> as XSLT, runs it over the file at
    /// <paramref name="xmlPath"/> and returns the serialized result document.
    /// </summary>
    /// <param name="stylesheetContent">The XSLT stylesheet text.</param>
    /// <param name="xmlPath">Path of the XML file used as the transformation input.</param>
    /// <param name="addErrorsPhase">When true, the stylesheet parameter "phase" is set to "errors".</param>
    /// <exception cref="InvalidOperationException">The transformation produced no result document.</exception>
    private string Transform(string stylesheetContent, string xmlPath, bool addErrorsPhase = false)
    {
        FileInfo xmlInfo = new FileInfo(xmlPath);
        XdmNode stylesheet = this.ParseXml(stylesheetContent);

        XsltCompiler compiler = this.processor.NewXsltCompiler();
        XsltExecutable exec = compiler.Compile(stylesheet);

        XsltTransformer transformer = exec.Load();
        DomDestination dest = new DomDestination();

        if (addErrorsPhase)
            transformer.SetParameter(new QName("phase"), XdmValue.MakeValue("errors"));

        using (var inputStream = xmlInfo.OpenRead())
        {
            transformer.SetInputStream(inputStream, new Uri(xmlInfo.DirectoryName));
            transformer.Run(dest);
        }

        if (dest.XmlDocument == null)
            throw new InvalidOperationException("Failed to execute Schematron validation. The SCH file selected may not be the ISO version of Schematron.");

        using (StringWriter sw = new StringWriter())
        {
            dest.XmlDocument.Save(sw);
            return sw.ToString();
        }
    }

    /// <summary>
    /// Reads the embedded stylesheet that is applied to the SCH file.
    /// </summary>
    /// <exception cref="InvalidOperationException">The resource is not embedded in the executing assembly.</exception>
    private static string ReadConformanceStylesheet()
    {
        using (Stream resourceStream = Assembly.GetExecutingAssembly().GetManifestResourceStream(ConformanceStylesheetResource))
        {
            if (resourceStream == null)
                throw new InvalidOperationException("Embedded resource not found: " + ConformanceStylesheetResource);

            using (StreamReader resourceReader = new StreamReader(resourceStream))
            {
                return resourceReader.ReadToEnd();
            }
        }
    }

    /// <summary>
    /// Validates the given XML file against the configured schematron.
    /// </summary>
    /// <param name="filePath">Path of the XML file to validate.</param>
    /// <returns>
    /// True when no schematron is configured or the validation output contains no
    /// <c>failed-assert</c> element; otherwise false.
    /// </returns>
    public bool ValidateSchematron(string filePath)
    {
        if (!this.WillValidateSchematron) return true;

        // Phase 1: apply the embedded stylesheet to the SCH file (done once, then cached).
        if (string.IsNullOrEmpty(this.phase1))
            this.phase1 = this.Transform(ReadConformanceStylesheet(), this.schematronPath, true);

        // Phase 2: apply the stylesheet produced by phase 1 to the document being validated.
        string phase2 = this.Transform(this.phase1, filePath);
        XdmNode phase2Results = this.ParseXml(phase2);

        // NOTE(review): the expression is not namespace-qualified; confirm the embedded
        // stylesheet emits failed-assert elements in no namespace.
        XPathCompiler xpathCompiler = this.processor.NewXPathCompiler();
        XPathExecutable xpathExec = xpathCompiler.Compile("//failed-assert");
        XPathSelector xpathSelector = xpathExec.Load();
        xpathSelector.ContextItem = phase2Results;
        var xpathResults = xpathSelector.Evaluate();

        return xpathResults.Count == 0;
    }
}
}
41 changes: 41 additions & 0 deletions lib/ValidatorResolver.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
using Saxon.Api;
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Xml;

namespace LantanaGroup.XmlDocumentConverter
{
/// <summary>
/// An <see cref="XmlResolver"/> that serves the embedded skeleton stylesheet and
/// resolves <c>file:///</c> URIs against a fixed root directory.
/// </summary>
public class ValidatorResolver : XmlResolver
{
    private const string SkeletonFileName = "skeleton1-5.xsl";
    private const string SkeletonResourceName = "LantanaGroup.XmlDocumentConverter.iso-sch-skeleton1-5.xsl";
    private const string FileUriPrefix = "file:///";

    private readonly string rootPath;

    /// <summary>
    /// Creates a resolver.
    /// </summary>
    /// <param name="rootPath">Directory that <c>file:///</c> URIs are resolved against.</param>
    public ValidatorResolver(string rootPath)
    {
        this.rootPath = rootPath;
    }

    /// <summary>
    /// Maps a URI to a readable stream.
    /// </summary>
    /// <param name="absoluteUri">The URI to resolve.</param>
    /// <param name="role">Unused.</param>
    /// <param name="ofObjectToReturn">Unused; a stream is always returned.</param>
    /// <returns>
    /// The embedded skeleton stylesheet stream when the URI ends with "skeleton1-5.xsl";
    /// otherwise a read-only stream over the matching file.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="absoluteUri"/> is null.</exception>
    /// <exception cref="NotImplementedException">The URI could not be mapped to an existing file.</exception>
    public override object GetEntity(Uri absoluteUri, string role, Type ofObjectToReturn)
    {
        if (absoluteUri == null)
            throw new ArgumentNullException("absoluteUri");

        string uriText = absoluteUri.ToString();

        if (uriText.EndsWith(SkeletonFileName, StringComparison.Ordinal))
            return System.Reflection.Assembly.GetExecutingAssembly().GetManifestResourceStream(SkeletonResourceName);

        if (uriText.StartsWith(FileUriPrefix, StringComparison.Ordinal))
        {
            // Strip the "file:///" prefix and treat the remainder as a path under the root.
            // Path.Combine returns the remainder unchanged when it is already rooted (e.g. "C:/...").
            string filePath = uriText.Substring(FileUriPrefix.Length);
            string actualPath = Path.Combine(this.rootPath, filePath);

            if (File.Exists(actualPath))
                return OpenForRead(actualPath);

            // Fall back to the URI's own local path, so absolute file URIs also resolve
            // on hosts where stripping the prefix yields a relative path.
            if (absoluteUri.IsFile && File.Exists(absoluteUri.LocalPath))
                return OpenForRead(absoluteUri.LocalPath);
        }

        throw new NotImplementedException("Unable to resolve entity: " + uriText);
    }

    /// <summary>
    /// Opens a file read-only so that read-only files and concurrent readers are supported.
    /// </summary>
    private static FileStream OpenForRead(string path)
    {
        return new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read);
    }
}
}
4 changes: 2 additions & 2 deletions lib/XlsxConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ public class XlsxConverter : BaseConverter
private SpreadsheetDocument spreadsheet;
private ExcelFormat excelFormat;

public XlsxConverter(string configFileName, string inputDirectory, string outputDirectory, string moveDirectory) :
base(configFileName, inputDirectory, moveDirectory)
public XlsxConverter(string configFileName, string inputDirectory, string outputDirectory, string moveDirectory, string schemaPath, string schematronPath) :
base(configFileName, inputDirectory, moveDirectory, schemaPath, schematronPath)
{
this.outputDirectory = outputDirectory;
}
Expand Down
Loading

0 comments on commit 6455a76

Please sign in to comment.