Skip to content

Commit

Permalink
Merge branch 'issue-681' of https://github.com/UW-Madison-Library/jhove
Browse files Browse the repository at this point in the history
… into merge/madlib/issue-681
  • Loading branch information
carlwilson committed Apr 10, 2022
2 parents e5b487c + 04540bf commit 3c48e70
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 2 deletions.
11 changes: 11 additions & 0 deletions jhove-core/src/main/java/edu/harvard/hul/ois/jhove/JhoveBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,17 @@ public void setCurrentThread(Thread t) {
_currentThread = t;
}

/**
 * Configures the upper limit on how many bytes are examined by modules
 * that search for a signature with no fixed position, or that inspect the
 * leading sigBytes bytes instead of matching a signature.
 *
 * @param sigBytes max number of bytes to check
 */
public void setSigBytes(int sigBytes) {
    this._sigBytes = sigBytes;
}

/**
* Resets the abort flag. This must be called at the beginning of any
* activity for which the abort flag may subsequently be set.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.text.MessageFormat;
import java.util.*;
import javax.xml.parsers.SAXParserFactory;
Expand Down Expand Up @@ -919,11 +920,11 @@ public void checkSignatures(File file, InputStream stream, RepInfo info)
int sigidx = 0;
JhoveBase jb = getBase();
int sigBytes = jb.getSigBytes();
DataInputStream dstream = new DataInputStream(stream);
Reader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
int charsRead = 0;
try {
while (charsRead < sigBytes) {
char ch = dstream.readChar();
char ch = (char) reader.read();
++charsRead;
// Skip over all whitespace till we reach "xml"
if (sigidx <= 2 && Character.isWhitespace(ch)) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.JhoveBase;
import edu.harvard.hul.ois.jhove.JhoveException;
import edu.harvard.hul.ois.jhove.RepInfo;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

import static org.junit.Assert.assertEquals;

/**
 * Unit tests for the signature check performed by {@link XmlModule}.
 */
public class XmlModuleTest {

    /** Directory (relative to the working directory) holding the test fixtures. */
    private static final String RESOURCE_DIR = "src/test/resources/edu/harvard/hul/ois/jhove/module/";
    /** Name expected to be recorded in the signature-match list. */
    private static final String MODULE_NAME = "XML-hul";
    /** Signature byte limit configured on the JhoveBase used by the tests. */
    private static final int SIG_BYTES = 1024;

    private XmlModule module;

    /**
     * Creates a fresh module wired to a JhoveBase whose signature byte
     * limit is set to {@link #SIG_BYTES}.
     *
     * @throws JhoveException if the JhoveBase cannot be constructed
     */
    @Before
    public void setup() throws JhoveException {
        JhoveBase base = new JhoveBase();
        base.setSigBytes(SIG_BYTES);
        module = new XmlModule();
        module.setBase(base);
    }

    /**
     * Runs the signature check against the "not-well-formed.xml" fixture
     * and expects exactly one signature match, reported under
     * {@link #MODULE_NAME}, with the well-formed flag equal to
     * {@code RepInfo.TRUE} afterwards.
     *
     * @throws IOException if the fixture cannot be opened or read
     */
    @Test
    public void shouldDetectXmlWhenHasDeclarationAndNotWellFormed() throws IOException {
        File file = new File(RESOURCE_DIR + "not-well-formed.xml");
        RepInfo info = new RepInfo(file.toURI().toString());

        // Close the fixture stream deterministically so the test does not
        // leak a file handle, even when checkSignatures throws.
        try (FileInputStream stream = new FileInputStream(file)) {
            module.checkSignatures(file, stream, info);
        }

        assertEquals(1, info.getSigMatch().size());
        assertEquals(MODULE_NAME, info.getSigMatch().get(0));
        assertEquals(RepInfo.TRUE, info.getWellFormed());
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<outer>
<inner>
blah
</outer>
</inner>

0 comments on commit 3c48e70

Please sign in to comment.