Skip to content

Commit

Permalink
Merge pull request #16551 from cavelo-anoop/tika_parser_config
Browse files Browse the repository at this point in the history
Use setter method to derive the type of property
  • Loading branch information
sberyozkin committed Apr 15, 2021
2 parents 5e749f2 + 66635ca commit 1321e3f
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 9 deletions.
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package io.quarkus.tika.deployment;

import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
Expand Down Expand Up @@ -222,18 +223,24 @@ private static Class<?> loadParserClass(String parserName) {
private static String getParserParamType(String parserName, String paramName) {
try {
Class<?> parserClass = loadParserClass(parserName);
String paramType = parserClass.getMethod("get" + capitalize(paramName), new Class[] {}).getReturnType()
.getSimpleName().toLowerCase();
if (paramType.equals(boolean.class.getSimpleName())) {
// TikaConfig Param class does not recognize 'boolean', only 'bool'
// This whole reflection code is temporary anyway
paramType = "bool";
Method[] methods = parserClass.getMethods();
String setterMethodName = "set" + capitalize(paramName);
String paramType = null;
for (Method method : methods) {
if (method.getName().equals(setterMethodName) && method.getParameterCount() == 1) {
paramType = method.getParameterTypes()[0].getSimpleName().toLowerCase();
if (paramType.equals(boolean.class.getSimpleName())) {
// TikaConfig Param class does not recognize 'boolean', only 'bool'
// This whole reflection code is temporary anyway
paramType = "bool";
}
return paramType;
}
}
return paramType;
} catch (Throwable t) {
final String errorMessage = "Parser " + parserName + " has no " + paramName + " property";
throw new TikaParseException(errorMessage);
throw new TikaParseException(String.format("Parser %s has no %s property", parserName, paramName));
}
throw new TikaParseException(String.format("Parser %s has no %s property", parserName, paramName));
}

public static class TikaParserParameter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,35 @@ public void testPdfParserConfig() throws Exception {
assertEquals("true", parserConfig.get(pdfParserFullName).get(0).getValue());
}

@Test
public void testTesseractParserConfig() throws Exception {
    // The "ocr" abbreviation is mapped to the Tesseract parser and given a
    // single kebab-case option.
    final String tesseractParser = "org.apache.tika.parser.ocr.TesseractOCRParser";

    Map<String, List<TikaProcessor.TikaParserParameter>> config = getParserConfig(null, "ocr",
            Collections.singletonMap("ocr",
                    Collections.singletonMap("tesseract-path", "/opt/tesseract/")),
            Collections.singletonMap("ocr", tesseractParser));

    // Exactly one parser is configured ...
    assertEquals(1, config.size());

    // ... carrying exactly one parameter ...
    List<TikaProcessor.TikaParserParameter> parameters = config.get(tesseractParser);
    assertEquals(1, parameters.size());

    // ... whose name is the camel-case form of the option and whose value is unchanged.
    TikaProcessor.TikaParserParameter onlyParameter = parameters.get(0);
    assertEquals("tesseractPath", onlyParameter.getName());
    assertEquals("/opt/tesseract/", onlyParameter.getValue());
}

@Test
public void testUnknownParserConfig() throws Exception {
    // An option with no matching property on the parser must be rejected
    // with a message naming the parser and the camel-case property.
    String ocrParserFullName = "org.apache.tika.parser.ocr.TesseractOCRParser";
    Exception failure = null;
    try {
        getParserConfig(null, "ocr",
                Collections.singletonMap("ocr",
                        Collections.singletonMap("tesseract-unknown-opt", "/opt/tesseract/")),
                Collections.singletonMap("ocr", ocrParserFullName));
    } catch (Exception e) {
        // expected
        failure = e;
    }
    // Without this check the test would pass silently when nothing is thrown.
    if (failure == null) {
        throw new AssertionError("Expected an exception for the unknown 'tesseract-unknown-opt' option");
    }
    assertEquals("Parser org.apache.tika.parser.ocr.TesseractOCRParser has no tesseractUnknownOpt property",
            failure.getMessage());
}

@Test
public void testUnresolvableCustomAbbreviation() throws Exception {
try {
Expand Down

0 comments on commit 1321e3f

Please sign in to comment.