Skip to content

Commit

Permalink
Make order of the extracted fields stable (Set --> List) and Fail when asking for a field that cannot be extracted
Browse files Browse the repository at this point in the history
  • Loading branch information
nielsbasjes committed Jul 4, 2017
1 parent 724f967 commit fb3fcc7
Show file tree
Hide file tree
Showing 6 changed files with 83 additions and 20 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Expand Up @@ -126,5 +126,7 @@ v1.4
- Fixed false positive "Honor Note" matches SQL fragment "OR NOT"
- Detect Coc Coc Browser
- Improved commandline tool
- Make order of the extracted fields stable (Set --> List)
- Fail when asking for a field that cannot be extracted


34 changes: 27 additions & 7 deletions analyzer/src/main/java/nl/basjes/parse/useragent/UserAgent.java
Expand Up @@ -58,6 +58,7 @@ public class UserAgent extends UserAgentBaseListener implements Serializable, AN
public static final String AGENT_VERSION_MAJOR = "AgentVersionMajor";

public static final String SYNTAX_ERROR = "__SyntaxError__";
public static final String USERAGENT = "Useragent";

public static final String SET_ALL_FIELDS = "__Set_ALL_Fields__";
public static final String NULL_VALUE = "<<<null>>>";
Expand Down Expand Up @@ -286,9 +287,10 @@ public void reset() {
}
}

private boolean isSystemField(String fieldname) {
static boolean isSystemField(String fieldname) {
return SET_ALL_FIELDS.equals(fieldname) ||
SYNTAX_ERROR.equals(fieldname);
SYNTAX_ERROR.equals(fieldname) ||
USERAGENT.equals(fieldname);
}

public void processSetAll() {
Expand Down Expand Up @@ -335,10 +337,19 @@ private void set(String fieldName, AgentField agentField) {
}

public AgentField get(String fieldName) {
return allFields.get(fieldName);
if (USERAGENT.equals(fieldName)) {
AgentField agentField = new AgentField(userAgentString);
agentField.setValue(userAgentString, 0L);
return agentField;
} else {
return allFields.get(fieldName);
}
}

public String getValue(String fieldName) {
if (USERAGENT.equals(fieldName)) {
return userAgentString;
}
AgentField field = allFields.get(fieldName);
if (field == null) {
return UNKNOWN_VALUE;
Expand All @@ -347,6 +358,9 @@ public String getValue(String fieldName) {
}

public Long getConfidence(String fieldName) {
if (USERAGENT.equals(fieldName)) {
return 0L;
}
AgentField field = allFields.get(fieldName);
if (field == null) {
return -1L;
Expand Down Expand Up @@ -434,21 +448,27 @@ public String toJson(List<String> fieldNames) {
StringBuilder sb = new StringBuilder(10240);
sb.append("{");

boolean addSeparator = false;
for (String fieldName : fieldNames) {
if (addSeparator) {
sb.append(',');
} else {
addSeparator = true;
}
if ("Useragent".equals(fieldName)) {
sb
.append("\"Useragent\"")
.append(':')
.append('"').append(StringEscapeUtils.escapeJson(getUserAgentString())).append('"')
.append(',');
.append('"').append(StringEscapeUtils.escapeJson(getUserAgentString())).append('"');
} else {
sb
.append('"').append(StringEscapeUtils.escapeJson(fieldName)).append('"')
.append(':')
.append('"').append(StringEscapeUtils.escapeJson(getValue(fieldName))).append('"')
.append(',');
.append('"').append(StringEscapeUtils.escapeJson(getValue(fieldName))).append('"');
}
}

sb.append("}");
return sb.toString();
}

Expand Down
Expand Up @@ -94,7 +94,7 @@ public class UserAgentAnalyzer extends Analyzer implements Serializable {
private boolean doingOnlyASingleTest = false;

// If we want ALL fields this is null. If we only want specific fields this is a list of names.
protected Set<String> wantedFieldNames = null;
protected List<String> wantedFieldNames = null;

protected List<Map<String, Map<String, String>>> testCases = new ArrayList<>(2048);
private Map<String, Map<String, String>> lookups = new HashMap<>(128);
Expand Down Expand Up @@ -157,8 +157,31 @@ public UserAgentAnalyzer setShowMatcherStats(boolean newShowMatcherStats) {
/**
 * Sets up this analyzer by running three steps in order: {@code logVersion()},
 * {@code loadResources(...)} with the UserAgents YAML classpath pattern, and finally
 * {@code verifyWeAreNotAskingForImpossibleFields()}.
 *
 * @throws InvalidParserConfigurationException if a specifically requested field name is
 *         neither a system field nor one of the possible field names
 */
protected void initialize() {
logVersion();
loadResources("classpath*:UserAgents/**/*.yaml");
// Runs last; presumably the set of possible field names depends on the resources
// loaded by the previous call -- TODO confirm against getAllPossibleFieldNamesSorted().
verifyWeAreNotAskingForImpossibleFields();
}

/**
 * Checks that every explicitly requested field name can be provided.
 *
 * <p>When {@code wantedFieldNames} is {@code null} (meaning all fields are wanted) there is
 * nothing to check. Otherwise each requested name must either be a system field according to
 * {@code UserAgent.isSystemField} or be present in {@code getAllPossibleFieldNamesSorted()}.
 * All offending names are collected, in the order in which they were requested, and reported
 * together in a single exception.
 *
 * @throws InvalidParserConfigurationException if at least one requested field cannot be provided
 */
protected void verifyWeAreNotAskingForImpossibleFields() {
    if (wantedFieldNames != null) {
        List<String> unavailable = new ArrayList<>();
        List<String> extractable = getAllPossibleFieldNamesSorted();

        for (String requested : wantedFieldNames) {
            // System fields are always acceptable; anything else must be a known field name.
            boolean canBeProvided =
                UserAgent.isSystemField(requested) || extractable.contains(requested);
            if (!canBeProvided) {
                unavailable.add(requested);
            }
        }

        if (!unavailable.isEmpty()) {
            throw new InvalidParserConfigurationException("We cannot provide these fields:" + unavailable);
        }
    }
}


public static void logVersion(String... extraLines) {
String[] lines = {
"For more information: https://github.com/nielsbasjes/yauaa",
Expand Down Expand Up @@ -925,7 +948,7 @@ public Builder withoutCache() {

public Builder withField(String fieldName) {
if (uaa.wantedFieldNames == null) {
uaa.wantedFieldNames = new HashSet<>(32);
uaa.wantedFieldNames = new ArrayList<>(32);
}
uaa.wantedFieldNames.add(fieldName);
return this;
Expand Down
Expand Up @@ -61,7 +61,7 @@ public class Matcher implements Serializable {

public Matcher(Analyzer analyzer,
Map<String, Map<String, String>> lookups,
Set<String> wantedFieldNames,
List<String> wantedFieldNames,
MappingNode matcherConfig,
String filename) throws UselessMatcherException {
this.lookups = lookups;
Expand Down
Expand Up @@ -20,7 +20,11 @@
import nl.basjes.parse.useragent.UserAgent;
import nl.basjes.parse.useragent.UserAgentAnalyzer;
import org.junit.Assert;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import static org.junit.Assert.fail;

public class TestBuilder {

Expand Down Expand Up @@ -68,4 +72,23 @@ public void testLimitedFields() {
Assert.assertEquals(-1, parsedAgent.get("AgentClass" ).getConfidence()); // Browser
}

// JUnit rule through which a test declares the exception it expects to be thrown;
// it starts out expecting none.
@Rule
public ExpectedException expectedEx = ExpectedException.none();

/**
 * Building an analyzer that requests field names which cannot be provided must fail with an
 * {@code InvalidParserConfigurationException} that lists exactly the unknown names, in the
 * order in which they were requested.
 */
@Test
public void testAskingForImpossibleField() {
    // Only the two unknown names may show up in the message; "DeviceClass" must not.
    expectedEx.expectMessage("We cannot provide these fields:[FirstNonexistentField, SecondNonexistentField]");
    expectedEx.expect(InvalidParserConfigurationException.class);

    // The build() call is what is expected to throw; its result is not needed.
    UserAgentAnalyzer
        .newBuilder()
        .withoutCache()
        .hideMatcherLoadStats()
        .withField("FirstNonexistentField")
        .withField("DeviceClass")
        .withField("SecondNonexistentField")
        .build();
}

}
Expand Up @@ -37,6 +37,7 @@
import java.io.InputStreamReader;
import java.util.List;

import static nl.basjes.parse.useragent.UserAgent.USERAGENT;
import static nl.basjes.parse.useragent.commandline.Main.OutputFormat.CSV;
import static nl.basjes.parse.useragent.commandline.Main.OutputFormat.JSON;
import static nl.basjes.parse.useragent.commandline.Main.OutputFormat.YAML;
Expand All @@ -51,8 +52,6 @@ enum OutputFormat {
CSV, JSON, YAML
}

private static final String USERAGENT = "Useragent";

private static void printHeader(OutputFormat outputFormat, List<String> fields) {
switch (outputFormat) {
case CSV:
Expand Down Expand Up @@ -82,13 +81,9 @@ private static void printAgent(OutputFormat outputFormat, List<String> fields, U
} else {
doSeparator = true;
}
if (USERAGENT.equals(field)) {
System.out.println(agent.getUserAgentString());
} else {
String value = agent.getValue(field);
if (value != null) {
System.out.print(value);
}
String value = agent.getValue(field);
if (value != null) {
System.out.print(value);
}
}
System.out.println();
Expand Down Expand Up @@ -325,7 +320,7 @@ private static class CommandOptions {
private boolean jsonFormat = false;

@Option(name = "-fields", handler = StringArrayOptionHandler.class,
usage = "A list of the desired fieldnames (use 'Useragent' if you want the input value aswell)")
usage = "A list of the desired fieldnames (use '"+USERAGENT+"' if you want the input value aswell)")
private List<String> fields = null;

@Option(name = "-cache", usage = "The number of elements that can be cached (LRU).")
Expand Down

0 comments on commit fb3fcc7

Please sign in to comment.