diff --git a/README.md b/README.md index d36f41d..a7730f1 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,14 @@ Log-Parser ---------- -[![Build Status](https://travis-ci.org/sangupta/log-parser.svg?branch=master)](https://travis-ci.org/sangupta/log-parser) -[![Coverage Status](https://coveralls.io/repos/github/sangupta/log-parser/badge.svg?branch=master)](https://coveralls.io/github/sangupta/log-parser?branch=master) +[![Travis](https://img.shields.io/travis/sangupta/log-parser.svg)]() +[![Coveralls](https://img.shields.io/coveralls/sangupta/log-parser.svg)]() [![Maven Version](https://maven-badges.herokuapp.com/maven-central/com.sangupta/log-parser/badge.svg)](https://maven-badges.herokuapp.com/maven-central/com.sangupta/log-parser) +[![license](https://img.shields.io/github/license/sangupta/log-parser.svg)]() -A simple Java libary to parse various known log file formats into strongly-typed format-specific Java object. Once data is into a strongly typed object, its easier to run analysis on large files. +A simple Java library to parse various known log file formats into strongly-typed +format-specific Java objects. Once data is in a strongly typed object, it's easier +to run analysis on large files. 
Formats currently supported are: @@ -16,7 +19,7 @@ Formats currently supported are: * Adobe Experience Manager Replication logs * Adobe Experience Manager Tar Optimization logs * Sun/Oracle JDK GC logs -* Tomcat access logs (default format) +* Apache Tomcat access logs (default format) ### RoadMap @@ -41,42 +44,38 @@ $ mvn clean package You may include the library in your Maven project by adding the following to the `pom.xml` ```xml - - - jitpack.io - https://jitpack.io - - - - - com.github.sangupta - log-parser - -SNAPSHOT - + + + jitpack.io + https://jitpack.io + + + + + com.github.sangupta + log-parser + -SNAPSHOT + ``` License ------- ``` -/** - * - * log-parser: Parsers for various log formats - * Copyright (c) 2015-2016, Sandeep Gupta - * - * http://sangupta.com/projects/log-parser - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ +log-parser: Parsers for various log formats +Copyright (c) 2015-2016, Sandeep Gupta + +https://sangupta.com/projects/log-parser + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. ``` diff --git a/pom.xml b/pom.xml index 50d040c..faaca34 100644 --- a/pom.xml +++ b/pom.xml @@ -65,12 +65,12 @@ com.sangupta jerry-core - 2.4.0 + 3.0.0 org.apache.commons commons-lang3 - 3.4 + 3.5 diff --git a/src/main/java/com/sangupta/logparser/aem/audit/AEMAuditLogParser.java b/src/main/java/com/sangupta/logparser/aem/audit/AEMAuditLogParser.java index 98771a5..d419df3 100644 --- a/src/main/java/com/sangupta/logparser/aem/audit/AEMAuditLogParser.java +++ b/src/main/java/com/sangupta/logparser/aem/audit/AEMAuditLogParser.java @@ -3,10 +3,10 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; -import com.sangupta.logparser.common.StringTokenReader; public class AEMAuditLogParser implements LogParser { @@ -24,7 +24,7 @@ public AEMAuditLogLine parseLogLine(String logLine) { } AEMAuditLogLine line = new AEMAuditLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { line.timestamp = LogParserUtils.parseIntoTime(DATE_PATTERN, reader.readTillNext('['), -1); } diff --git a/src/main/java/com/sangupta/logparser/aem/error/AEMErrorLogParser.java b/src/main/java/com/sangupta/logparser/aem/error/AEMErrorLogParser.java index c7054f2..d495819 100644 --- a/src/main/java/com/sangupta/logparser/aem/error/AEMErrorLogParser.java +++ b/src/main/java/com/sangupta/logparser/aem/error/AEMErrorLogParser.java @@ -24,11 +24,11 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.jerry.util.StringUtils; import com.sangupta.logparser.LogParser; import 
com.sangupta.logparser.LogParserUtils; -import com.sangupta.logparser.common.StringTokenReader; /** * A {@link LogParser} implementation to parse error.log @@ -123,7 +123,7 @@ public AEMErrorLogLine parseLogLine(String logLine) { } AEMErrorLogLine line = new AEMErrorLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { line.timestamp = LogParserUtils.parseIntoTime(DATE_TIME_PATTERN, reader.readTillNext('*'), -1); } @@ -137,7 +137,7 @@ public AEMErrorLogLine parseLogLine(String logLine) { } if(reader.hasNext()) { - splitMessageAndStackTrace(line, reader.getRemaining()); + splitMessageAndStackTrace(line, reader.readRemaining()); } return line; diff --git a/src/main/java/com/sangupta/logparser/aem/replication/AEMReplicationLogParser.java b/src/main/java/com/sangupta/logparser/aem/replication/AEMReplicationLogParser.java index 1798278..668de70 100644 --- a/src/main/java/com/sangupta/logparser/aem/replication/AEMReplicationLogParser.java +++ b/src/main/java/com/sangupta/logparser/aem/replication/AEMReplicationLogParser.java @@ -3,10 +3,10 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; -import com.sangupta.logparser.common.StringTokenReader; public class AEMReplicationLogParser implements LogParser { @@ -24,7 +24,7 @@ public AEMReplicationLogLine parseLogLine(String logLine) { } AEMReplicationLogLine line = new AEMReplicationLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { String date = reader.readTillNext('*'); line.timestamp = LogParserUtils.parseIntoTime(DATE_PATTERN, date, -1); @@ -42,7 +42,7 @@ public AEMReplicationLogLine parseLogLine(String logLine) { 
line.clazz = reader.readTillNext(' ', 2).trim(); } - line.message = reader.getRemaining(); + line.message = reader.readRemaining(); return line; } diff --git a/src/main/java/com/sangupta/logparser/aem/request/AEMRequestLogParser.java b/src/main/java/com/sangupta/logparser/aem/request/AEMRequestLogParser.java index 2078d2f..27d657f 100644 --- a/src/main/java/com/sangupta/logparser/aem/request/AEMRequestLogParser.java +++ b/src/main/java/com/sangupta/logparser/aem/request/AEMRequestLogParser.java @@ -24,12 +24,12 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; import com.sangupta.logparser.common.HttpRequest; import com.sangupta.logparser.common.HttpVerb; -import com.sangupta.logparser.common.StringTokenReader; /** * A {@link LogParser} that can be used to parse request.log files @@ -54,7 +54,7 @@ public AEMRequestLogLine parseLogLine(String logLine) { } AEMRequestLogLine line = new AEMRequestLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { line.timestamp = LogParserUtils.parseIntoTime(DATE_TIME_PATTERN, reader.readTillNext('['), -1); } @@ -84,7 +84,7 @@ public AEMRequestLogLine parseLogLine(String logLine) { } if(reader.hasNext()) { - line.request.httpVersion = reader.getRemaining(); + line.request.httpVersion = reader.readRemaining(); } } @@ -93,7 +93,7 @@ public AEMRequestLogLine parseLogLine(String logLine) { line.statusCode = Integer.parseInt(reader.readTillNext(' ')); } - String remain = reader.getRemaining(); + String remain = reader.readRemaining(); int space = remain.lastIndexOf(' '); line.mime = remain.substring(0, space).trim(); diff --git a/src/main/java/com/sangupta/logparser/aem/tar/AEMTarOptimizationLogParser.java 
b/src/main/java/com/sangupta/logparser/aem/tar/AEMTarOptimizationLogParser.java index 6ae8bc0..a843930 100644 --- a/src/main/java/com/sangupta/logparser/aem/tar/AEMTarOptimizationLogParser.java +++ b/src/main/java/com/sangupta/logparser/aem/tar/AEMTarOptimizationLogParser.java @@ -3,10 +3,10 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; -import com.sangupta.logparser.common.StringTokenReader; public class AEMTarOptimizationLogParser implements LogParser { @@ -24,7 +24,7 @@ public AEMTarOptimizationLogLine parseLogLine(String logLine) { } AEMTarOptimizationLogLine line = new AEMTarOptimizationLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { String date = reader.readTillNext('*'); line.timestamp = LogParserUtils.parseIntoTime(DATE_PATTERN, date, -1); @@ -64,7 +64,7 @@ public AEMTarOptimizationLogLine parseLogLine(String logLine) { if(reader.hasNext()) { reader.readTillNext("optimize:"); - line.optimize = LogParserUtils.asLong(reader.getRemaining()); + line.optimize = LogParserUtils.asLong(reader.readRemaining()); } return line; diff --git a/src/main/java/com/sangupta/logparser/common/StringTokenReader.java b/src/main/java/com/sangupta/logparser/common/StringTokenReader.java deleted file mode 100644 index 7def147..0000000 --- a/src/main/java/com/sangupta/logparser/common/StringTokenReader.java +++ /dev/null @@ -1,171 +0,0 @@ -/** - * - * log-parser: Parsers for various log formats - * Copyright (c) 2015-2016, Sandeep Gupta - * - * http://sangupta.com/projects/log-parser - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -package com.sangupta.logparser.common; - -/** - * A class that allows reading a {@link String} via simple tokens. - * - * @author sangupta - * - */ -public class StringTokenReader { - - private final String str; - - private final int length; - - private int current = 0; - - public StringTokenReader(String str) { - this.str = str; - this.length = str.length(); - } - - public boolean hasNext() { - return this.current < this.str.length(); - } - - public String readTillNext(char separator) { - return this.readTillNext(String.valueOf(separator), 1); - } - - public String readTillNext(char separator, int occurence) { - return this.readTillNext(String.valueOf(separator), occurence); - } - - public String readTillNext(String separator) { - return this.readTillNext(separator, 1); - } - - public String readTillNext(String separator, int occurence) { - if(!this.hasNext()) { - return null; - } - - int numFound = 0; - int index = -1; - int searchFrom = this.current; - do { - index = this.str.indexOf(separator, searchFrom); - if(index < 0) { - int start = this.current; - this.current = str.length(); - return this.str.substring(start); - } - - numFound++; - if(numFound == occurence) { - break; - } - - searchFrom = index + 1; - } while(true); - - String extracted = this.str.substring(this.current, index); - this.current = index + separator.length(); - return extracted; - } - - public String getRemaining() { - if(!this.hasNext()) { - return null; - } - - return this.str.substring(this.current); - } - - public String readBetween(char starting, char 
closing) { - if(!this.hasNext()) { - return null; - } - - if(starting == closing) { - // this is a special case - // find the two indexes - int start = this.str.indexOf(starting, this.current); - if(start == -1) { - return null; - } - - start++; - - int end = this.str.indexOf(closing, start + 1); - if(end == -1) { - end = this.length; - } - - this.current = end + 1; - return this.str.substring(start, end); - } - - int count = 0; - int start = -1; - boolean found = false; - for(int index = this.current; index < this.length; index++) { - char c = this.str.charAt(index); - if(c == starting) { - if(!found) { - start = index; - } - - count++; - found = true; - continue; - } - - if(c == closing) { - count--; - found = true; - - if(found && count == 0) { - this.current = index + 1; - return this.str.substring(start + 1, index); - } - } - } - - return null; - } - - /** - * Peek the first non-white-space character available - * - * @return - */ - public char peekNextNonWhitespace() { - int start = this.current; - do { - if(start >= this.length) { - return 0; - } - - char c = this.str.charAt(start); - if(Character.isWhitespace(c)) { - start++; - continue; - } - - return c; - } while(true); - } - -} diff --git a/src/main/java/com/sangupta/logparser/elb/ElbLogParser.java b/src/main/java/com/sangupta/logparser/elb/ElbLogParser.java index 2d884cf..39611f5 100644 --- a/src/main/java/com/sangupta/logparser/elb/ElbLogParser.java +++ b/src/main/java/com/sangupta/logparser/elb/ElbLogParser.java @@ -24,12 +24,12 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; import com.sangupta.logparser.common.HttpRequest; import com.sangupta.logparser.common.IPAddress; -import com.sangupta.logparser.common.StringTokenReader; /** * A {@link LogParser} implementation for Amazon AWS Elastic-Load-Balancer @@ 
-57,7 +57,7 @@ public ElbLogLine parseLogLine(String logLine) { ElbLogLine elbLogLine = new ElbLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { elbLogLine.timestamp = parseElbTimestamp(reader.readTillNext(SPACE)); } @@ -102,7 +102,7 @@ public ElbLogLine parseLogLine(String logLine) { elbLogLine.sentBytes = Long.parseLong(reader.readTillNext(SPACE)); } - elbLogLine.request = HttpRequest.fromString(reader.getRemaining()); + elbLogLine.request = HttpRequest.fromString(reader.readRemaining()); return elbLogLine; } diff --git a/src/main/java/com/sangupta/logparser/gc/JavaGarbageCollectionLogParser.java b/src/main/java/com/sangupta/logparser/gc/JavaGarbageCollectionLogParser.java index cd4e0a5..1cb7a01 100644 --- a/src/main/java/com/sangupta/logparser/gc/JavaGarbageCollectionLogParser.java +++ b/src/main/java/com/sangupta/logparser/gc/JavaGarbageCollectionLogParser.java @@ -4,10 +4,10 @@ import java.io.IOException; import com.sangupta.jerry.exceptions.NotImplementedException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; -import com.sangupta.logparser.common.StringTokenReader; import com.sangupta.logparser.gc.JavaGarbageCollectionLogLine.GCType; import com.sangupta.logparser.gc.JavaGarbageCollectionLogLine.JavaGCMemoryRecord; import com.sangupta.logparser.gc.JavaGarbageCollectionLogLine.JavaGCTimes; @@ -34,7 +34,7 @@ public JavaGarbageCollectionLogLine parseLogLine(String logLine) { } // start parsing - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); JavaGarbageCollectionLogLine line = new JavaGarbageCollectionLogLine(); if(reader.hasNext()) { @@ -60,7 +60,7 @@ private void parseTimes(JavaGarbageCollectionLogLine line, String str) { JavaGCTimes 
times = new JavaGCTimes(); line.times = times; - StringTokenReader reader = new StringTokenReader(str); + AdvancedStringReader reader = new AdvancedStringReader(str); // go to user if(reader.hasNext()) { @@ -103,7 +103,7 @@ private void parseGCMemoryRecords(JavaGarbageCollectionLogLine line, String str) throw new IllegalArgumentException("Unknown GC Type"); } - StringTokenReader reader = new StringTokenReader(str); + AdvancedStringReader reader = new AdvancedStringReader(str); String record; do { diff --git a/src/main/java/com/sangupta/logparser/tomcat/TomcatAccessLogParser.java b/src/main/java/com/sangupta/logparser/tomcat/TomcatAccessLogParser.java index 01220f5..077d6a8 100644 --- a/src/main/java/com/sangupta/logparser/tomcat/TomcatAccessLogParser.java +++ b/src/main/java/com/sangupta/logparser/tomcat/TomcatAccessLogParser.java @@ -3,12 +3,12 @@ import java.io.BufferedReader; import java.io.IOException; +import com.sangupta.jerry.io.AdvancedStringReader; import com.sangupta.jerry.util.AssertUtils; import com.sangupta.logparser.LogParser; import com.sangupta.logparser.LogParserUtils; import com.sangupta.logparser.common.HttpRequest; import com.sangupta.logparser.common.IPAddress; -import com.sangupta.logparser.common.StringTokenReader; public class TomcatAccessLogParser implements LogParser { @@ -27,7 +27,7 @@ public TomcatAccessLogLine parseLogLine(String logLine) { TomcatAccessLogLine line = new TomcatAccessLogLine(); - StringTokenReader reader = new StringTokenReader(logLine); + AdvancedStringReader reader = new AdvancedStringReader(logLine); if(reader.hasNext()) { line.clientIP = IPAddress.fromString(reader.readTillNext('-')); } @@ -41,7 +41,7 @@ public TomcatAccessLogLine parseLogLine(String logLine) { } line.statusCode = LogParserUtils.asInt(reader.readTillNext(' ', 2)); - line.responseSize = LogParserUtils.asInt(reader.getRemaining(), 0); + line.responseSize = LogParserUtils.asInt(reader.readRemaining(), 0); return line; } diff --git 
a/src/test/java/com/sangupta/logparser/common/TestStringTokenReader.java b/src/test/java/com/sangupta/logparser/common/TestStringTokenReader.java deleted file mode 100644 index 61209c4..0000000 --- a/src/test/java/com/sangupta/logparser/common/TestStringTokenReader.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.sangupta.logparser.common; - -import org.junit.Assert; -import org.junit.Test; - -public class TestStringTokenReader { - - @Test - public void testReadBetween() { - StringTokenReader reader = new StringTokenReader("Hello [World]!"); - - Assert.assertEquals("World", reader.readBetween('[', ']')); - Assert.assertEquals("!", reader.getRemaining()); - Assert.assertNull(reader.readBetween('(', ')')); - - reader = new StringTokenReader("Hello [World] - a [beautiful] place to [live] in!"); - Assert.assertEquals("World", reader.readBetween('[', ']')); - Assert.assertEquals("beautiful", reader.readBetween('[', ']')); - Assert.assertEquals("live", reader.readBetween('[', ']')); - Assert.assertEquals(" in!", reader.getRemaining()); - - // test with multiple openings - reader = new StringTokenReader("(this (is) (a (beautiful)) world)"); - Assert.assertEquals("this (is) (a (beautiful)) world", reader.readBetween('(', ')')); - Assert.assertNull(reader.readBetween('(', ')')); - Assert.assertNull(reader.getRemaining()); - - reader = new StringTokenReader("this (is) (a (beautiful)) world"); - Assert.assertEquals("is", reader.readBetween('(', ')')); - Assert.assertEquals("a (beautiful)", reader.readBetween('(', ')')); - Assert.assertEquals(" world", reader.getRemaining()); - - // when and opening closing are same - reader = new StringTokenReader("hello | world | this is a nice world"); - Assert.assertEquals(" world ", reader.readBetween('|', '|')); - Assert.assertEquals(" this is a nice world", reader.getRemaining()); - } - - @Test - public void testReadTillNextChar() { - StringTokenReader reader = new StringTokenReader("Hello [World]!"); - - Assert.assertEquals("Hello", 
reader.readTillNext(' ')); - Assert.assertEquals("", reader.readTillNext('[')); - Assert.assertEquals("World", reader.readTillNext(']')); - Assert.assertEquals("!", reader.getRemaining()); - } - - @Test - public void testReadTillNextString() { - StringTokenReader reader = new StringTokenReader("Hello [World] - a [beautiful] place to [live] in!"); - - Assert.assertEquals("Hello [World]", reader.readTillNext(" - ")); - Assert.assertEquals("a [beautiful]", reader.readTillNext(" place")); - Assert.assertEquals(" to [live] ", reader.readTillNext("in")); - Assert.assertEquals("!", reader.getRemaining()); - } - -}