Skip to content

Commit

Permalink
оптимизация RuTypoChanger
Browse files Browse the repository at this point in the history
профилирование показывает, что заметная часть CPU уходит на замену кавычек.

Сделано:

* предкомпилированные регулярные выражения вместо String.replaceAll и String.replace (которые
  внутри компилируют регулярное выражение при каждом вызове)
* замена StringBuffer на StringBuilder: потокобезопасность тут не требуется, а синхронизация может мешать JIT
* использование CharSequence, чтобы избежать многократных вызовов StringBuilder.toString
  • Loading branch information
maxcom committed Apr 24, 2015
1 parent 112dbcd commit c2ce230
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 21 deletions.
48 changes: 29 additions & 19 deletions src/main/java/ru/org/linux/util/formatter/RuTypoChanger.java
Expand Up @@ -15,6 +15,8 @@

package ru.org.linux.util.formatter;

import java.util.regex.Pattern;

public class RuTypoChanger {

/*
Expand All @@ -36,7 +38,19 @@ public class RuTypoChanger {
private static final char[] PUNCTUATION = {'.', ',', ':', ';', '-', '!', '?', '(', ')'};

private int quoteDepth = 0;
private String localBuff = "";
private CharSequence localBuff = "";

private final static Pattern QUOTE_PATTERN = Pattern.compile("&quot;", Pattern.LITERAL);
private final static Pattern QUOTE_CHAR_PATTERN = Pattern.compile("(''|\")");

private final static Pattern QUOTE_IN_OPEN_PATTERN =
Pattern.compile(Character.toString(QUOTE_IN_OPEN), Pattern.LITERAL);
private final static Pattern QUOTE_IN_CLOSE_PATTERN =
Pattern.compile(Character.toString(QUOTE_IN_CLOSE), Pattern.LITERAL);
private final static Pattern QUOTE_OUT_OPEN_PATTERN =
Pattern.compile(Character.toString(QUOTE_OUT_OPEN), Pattern.LITERAL);
private final static Pattern QUOTE_OUT_CLOSE_PATTERN =
Pattern.compile(Character.toString(QUOTE_OUT_CLOSE), Pattern.LITERAL);

private static boolean isQuoteChar(char ch) {
return ch == QUOTE_SYMBOL ||
Expand All @@ -51,17 +65,15 @@ private static boolean isPunctuation(char ch) {
return false;
}

private static char firstNonQuote(String buff, int start) {

/**
 * Scans backwards from the character just before {@code start} and returns the
 * nearest character that {@code isQuoteChar} does not classify as a quote.
 *
 * @param buff  text to inspect
 * @param start index whose preceding characters are examined; the character at
 *              {@code start} itself is not looked at by the loop
 * @return the closest non-quote character found before {@code start}, or the
 *         character at index 0 when every preceding character is a quote
 *         (or when {@code start} is 0 and there is nothing to scan)
 */
private static char firstNonQuote(CharSequence buff, int start) {
// Walk towards the beginning of the text, skipping over quote characters.
for (int pt = start - 1; pt >= 0; pt--) {
if (!isQuoteChar(buff.charAt(pt)))
return buff.charAt(pt);
}
// Nothing but quotes (or nothing at all) precedes start: fall back to the first character.
// NOTE(review): index 0 is read unconditionally, so an empty buff would presumably
// fail inside charAt — confirm callers never pass one.
return buff.charAt(0);
}

private static char lastNonQuote(String buff, int start) {

private static char lastNonQuote(CharSequence buff, int start) {
for (int pt = start + 1; pt < buff.length(); pt++) {
if (!isQuoteChar(buff.charAt(pt)))
return buff.charAt(pt);
Expand All @@ -70,7 +82,7 @@ private static char lastNonQuote(String buff, int start) {
}


private static boolean isQuoteOpening(String buff, int position) {
private static boolean isQuoteOpening(CharSequence buff, int position) {
char before, after;

if (position == buff.length() - 1)
Expand All @@ -92,10 +104,10 @@ else if (position == 0)
return true;
}

private boolean isQuoteClosing(String buff, int position) {
private boolean isQuoteClosing(CharSequence buff, int position) {
char before, after;

if (position == 0 && localBuff.equals(""))
if (position == 0 && localBuff.length()==0)
return false;
else if (position == buff.length() - 1)
return true;
Expand Down Expand Up @@ -124,20 +136,19 @@ else if (position == buff.length() - 1)
*/

public String format(String input) {

StringBuffer buff = new StringBuffer(input.replaceAll("&quot;", "\""));
StringBuilder buff = new StringBuilder(QUOTE_PATTERN.matcher(input).replaceAll("\""));

for (int iter = 0; iter < buff.length(); iter++) {
if (buff.charAt(iter) == QUOTE_SYMBOL) {
if (isQuoteClosing(buff.toString(), iter) && quoteDepth > 0) {
if (isQuoteClosing(buff, iter) && quoteDepth > 0) {
if (quoteDepth == 1)
buff.setCharAt(iter, QUOTE_OUT_CLOSE);
else
buff.setCharAt(iter, QUOTE_IN_CLOSE);
quoteDepth--;
}
else
if (isQuoteOpening(buff.toString(), iter)) { //убеждаемся, что всё так
if (isQuoteOpening(buff, iter)) { //убеждаемся, что всё так
if (quoteDepth == 0)
buff.setCharAt(iter, QUOTE_OUT_OPEN);
else
Expand All @@ -149,15 +160,14 @@ public String format(String input) {

}

localBuff = buff.toString();
input = buff.toString().replaceAll("(''|\")", "&quot;");
localBuff = buff;
input = QUOTE_CHAR_PATTERN.matcher(buff).replaceAll("&quot;");

input = input.replaceAll(Character.toString(QUOTE_IN_OPEN), QUOTE_IN_OPEN_HTML);
input = input.replaceAll(Character.toString(QUOTE_IN_CLOSE), QUOTE_IN_CLOSE_HTML);
input = input.replaceAll(Character.toString(QUOTE_OUT_OPEN), QUOTE_OUT_OPEN_HTML);
input = input.replaceAll(Character.toString(QUOTE_OUT_CLOSE), QUOTE_OUT_CLOSE_HTML);
input = QUOTE_IN_OPEN_PATTERN.matcher(input).replaceAll(QUOTE_IN_OPEN_HTML);
input = QUOTE_IN_CLOSE_PATTERN.matcher(input).replaceAll(QUOTE_IN_CLOSE_HTML);
input = QUOTE_OUT_OPEN_PATTERN.matcher(input).replaceAll(QUOTE_OUT_OPEN_HTML);
input = QUOTE_OUT_CLOSE_PATTERN.matcher(input).replaceAll(QUOTE_OUT_CLOSE_HTML);

return input;
}

}
Expand Up @@ -31,8 +31,6 @@ public class RuTypoChangerTest {
private final String inputString;
private final String expectedResult;

private static final RuTypoChanger typoChanger = new RuTypoChanger();

public RuTypoChangerTest(String inputString, String expectedResult) {
this.inputString = inputString;
this.expectedResult = expectedResult;
Expand Down

2 comments on commit c2ce230

@kazaand
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Насколько лучше стало после этого коммита?

@wayerr
Copy link

@wayerr wayerr commented on c2ce230 Apr 25, 2015

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Стало быстрее, и в памяти создаётся меньше мелких объектов — и только.

Please sign in to comment.