Permalink
Browse files

feat: show proper error message when connecting to non-UTF-8 database

When the database uses an encoding other than UTF-8, pgjdbc cannot easily decode
error messages, since the backend sends just bytes.
Typically, client_encoding is set to UTF-8; however, errors that happen
before authentication are still sent in the server-default encoding, and thus
they cause issues for pgjdbc.

Added several heuristics to cover basic languages and encodings.

The relevant hackers thread: https://www.postgresql.org/message-id/bd579724-86f7-615b-fce8-8073731eef97@2ndquadrant.com

fixes #594
fixes #165
  • Loading branch information...
vlsi committed Aug 9, 2016
1 parent 232569c commit ec5fb4f5a66b6598aea1c7ab8df3126ee77d15e2
@@ -0,0 +1,138 @@
package org.postgresql.core;
import java.io.IOException;
/**
 * Predicts encoding for error messages based on some heuristics:
 * 1) For certain languages, it is known how "FATAL" is translated
 * 2) For Japanese, several common words are hardcoded
 * 3) Then try various LATIN encodings
 */
public class EncodingPredictor {
  /**
   * U+FFFD (decimal 65533), the Unicode replacement character. Its presence in a decoded string
   * is treated as "this encoding did not fit the bytes".
   */
  private static final char REPLACEMENT_CHAR = '\uFFFD';

  /**
   * In certain cases the encoding is not known for sure (e.g. before authentication).
   * In such cases, backend might send messages in "native to database" encoding,
   * thus pgjdbc has to guess the encoding and report which one it picked.
   */
  public static class DecodeResult {
    /** The decoded text. */
    public final String result;
    /** JVM name of the encoding that was used, or {@code null} if the caller supplied none. */
    public final String encoding; // JVM name

    DecodeResult(String result, String encoding) {
      this.result = result;
      this.encoding = encoding;
    }
  }

  /**
   * One heuristic rule: how to recognise a language in raw bytes and which database encodings
   * to try for it.
   */
  static class Translation {
    /** Translation of the "FATAL" severity for this language, or {@code null} to skip the check. */
    public final String fatalText;
    /** Extra words, at least one of which must be present; {@code null} to skip the check. */
    private final String[] texts;
    /** Language tag, informational only (not read by {@link #decode}). */
    public final String language;
    /** Database encoding names to try, in order. */
    public final String[] encodings;

    Translation(String fatalText, String[] texts, String language, String... encodings) {
      this.fatalText = fatalText;
      this.texts = texts;
      this.language = language;
      this.encodings = encodings;
    }
  }

  /** Rules are tried in declaration order; the catch-all LATIN rule must stay last. */
  private static final Translation[] FATAL_TRANSLATIONS =
      new Translation[]{
          new Translation("ВАЖНО", null, "ru", "WIN", "ALT", "KOI8"),
          new Translation("致命错误", null, "zh_CN", "EUC_CN", "GBK", "BIG5"),
          new Translation("KATASTROFALNY", null, "pl", "LATIN2"),
          new Translation("FATALE", null, "it", "LATIN1", "LATIN9"),
          new Translation("FATAL", new String[]{"は存在しません" /* ~ does not exist */,
              "ロール" /* ~ role */, "ユーザ" /* ~ user */}, "ja", "EUC_JP", "SJIS"),
          new Translation(null, null, "fr/de/es/pt_BR", "LATIN1", "LATIN3", "LATIN4", "LATIN5",
              "LATIN7", "LATIN9"),
      };

  /**
   * Tries to decode a raw error message by guessing its encoding.
   *
   * @param bytes buffer that holds the message
   * @param offset index of the first message byte in {@code bytes}
   * @param length number of message bytes
   * @return the decoded text together with the encoding name, or {@code null} if no candidate
   *         encoding produced a clean result
   */
  public static DecodeResult decode(byte[] bytes, int offset, int length) {
    Encoding defaultEncoding = Encoding.defaultEncoding();
    for (Translation tr : FATAL_TRANSLATIONS) {
      for (String encoding : tr.encodings) {
        Encoding encoder = Encoding.getDatabaseEncoding(encoding);
        if (encoder == defaultEncoding) {
          continue;
        }

        // If there is a translation for "FATAL", then try typical encodings for that language
        if (tr.fatalText != null
            && !containsSeverity(bytes, offset, length, encoder, tr.fatalText)) {
          continue;
        }

        // No idea how to tell Japanese from Latin languages, thus just hard-code certain
        // Japanese words
        if (tr.texts != null && !containsAnyText(bytes, offset, length, encoder, tr.texts)) {
          // Error message does not have key parts, will try other encodings
          continue;
        }

        try {
          String decoded = encoder.decode(bytes, offset, length);
          if (decoded.indexOf(REPLACEMENT_CHAR) != -1) {
            // bad character in string, try another encoding
            continue;
          }
          return new DecodeResult(decoded, encoder.name());
        } catch (IOException ignored) {
          // do not care, the next candidate encoding will be tried
        }
      }
    }
    return null;
  }

  /**
   * Checks whether the message holds a field made of the byte {@code 'S'}, the given text in the
   * given encoding, and a terminating zero byte (the severity field of a PostgreSQL error
   * message).
   */
  private static boolean containsSeverity(byte[] bytes, int offset, int length,
      Encoding encoder, String fatalText) {
    byte[] encoded;
    try {
      byte[] tmp = encoder.encode(fatalText);
      encoded = new byte[tmp.length + 2];
      encoded[0] = 'S';
      encoded[encoded.length - 1] = 0;
      System.arraycopy(tmp, 0, encoded, 1, tmp.length);
    } catch (IOException ignored) {
      // should not happen; treat the encoding as a non-match
      return false;
    }
    return arrayContains(bytes, offset, length, encoded, 0, encoded.length);
  }

  /**
   * Checks whether at least one of the texts, encoded with the given encoding, occurs in the
   * message.
   */
  private static boolean containsAnyText(byte[] bytes, int offset, int length,
      Encoding encoder, String[] texts) {
    for (String text : texts) {
      try {
        byte[] textBytes = encoder.encode(text);
        if (arrayContains(bytes, offset, length, textBytes, 0, textBytes.length)) {
          return true;
        }
      } catch (IOException ignored) {
        // do not care, will try other texts and encodings
      }
    }
    return false;
  }

  /**
   * Tells whether the window {@code second[secondOffset, secondOffset + secondLength)} occurs
   * inside the window {@code first[firstOffset, firstOffset + firstLength)}.
   *
   * <p>Only bytes inside both windows are ever read. An empty {@code second} window carries no
   * evidence for the heuristics above, so it is reported as "not found".</p>
   */
  private static boolean arrayContains(
      byte[] first, int firstOffset, int firstLength,
      byte[] second, int secondOffset, int secondLength
  ) {
    if (secondLength <= 0 || firstLength < secondLength) {
      return false;
    }
    // Last start position at which the whole needle still fits into the window
    int lastStart = firstLength - secondLength;
    for (int i = 0; i <= lastStart; i++) {
      int j = 0;
      while (j < secondLength && first[firstOffset + i + j] == second[secondOffset + j]) {
        j++;
      }
      if (j == secondLength) {
        return true;
      }
    }
    return false;
  }
}
@@ -333,6 +333,36 @@ public String receiveString(int len) throws IOException {
return res;
}
/**
 * Reads a fixed-size error string from the backend without failing on bytes that the
 * connection encoding cannot decode.
 *
 * <p>The connection encoding is tried first. If it throws, the encoding is guessed via
 * {@link EncodingPredictor#decode}, and if that yields nothing the default encoding is used.
 * The returned {@code encoding} is {@code null} only when the connection encoding worked.</p>
 *
 * @param len the length of the string to receive, in bytes.
 * @return the decoded string along with the name of the guessed encoding, if any
 * @throws IOException if the bytes are not available or the last-resort decode fails
 */
public EncodingPredictor.DecodeResult receiveErrorString(int len) throws IOException {
  if (!pg_input.ensureBytes(len)) {
    throw new EOFException();
  }

  EncodingPredictor.DecodeResult decoded;
  try {
    // no autodetect warning as the message was converted on its own
    decoded = new EncodingPredictor.DecodeResult(
        encoding.decode(pg_input.getBuffer(), pg_input.getIndex(), len), null);
  } catch (IOException e) {
    decoded = EncodingPredictor.decode(pg_input.getBuffer(), pg_input.getIndex(), len);
    if (decoded == null) {
      // None of the heuristics matched: fall back to the default encoding
      Encoding fallback = Encoding.defaultEncoding();
      decoded = new EncodingPredictor.DecodeResult(
          fallback.decode(pg_input.getBuffer(), pg_input.getIndex(), len), fallback.name());
    }
  }

  // The bytes were decoded in place, so consume them only now
  pg_input.skip(len);
  return decoded;
}
/**
* Receives a null-terminated string from the backend. If we don't see a null, then we assume
* something has gone wrong.
@@ -439,7 +439,7 @@ private void doAuthentication(PGStream pgStream, String host, String user, Prope
}
ServerErrorMessage errorMsg =
new ServerErrorMessage(pgStream.receiveString(l_elen - 4), logger.getLogLevel());
new ServerErrorMessage(pgStream.receiveErrorString(l_elen - 4), logger.getLogLevel());
if (logger.logDebug()) {
logger.debug(" <=BE ErrorMessage(" + errorMsg + ")");
}
@@ -12,6 +12,7 @@
import org.postgresql.PGProperty;
import org.postgresql.copy.CopyOperation;
import org.postgresql.core.Encoding;
import org.postgresql.core.EncodingPredictor;
import org.postgresql.core.Field;
import org.postgresql.core.Logger;
import org.postgresql.core.NativeQuery;
@@ -2391,7 +2392,7 @@ private SQLException receiveErrorResponse() throws IOException {
// check at the bottom to see if we need to throw an exception
int elen = pgStream.receiveInteger4();
String totalMessage = pgStream.receiveString(elen - 4);
EncodingPredictor.DecodeResult totalMessage = pgStream.receiveErrorString(elen - 4);
ServerErrorMessage errorMsg = new ServerErrorMessage(totalMessage, logger.getLogLevel());
if (logger.logDebug()) {
@@ -105,7 +105,7 @@ public Exception run() {
if (response == 'E') {
int l_elen = pgStream.receiveInteger4();
ServerErrorMessage l_errorMsg =
new ServerErrorMessage(pgStream.receiveString(l_elen - 4), logger.getLogLevel());
new ServerErrorMessage(pgStream.receiveErrorString(l_elen - 4), logger.getLogLevel());
if (logger.logDebug()) {
logger.debug(" <=BE ErrorMessage(" + l_errorMsg + ")");
@@ -8,6 +8,8 @@
package org.postgresql.util;
import org.postgresql.core.EncodingPredictor;
import java.io.Serializable;
import java.util.HashMap;
import java.util.Map;
@@ -35,6 +37,16 @@
private final Map<Character, String> m_mesgParts = new HashMap<Character, String>();
private final int verbosity;
public ServerErrorMessage(EncodingPredictor.DecodeResult serverError, int verbosity) {
this(serverError.result, verbosity);
if (serverError.encoding != null) {
m_mesgParts.put(MESSAGE, m_mesgParts.get(MESSAGE)
+ GT.tr(" (pgjdbc: autodetected server-encoding to be {0}, if the message is not readable, please check database logs and/or host, port, dbname, user, password, pg_hba.conf)",
serverError.encoding)
);
}
}
public ServerErrorMessage(String p_serverError, int verbosity) {
this.verbosity = verbosity;

0 comments on commit ec5fb4f

Please sign in to comment.