Permalink
Browse files

Fix some issues with charset handling during connect

* hostname and file paths are places where libmysqlclient ultimately only passes the string to an OS library so we should use whatever charset the OS expects, not mysql
* Even though _makeRawMySQLConnectionWithEncoding:isMasterConnection: takes an explicit charset argument most of the conversion logic simply used whatever charset the existing connection currently has, which is not neccesarily the one the new connection should use
* Add some remarks about the charset handling with passwords and mysql_error()
* Charsets do not apply to sqlstate.
  • Loading branch information...
dmoagx committed May 1, 2017
1 parent 2b081c2 commit fac661c5d1dcac7780521facdaebdc6f4c1afa27
@@ -34,6 +34,7 @@
@interface SPMySQLConnection (Conversion)
+ (const char *)_cStringForString:(NSString *)aString usingEncoding:(NSStringEncoding)anEncoding returningLengthAs:(NSUInteger *)cStringLengthPointer;
+ (NSString *)_stringForCString:(const char *)cString usingEncoding:(NSStringEncoding)encoding;
- (const char *)_cStringForString:(NSString *)aString;
- (NSString *)_stringForCString:(const char *)cString;
@@ -56,3 +57,16 @@ static inline const char* _cStringForStringWithEncoding(NSString* aString, NSStr
return (const char *)(*cachedMethodPointer)(cachedClass, cachedSelector, aString, anEncoding, cStringLengthPointer);
}
/**
* Converts a C string (NUL-terminated) to an NSString using the supplied encoding.
*
* Unlike +[NSString stringWithCString:encoding:] which will crash on a NULL pointer, this method will return nil instead.
*/
static inline NSString * _stringForCStringWithEncoding(const char *aString, NSStringEncoding inputEncoding)
{
//This implementation is smaller than the cached selector voodoo above, so let's do it inline
//NSString will crash on NULL ptr
return (aString == NULL)? nil : [NSString stringWithCString:aString encoding:inputEncoding];
}
@@ -79,19 +79,23 @@ - (const char *)_cStringForString:(NSString *)aString
}
/**
* Converts a C string to an NSString using the supplied encoding.
* Converts a C string to an NSString using the current connection encoding.
* This method *will not* correctly preserve nul characters within c strings; instead
* the first nul character within the string will be treated as the line ending. This
* is unavoidable without supplying a string length, so this method should not be widely
* used for actual data conversion.
*/
- (NSString *)_stringForCString:(const char *)cString
{
return _stringForCStringWithEncoding(cString, stringEncoding);
}
// Don't try and convert null strings
if (cString == NULL) return nil;
return [NSString stringWithCString:cString encoding:stringEncoding];
/**
* @see _stringForCStringWithEncoding()
*/
+ (NSString *)_stringForCString:(const char *)cString usingEncoding:(NSStringEncoding)encoding
{
return _stringForCStringWithEncoding(cString, encoding);
}
@end
@@ -323,7 +323,8 @@ - (id)queryString:(NSString *)theQueryString usingEncoding:(NSStringEncoding)the
// Store the error state
theErrorMessage = [self _stringForCString:mysql_error(mySQLConnection)];
theErrorID = mysql_errno(mySQLConnection);
theSqlstate = [self _stringForCString:mysql_sqlstate(mySQLConnection)];
// sqlstate is always an ASCII string, regardless of charset (but use latin1 anyway as that is less picky about invalid bytes)
theSqlstate = _stringForCStringWithEncoding(mysql_sqlstate(mySQLConnection), NSISOLatin1StringEncoding);
// Prevent retries if the query was cancelled or not a connection error
if (lastQueryWasCancelled || ![SPMySQLConnection isErrorIDConnectionError:theErrorID]) {
@@ -382,7 +383,8 @@ - (id)queryString:(NSString *)theQueryString usingEncoding:(NSStringEncoding)the
// Update the error message, if appropriate, to reflect result store errors or overall success
theErrorMessage = [self _stringForCString:mysql_error(mySQLConnection)];
theErrorID = mysql_errno(mySQLConnection);
theSqlstate = [self _stringForCString:mysql_sqlstate(mySQLConnection)];
// sqlstate is always an ASCII string, regardless of charset (but use latin1 anyway as that is less picky about invalid bytes)
theSqlstate = _stringForCStringWithEncoding(mysql_sqlstate(mySQLConnection), NSISOLatin1StringEncoding);
} else {
theResult = [[SPMySQLEmptyResult alloc] init];
}
@@ -735,7 +737,8 @@ - (void)_updateLastSqlstate:(NSString *)theSqlstate
{
// If a SQLSTATE wasn't supplied, select one from the connection
if(!theSqlstate) {
theSqlstate = [self _stringForCString:mysql_sqlstate(mySQLConnection)];
// sqlstate is always an ASCII string, regardless of charset (but use latin1 anyway as that is less picky about invalid bytes)
theSqlstate = _stringForCStringWithEncoding(mysql_sqlstate(mySQLConnection), NSISOLatin1StringEncoding);
}
// Clear the last SQLSTATE stored on the instance
@@ -606,6 +606,7 @@ - (MYSQL *)_makeRawMySQLConnectionWithEncoding:(NSString *)encodingName isMaster
mysql_options(theConnection, MYSQL_OPT_CONNECT_TIMEOUT, (const void *)&timeout);
// Set the connection encoding
NSStringEncoding connectEncodingNS = [SPMySQLConnection stringEncodingForMySQLCharset:[encodingName UTF8String]];
mysql_options(theConnection, MYSQL_SET_CHARSET_NAME, [encodingName UTF8String]);
// Set up the connection variables in the format MySQL needs, from the class-wide variables
@@ -614,22 +615,36 @@ - (MYSQL *)_makeRawMySQLConnectionWithEncoding:(NSString *)encodingName isMaster
const char *thePassword = NULL;
const char *theSocket = NULL;
if (host) theHost = [self _cStringForString:host];
if (username) theUsername = [self _cStringForString:username];
if (host) theHost = [host UTF8String]; //mysql calls getaddrinfo on the hostname. Apples code uses -UTF8String in that situation.
if (username) theUsername = _cStringForStringWithEncoding(username, connectEncodingNS, NULL); //during connect this is in MYSQL_SET_CHARSET_NAME encoding
// If a password was supplied, use it; otherwise ask the delegate if appropriate
// If a password was supplied, use it; otherwise ask the delegate if appropriate.
//
// Note that password has no charset in mysql: If a user password is set to 'ü' on a latin1 connection
// and you later try to connect on an UTF-8 terminal (or vice versa) it will fail. The MySQL (5.5) manual wrongly states that
// MYSQL_SET_CHARSET_NAME has influence over that, but it does not and could not, since the password is hashed by the client
// before transmitting it to the server and the (5.5) client has no charset support, effectively treating password as
// a NUL-terminated byte array.
// There is one exception, though: The "mysql_clear_password" auth plugin sends the password in plaintext and the server side
// MAY choose to do a charset conversion as appropriate before handing it to whatever backend is used.
// Since we don't know which auth plugin server and client will agree upon, we'll do as the manual says...
if (password) {
thePassword = [self _cStringForString:password];
thePassword = _cStringForStringWithEncoding(password, connectEncodingNS, NULL);
} else if ([delegate respondsToSelector:@selector(keychainPasswordForConnection:)]) {
thePassword = [self _cStringForString:[delegate keychainPasswordForConnection:self]];
thePassword = _cStringForStringWithEncoding([delegate keychainPasswordForConnection:self], connectEncodingNS, NULL);
}
// If set to use a socket and a socket was supplied, use it; otherwise, search for a socket to use
if (useSocket) {
if ([socketPath length]) {
theSocket = [self _cStringForString:socketPath];
} else {
theSocket = [self _cStringForString:[SPMySQLConnection findSocketPath]];
//default to user supplied path
NSString *mySocketPath = socketPath;
//if none was given, search in the default locations instead
if (![mySocketPath length]) {
mySocketPath = [SPMySQLConnection findSocketPath];
}
//get C string if we have a path (danger: method will throw on empty/nil string!)
if([mySocketPath length]) {
theSocket = [mySocketPath fileSystemRepresentation];
}
}
@@ -640,14 +655,14 @@ - (MYSQL *)_makeRawMySQLConnectionWithEncoding:(NSString *)encodingName isMaster
const char *theCACertificatePath = NULL;
const char *theSSLCiphers = SPMySQLSSLPermissibleCiphers;
if (sslKeyFilePath) {
theSSLKeyFilePath = [[sslKeyFilePath stringByExpandingTildeInPath] UTF8String];
if ([sslKeyFilePath length]) {
theSSLKeyFilePath = [[sslKeyFilePath stringByExpandingTildeInPath] fileSystemRepresentation];
}
if (sslCertificatePath) {
theSSLCertificatePath = [[sslCertificatePath stringByExpandingTildeInPath] UTF8String];
if ([sslCertificatePath length]) {
theSSLCertificatePath = [[sslCertificatePath stringByExpandingTildeInPath] fileSystemRepresentation];
}
if (sslCACertificatePath) {
theCACertificatePath = [[sslCACertificatePath stringByExpandingTildeInPath] UTF8String];
if ([sslCACertificatePath length]) {
theCACertificatePath = [[sslCACertificatePath stringByExpandingTildeInPath] fileSystemRepresentation];
}
if(sslCipherList) {
theSSLCiphers = [sslCipherList UTF8String];
@@ -663,9 +678,30 @@ - (MYSQL *)_makeRawMySQLConnectionWithEncoding:(NSString *)encodingName isMaster
// If the connection is the master connection, record the error state
if (isMaster) {
// <TODO>
// this is tricky: mysql_error() is supposed to return data encoded in character_set_results (in mysql 5.5+),
// yet the whole API treats it as if it were a plain C string.
// So if the charset is e.g. utf16 the mysql server will itself fall over that and return an empty error message
// (5.5, 5.7: the message is really missing at the network layer).
// (Side Note: There is a workaround for server generated error messages: "show warnings" will also include errors
// and because it uses a regular results table it can contain the actual error message)
//
// Before 5.5 things are much worse, because the charset of the message depends on the language of the error messages
// (which can be changed at runtime per session (or at launch time in 4.1)) plus all arguments in the template string
// will retain their original encoding.
// So if you connect with utf8 to a server with russian locale the error message will be in koi8r and contain the name of
// an erroneus value in utf8...
//
// On the other hand mysql_error() may also return errors generated by the client locally.
// The client has no charset support and simply assumes the local charset is ASCII-compatible.
// The english messages are compiled into the client (see libmysql/errmsg.c and include/errmsg.h).
// We could use a little trick, though: client errors are in the exclusive range 2000 to 2999 (CR_MIN_ERROR/CR_MAX_ERROR)
// and all their string arguments are either hostnames or file system paths, which on OS X use UTF-8.
[self _updateLastErrorMessage:[self _stringForCString:mysql_error(theConnection)]];
// </TODO>
[self _updateLastErrorID:mysql_errno(theConnection)];
[self _updateLastSqlstate:[self _stringForCString:mysql_sqlstate(theConnection)]];
// sqlstate is always an ASCII string, regardless of charset (but use latin1 anyway as that is less picky about invalid bytes)
[self _updateLastSqlstate:_stringForCStringWithEncoding(mysql_sqlstate(theConnection),NSISOLatin1StringEncoding)];
}
return NULL;

0 comments on commit fac661c

Please sign in to comment.