Skip to content

Commit

Permalink
Drop all use of OSSpinLock
Browse files Browse the repository at this point in the history
Apple engineers have pointed out that OSSpinLocks are vulnerable to live locking
on iOS in cases of priority inversion:
. http://mjtsai.com/blog/2015/12/16/osspinlock-is-unsafe/
. https://lists.swift.org/pipermail/swift-dev/Week-of-Mon-20151214/000372.html

- Use a dispatch_semaphore_t within the extension registry.
- Use a dispatch_semaphore_t for protecting autocreation within messages.
- Drop the custom/internal GPBString class since we don't have really good
  numbers to judge the locking replacements and it isn't required. We can
  always bring it back with real data in the future.
  • Loading branch information
thomasvl committed Dec 17, 2015
1 parent afbc89a commit d6590d6
Show file tree
Hide file tree
Showing 11 changed files with 53 additions and 874 deletions.
1 change: 0 additions & 1 deletion Makefile.am
Expand Up @@ -468,7 +468,6 @@ objectivec_EXTRA_DIST= \
objectivec/Tests/GPBMessageTests.m \
objectivec/Tests/GPBObjectiveCPlusPlusTest.mm \
objectivec/Tests/GPBPerfTests.m \
objectivec/Tests/GPBStringTests.m \
objectivec/Tests/GPBSwiftTests.swift \
objectivec/Tests/GPBTestUtilities.h \
objectivec/Tests/GPBTestUtilities.m \
Expand Down
329 changes: 11 additions & 318 deletions objectivec/GPBCodedInputStream.m
Expand Up @@ -216,7 +216,17 @@ int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state) {
result = @"";
} else {
CheckSize(state, size);
result = GPBCreateGPBStringWithUTF8(&state->bytes[state->bufferPos], size);
result = [[NSString alloc] initWithBytes:&state->bytes[state->bufferPos]
length:size
encoding:NSUTF8StringEncoding];
if (!result) {
result = @"";
#ifdef DEBUG
// https://developers.google.com/protocol-buffers/docs/proto#scalar
NSLog(@"UTF8 failure, is some field type 'string' when it should be "
@"'bytes'?");
#endif
}
state->bufferPos += size;
}
return result;
Expand Down Expand Up @@ -478,320 +488,3 @@ - (int64_t)readSInt64 {
}

@end

@implementation GPBString {
@package
CFStringRef string_;
unsigned char *utf8_;
NSUInteger utf8Len_;

// This lock is used to gate access to utf8_. Once GPBStringInitStringValue()
// has been called, string_ will be filled in, and utf8_ will be NULL.
OSSpinLock lock_;

BOOL hasBOM_;
BOOL is7BitAscii_;
}

// Returns true if the passed in bytes are 7 bit ascii.
// This routine needs to be fast.
static bool AreBytesIn7BitASCII(const uint8_t *bytes, NSUInteger len) {
// In the loops below, it's more efficient to collect rather than do
// conditional at every step.
#if __LP64__
// Align bytes. This is especially important in case of 3 byte BOM.
while (len > 0 && ((size_t)bytes & 0x07)) {
if (*bytes++ & 0x80) return false;
len--;
}
while (len >= 32) {
uint64_t val = *(const uint64_t *)bytes;
uint64_t hiBits = (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
hiBits |= (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
hiBits |= (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
if (hiBits | (val & 0x8080808080808080ULL)) return false;
bytes += 8;
len -= 32;
}

while (len >= 16) {
uint64_t val = *(const uint64_t *)bytes;
uint64_t hiBits = (val & 0x8080808080808080ULL);
bytes += 8;
val = *(const uint64_t *)bytes;
if (hiBits | (val & 0x8080808080808080ULL)) return false;
bytes += 8;
len -= 16;
}

while (len >= 8) {
uint64_t val = *(const uint64_t *)bytes;
if (val & 0x8080808080808080ULL) return false;
bytes += 8;
len -= 8;
}
#else // __LP64__
// Align bytes. This is especially important in case of 3 byte BOM.
while (len > 0 && ((size_t)bytes & 0x03)) {
if (*bytes++ & 0x80) return false;
len--;
}
while (len >= 16) {
uint32_t val = *(const uint32_t *)bytes;
uint32_t hiBits = (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
hiBits |= (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
hiBits |= (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
if (hiBits | (val & 0x80808080U)) return false;
bytes += 4;
len -= 16;
}

while (len >= 8) {
uint32_t val = *(const uint32_t *)bytes;
uint32_t hiBits = (val & 0x80808080U);
bytes += 4;
val = *(const uint32_t *)bytes;
if (hiBits | (val & 0x80808080U)) return false;
bytes += 4;
len -= 8;
}
#endif // __LP64__

while (len >= 4) {
uint32_t val = *(const uint32_t *)bytes;
if (val & 0x80808080U) return false;
bytes += 4;
len -= 4;
}

while (len--) {
if (*bytes++ & 0x80) return false;
}

return true;
}

static void GPBStringInitStringValue(GPBString *string) {
OSSpinLockLock(&string->lock_);
GPBStringInitStringValueAlreadyLocked(string);
OSSpinLockUnlock(&string->lock_);
}

static void GPBStringInitStringValueAlreadyLocked(GPBString *string) {
if (string->string_ == NULL && string->utf8_ != NULL) {
// Using kCFAllocatorMalloc for contentsDeallocator, as buffer in
// string->utf8_ is being handed off.
string->string_ = CFStringCreateWithBytesNoCopy(
NULL, string->utf8_, string->utf8Len_, kCFStringEncodingUTF8, false,
kCFAllocatorMalloc);
if (!string->string_) {
#ifdef DEBUG
// https://developers.google.com/protocol-buffers/docs/proto#scalar
NSLog(@"UTF8 failure, is some field type 'string' when it should be "
@"'bytes'?");
#endif
string->string_ = CFSTR("");
string->utf8Len_ = 0;
// On failure, we have to clean up the buffer.
free(string->utf8_);
}
string->utf8_ = NULL;
}
}

GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length) {
GPBString *result = [[GPBString alloc] initWithBytes:bytes length:length];
return result;
}

- (instancetype)initWithBytes:(const void *)bytes length:(NSUInteger)length {
self = [super init];
if (self) {
utf8_ = malloc(length);
memcpy(utf8_, bytes, length);
utf8Len_ = length;
lock_ = OS_SPINLOCK_INIT;
is7BitAscii_ = AreBytesIn7BitASCII(bytes, length);
if (length >= 3 && memcmp(utf8_, "\xef\xbb\xbf", 3) == 0) {
// We can't just remove the BOM from the string here, because in the case
// where we have > 1 BOM at the beginning of the string, we will remove one,
// and the internal NSString we create will remove the next one, and we will
// end up with a GPBString != NSString issue.
// We also just can't remove all the BOMs because then we would end up with
// potential cases where a GPBString and an NSString made with the same
// UTF8 buffer would in fact be different.
// We record the fact we have a BOM, and use it as necessary to simulate
// what NSString would return for various calls.
hasBOM_ = YES;
#if DEBUG
// Sending BOMs across the line is just wasting bits.
NSLog(@"Bad data? String should not have BOM!");
#endif // DEBUG
}
}
return self;
}

- (void)dealloc {
if (string_ != NULL) {
CFRelease(string_);
}
if (utf8_ != NULL) {
free(utf8_);
}
[super dealloc];
}

// Required NSString overrides.
- (NSUInteger)length {
if (is7BitAscii_) {
return utf8Len_;
} else {
GPBStringInitStringValue(self);
return CFStringGetLength(string_);
}
}

- (unichar)characterAtIndex:(NSUInteger)anIndex {
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
unichar result = utf8_[anIndex];
OSSpinLockUnlock(&lock_);
return result;
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
return CFStringGetCharacterAtIndex(string_, anIndex);
}
}

// Override a couple of methods that typically want high performance.

- (id)copyWithZone:(NSZone *)zone {
GPBStringInitStringValue(self);
return [(NSString *)string_ copyWithZone:zone];
}

- (id)mutableCopyWithZone:(NSZone *)zone {
GPBStringInitStringValue(self);
return [(NSString *)string_ mutableCopyWithZone:zone];
}

- (NSUInteger)hash {
// Must convert to string here to make sure that the hash is always
// consistent no matter what state the GPBString is in.
GPBStringInitStringValue(self);
return CFHash(string_);
}

- (BOOL)isEqual:(id)object {
if (self == object) {
return YES;
}
if ([object isKindOfClass:[NSString class]]) {
GPBStringInitStringValue(self);
return CFStringCompare(string_, (CFStringRef)object, 0) ==
kCFCompareEqualTo;
}
return NO;
}

- (void)getCharacters:(unichar *)buffer range:(NSRange)aRange {
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
unsigned char *bytes = &(utf8_[aRange.location]);
for (NSUInteger i = 0; i < aRange.length; ++i) {
buffer[i] = bytes[i];
}
OSSpinLockUnlock(&lock_);
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
CFStringGetCharacters(string_, CFRangeMake(aRange.location, aRange.length),
buffer);
}
}

- (NSUInteger)lengthOfBytesUsingEncoding:(NSStringEncoding)encoding {
if ((encoding == NSUTF8StringEncoding) ||
(encoding == NSASCIIStringEncoding && is7BitAscii_)) {
return utf8Len_ - (hasBOM_ ? 3 : 0);
} else {
GPBStringInitStringValue(self);
return [(NSString *)string_ lengthOfBytesUsingEncoding:encoding];
}
}

- (BOOL)getBytes:(void *)buffer
maxLength:(NSUInteger)maxLength
usedLength:(NSUInteger *)usedLength
encoding:(NSStringEncoding)encoding
options:(NSStringEncodingConversionOptions)options
range:(NSRange)range
remainingRange:(NSRangePointer)remainingRange {
// [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
// does not return reliable results if the maxLength argument is 0
// (Radar 16385183). Therefore we have special cased it as a slow case so
// that it behaves however Apple intends it to behave. It should be a rare
// case.
//
// [NSString getBytes:maxLength:usedLength:encoding:options:range:remainingRange]
// does not return reliable results if the range is outside of the strings
// length (Radar 16396177). Therefore we have special cased it as a slow
// case so that it behaves however Apple intends it to behave. It should
// be a rare case.
//
// We can optimize the UTF8StringEncoding and NSASCIIStringEncoding with no
// options cases.
if ((options == 0) &&
(encoding == NSUTF8StringEncoding || encoding == NSASCIIStringEncoding) &&
(maxLength != 0) &&
(NSMaxRange(range) <= utf8Len_)) {
// Might be able to optimize it.
OSSpinLockLock(&lock_);
if (is7BitAscii_ && utf8_) {
NSUInteger length = range.length;
length = (length < maxLength) ? length : maxLength;
memcpy(buffer, utf8_ + range.location, length);
if (usedLength) {
*usedLength = length;
}
if (remainingRange) {
remainingRange->location = range.location + length;
remainingRange->length = range.length - length;
}
OSSpinLockUnlock(&lock_);
if (length > 0) {
return YES;
} else {
return NO;
}
} else {
GPBStringInitStringValueAlreadyLocked(self);
OSSpinLockUnlock(&lock_);
}
} else {
GPBStringInitStringValue(self);
}
return [(NSString *)string_ getBytes:buffer
maxLength:maxLength
usedLength:usedLength
encoding:encoding
options:options
range:range
remainingRange:remainingRange];
}

@end
17 changes: 0 additions & 17 deletions objectivec/GPBCodedInputStream_PackagePrivate.h
Expand Up @@ -39,19 +39,6 @@
@class GPBUnknownFieldSet;
@class GPBFieldDescriptor;

// GPBString is a string subclass that avoids the overhead of initializing
// a full NSString until it is actually needed. Lots of protocol buffers contain
// strings, and instantiating all of those strings and having them parsed to
// verify correctness when the message was being read was expensive, when many
// of the strings were never being used.
//
// Note for future-self. I tried implementing this using a NSProxy.
// Turned out the performance was horrible in client apps because folks
// like to use libraries like SBJSON that grab characters one at a time.
// The proxy overhead was a killer.
@interface GPBString : NSString
@end

typedef struct GPBCodedInputStreamState {
const uint8_t *bytes;
size_t bufferSize;
Expand Down Expand Up @@ -92,10 +79,6 @@ typedef struct GPBCodedInputStreamState {

CF_EXTERN_C_BEGIN

// Returns a GPBString with a +1 retain count.
GPBString *GPBCreateGPBStringWithUTF8(const void *bytes, NSUInteger length)
__attribute__((ns_returns_retained));

int32_t GPBCodedInputStreamReadTag(GPBCodedInputStreamState *state);

double GPBCodedInputStreamReadDouble(GPBCodedInputStreamState *state);
Expand Down

0 comments on commit d6590d6

Please sign in to comment.