Skip to content

Commit

Permalink
Improve string encoding times.
Browse files Browse the repository at this point in the history
Cache a reference to Encoding.UTF8 - the cost of the property access is (rather surprisingly) significant.
Additionally, when we detect that the string is all ASCII (that is, its computed length in UTF-8 bytes equals its length in characters), we can perform the encoding very efficiently ourselves by copying each character into the buffer as a single byte.
  • Loading branch information
jskeet committed Jun 11, 2015
1 parent 954e720 commit 35e4dbd
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 5 deletions.
4 changes: 2 additions & 2 deletions csharp/src/ProtocolBuffers/CodedOutputStream.ComputeSize.cs
Expand Up @@ -135,7 +135,7 @@ public static int ComputeBoolSize(int fieldNumber, bool value)
/// </summary>
public static int ComputeStringSize(int fieldNumber, String value)
{
int byteArraySize = Encoding.UTF8.GetByteCount(value);
int byteArraySize = UTF8.GetByteCount(value);
return ComputeTagSize(fieldNumber) +
ComputeRawVarint32Size((uint) byteArraySize) +
byteArraySize;
Expand Down Expand Up @@ -323,7 +323,7 @@ public static int ComputeBoolSizeNoTag(bool value)
/// </summary>
/// <param name="value">The string whose encoded size is being computed.</param>
/// <returns>
/// The size of the varint holding the UTF-8 byte count, plus the UTF-8 byte count itself.
/// </returns>
public static int ComputeStringSizeNoTag(String value)
{
    // Use the class's cached UTF8 encoding instance instead of reading the
    // Encoding.UTF8 property on every call. The local is declared exactly
    // once; a second declaration of the same name does not compile.
    int byteArraySize = UTF8.GetByteCount(value);

    // A string is written as a varint length prefix followed by its bytes.
    return ComputeRawVarint32Size((uint) byteArraySize) +
           byteArraySize;
}
Expand Down
18 changes: 15 additions & 3 deletions csharp/src/ProtocolBuffers/CodedOutputStream.cs
Expand Up @@ -58,6 +58,8 @@ namespace Google.Protobuf
/// </remarks>
public sealed partial class CodedOutputStream : ICodedOutputStream
{
private static readonly Encoding UTF8 = Encoding.UTF8;

/// <summary>
/// The buffer size used by CreateInstance(Stream).
/// </summary>
Expand Down Expand Up @@ -294,16 +296,26 @@ public void WriteString(int fieldNumber, string fieldName, string value)
WriteTag(fieldNumber, WireFormat.WireType.LengthDelimited);
// Optimise the case where we have enough space to write
// the string directly to the buffer, which should be common.
int length = Encoding.UTF8.GetByteCount(value);
int length = UTF8.GetByteCount(value);
WriteRawVarint32((uint) length);
if (limit - position >= length)
{
Encoding.UTF8.GetBytes(value, 0, value.Length, buffer, position);
if (length == value.Length) // Must be all ASCII...
{
for (int i = 0; i < length; i++)
{
buffer[position + i] = (byte)value[i];
}
}
else
{
UTF8.GetBytes(value, 0, value.Length, buffer, position);
}
position += length;
}
else
{
byte[] bytes = Encoding.UTF8.GetBytes(value);
byte[] bytes = UTF8.GetBytes(value);
WriteRawBytes(bytes);
}
}
Expand Down

0 comments on commit 35e4dbd

Please sign in to comment.