Permalink
Browse files

C extension for Varint encode/decode methods.

Varint encoding/decoding is the biggest CPU hog under Ruby 1.8.x.

The gem builds the extension correctly, but the .deb package does not build it yet.
  • Loading branch information...
1 parent e312f20 commit 51c0052cafcc40d9bd497f3fd43e3d7df199eecc Brian Palmer committed Jun 26, 2009
Showing with 101 additions and 30 deletions.
  1. +4 −1 Rakefile
  2. +3 −0 ext/extconf.rb
  3. +65 −0 ext/varint.c
  4. +0 −5 lib/protobuf/ext/io/getbyte.rb
  5. +29 −24 lib/protobuf/message/field.rb
View
@@ -10,8 +10,11 @@ spec = Gem::Specification.new do |s|
s.platform = Gem::Platform::RUBY
s.summary = "Ruby compiler and runtime for the google protocol buffers library. Currently includes a compiler based on protoc, as well as a highly experimental pure-ruby compiler."
- s.files = FileList["{bin,lib}/**/*"].to_a
+ s.required_ruby_version = ">=1.8.6"
+
+ s.files = FileList["{bin,lib,ext}/**/*"].to_a
s.require_path = 'lib'
+ s.extensions << 'ext/extconf.rb'
end
Rake::GemPackageTask.new(spec) do |pkg|
View
@@ -0,0 +1,3 @@
+require 'mkmf' # Ruby's standard Makefile generator for C extensions
+
+create_makefile "ruby_protobufs" # extension name; matches Init_ruby_protobufs in ext/varint.c and the require in field.rb
View
@@ -0,0 +1,65 @@
+#include "ruby.h"
+
+static VALUE Protobuf, Varint;
+static ID getbyte, putbyte;
+
+/*
+ * Protobuf::Varint.encode(io, int_val) -> nil
+ *
+ * Writes int_val to io as a base-128 varint: seven payload bits per byte,
+ * least-significant group first, with the high bit (0x80) set on every byte
+ * except the last one. Each byte is emitted by calling io.putc with a Fixnum.
+ *
+ * The value is converted with NUM2ULL rather than NUM2LL so that unsigned
+ * 64-bit values above LLONG_MAX are accepted in addition to negative ones;
+ * NUM2LL rejects the former with a RangeError. Negative values end up as
+ * their 64-bit two's complement bit pattern and therefore take ten bytes.
+ *
+ * Raises whatever the numeric conversion raises for values that do not fit
+ * in 64 bits or are not numeric.
+ */
+static VALUE varint_encode(VALUE module, VALUE io, VALUE int_valV)
+{
+    /* unsigned so that >>= is a logical shift and the loop terminates */
+    unsigned long long int_val = NUM2ULL(int_valV);
+    unsigned char byte;
+
+    for (;;) {
+        byte = (unsigned char)(int_val & 0x7f);
+        int_val >>= 7;
+        if (int_val == 0) {
+            /* last group: continuation bit stays clear */
+            rb_funcall(io, putbyte, 1, INT2FIX(byte));
+            return Qnil;
+        }
+        /* more groups follow: set the continuation bit */
+        rb_funcall(io, putbyte, 1, INT2FIX(byte | 0x80));
+    }
+}
+
+/*
+ * Protobuf::Varint.decode(io) -> Integer
+ *
+ * Reads one base-128 varint from io, one byte at a time, using the reader
+ * method selected in Init_ruby_protobufs (getbyte, or getc as a fallback).
+ * The low seven bits of each byte are accumulated least-significant group
+ * first; a byte with the high bit (0x80) clear ends the value.
+ *
+ * The accumulated 64-bit pattern is returned through LL2NUM, i.e. as a
+ * signed value: patterns with bit 63 set come back negative.
+ *
+ * Raises ArgumentError when the continuation bit is still set after 64 bits
+ * worth of groups, EOFError when io runs out of data in the middle of a
+ * value, and TypeError (from NUM2INT) when the reader returns a non-integer.
+ */
+static VALUE varint_decode(VALUE module, VALUE io)
+{
+    unsigned long long int_val = 0;
+    unsigned shift = 0;
+
+    for (;;) {
+        VALUE byteV;
+        unsigned char byte;
+
+        if (shift >= 64) {
+            rb_raise(rb_eArgError, "too many bytes when decoding varint");
+        }
+
+        byteV = rb_funcall(io, getbyte, 0);
+        if (NIL_P(byteV)) {
+            /* the reader returns nil at end of input; converting nil with
+             * FIX2INT would silently yield a bogus byte */
+            rb_raise(rb_eEOFError, "end of input reached while decoding varint");
+        }
+        byte = (unsigned char)NUM2INT(byteV);
+
+        int_val |= ((unsigned long long)(byte & 0x7f)) << shift;
+        shift += 7;
+        if ((byte & 0x80) == 0) {
+            /* return ULL2NUM(int_val); */
+            return LL2NUM((long long)int_val);
+        }
+    }
+}
+
+/*
+ * Extension entry point, run by Ruby when "ruby_protobufs" is required.
+ *
+ * Defines the Protobuf::Varint module, resolves the method IDs used to read
+ * and write single bytes, and registers the C implementations of
+ * Varint.encode(io, int_val) and Varint.decode(io) as module functions.
+ */
+void Init_ruby_protobufs(void)
+{
+    Protobuf = rb_define_module("Protobuf");
+    Varint = rb_define_module_under(Protobuf, "Varint");
+
+    /* Support both 1.8.6 and 1.8.7+: prefer IO#getbyte and fall back to
+     * IO#getc when the IO class does not define it. The class is queried
+     * directly, so no throwaway IO object on file descriptor 0 is created
+     * (which would also fail at load time if that descriptor is closed). */
+    getbyte = rb_intern("getbyte");
+    if (!rb_method_boundp(rb_cIO, getbyte, 1)) {
+        getbyte = rb_intern("getc");
+    }
+
+    putbyte = rb_intern("putc");
+
+    rb_define_module_function(Varint, "encode", varint_encode, 2);
+    rb_define_module_function(Varint, "decode", varint_decode, 1);
+}
@@ -1,5 +0,0 @@
-if IO.instance_methods.grep(/^getbyte$/).empty?
- class IO
- alias_method :getbyte, :getc
- end
-end
@@ -1,6 +1,6 @@
# TODO: types are not checked for repeated fields
-require 'protobuf/ext/io/getbyte'
+require 'ruby_protobufs'
module Protobuf
class InvalidFieldValue < StandardError; end
@@ -15,34 +15,39 @@ module WireTypes
end
module Varint
+ # encode/decode methods defined in ext/varint.c
- def self.encode(io, int_val)
- if int_val < 0
- # negative varints are always encoded with the full 10 bytes
- int_val = int_val & 0xffffffff_ffffffff
- end
- loop do
- byte = int_val & 0b0111_1111
- int_val >>= 7
- if int_val == 0
- io << byte.chr
- break
- else
- io << (byte | 0b1000_0000).chr
+ if self.methods.grep(/^encode$/).empty?
+ def self.encode(io, int_val)
+ if int_val < 0
+ # negative varints are always encoded with the full 10 bytes
+ int_val = int_val & 0xffffffff_ffffffff
+ end
+ loop do
+ byte = int_val & 0b0111_1111
+ int_val >>= 7
+ if int_val == 0
+ io << byte.chr
+ break
+ else
+ io << (byte | 0b1000_0000).chr
+ end
end
end
end
- def self.decode(io)
- int_val = 0
- shift = 0
- loop do
- raise("Too many bytes when decoding varint") if shift >= 64
- byte = io.getbyte
- int_val |= (byte & 0b0111_1111) << shift
- shift += 7
- # int_val -= (1 << 64) if int_val > UINT64_MAX
- return int_val if (byte & 0b1000_0000) == 0
+ if self.methods.grep(/^decode$/).empty?
+ def self.decode(io)
+ int_val = 0
+ shift = 0
+ loop do
+ raise("Too many bytes when decoding varint") if shift >= 64
+ byte = io.getbyte
+ int_val |= (byte & 0b0111_1111) << shift
+ shift += 7
+ # int_val -= (1 << 64) if int_val > UINT64_MAX
+ return int_val if (byte & 0b1000_0000) == 0
+ end
end
end

0 comments on commit 51c0052

Please sign in to comment.