-
Notifications
You must be signed in to change notification settings - Fork 105
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* C implementation of the ObjectId spec * java implementation of ObjectId generation * add tests per object-id spec * add fallback to rand() when arc4random isn't available (*sadface*) * handle variable length buffers properly * for safety's sake, let's not hardcode the size of an integer * target/source version need to be optional * ProcessHandle isn't available on our testing platforms Also, since Java can't fork, it might not even be necessary to test for the pid changing.
- Loading branch information
Showing
10 changed files
with
339 additions
and
142 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
# rubocop:todo all
# Build configuration for the bson_native C extension.
require 'mkmf'

$CFLAGS << ' -Wall -g -std=c99'

# Probe for arc4random before the Makefile is written, so that the
# HAVE_ARC4RANDOM macro is passed to the C sources when it is available.
have_func 'arc4random'

create_makefile('bson_native')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,31 +29,69 @@ void rb_bson_generate_machine_id(VALUE rb_md5_class, char *rb_bson_machine_id) | |
|
||
/** | ||
* Generate the next object id. | ||
* | ||
* Specification: | ||
* https://github.com/mongodb/specifications/blob/master/source/objectid.rst | ||
* | ||
* The ObjectID BSON type is a 12-byte value consisting of three different portions (fields): | ||
* * a 4-byte value representing the seconds since the Unix epoch in the highest order bytes, | ||
* * a 5-byte random number unique to a machine and process, | ||
* * a 3-byte counter, starting with a random value. | ||
*/ | ||
VALUE rb_bson_object_id_generator_next(int argc, VALUE* args, VALUE self) | ||
{ | ||
char bytes[12]; | ||
uint32_t t; | ||
uint32_t c; | ||
uint16_t pid = BSON_UINT16_TO_BE(getpid()); | ||
uint32_t time_component; | ||
uint8_t* random_component; | ||
uint32_t counter_component; | ||
VALUE timestamp; | ||
VALUE rb_bson_object_id_class; | ||
|
||
if (argc == 0 || (argc == 1 && *args == Qnil)) { | ||
t = BSON_UINT32_TO_BE((int) time(NULL)); | ||
} | ||
else { | ||
t = BSON_UINT32_TO_BE(NUM2ULONG(rb_funcall(*args, rb_intern("to_i"), 0))); | ||
} | ||
rb_bson_object_id_class = pvt_const_get_2("BSON", "ObjectId"); | ||
|
||
/* "Drivers SHOULD have an accessor method on an ObjectID class for | ||
* obtaining the timestamp value." */ | ||
|
||
timestamp = rb_funcall(rb_bson_object_id_class, rb_intern("timestamp"), 0); | ||
time_component = BSON_UINT32_TO_BE(NUM2INT(timestamp)); | ||
|
||
/* "A 5-byte field consisting of a random value generated once per process. | ||
* This random value is unique to the machine and process. | ||
* | ||
* "Drivers MUST NOT have an accessor method on an ObjectID class for | ||
* obtaining this value." | ||
*/ | ||
|
||
random_component = pvt_get_object_id_random_value(); | ||
|
||
/* shift left 8 bits, so that the first three bytes of the result are | ||
* the meaningful ones */ | ||
counter_component = BSON_UINT32_TO_BE(rb_bson_object_id_counter << 8); | ||
|
||
memcpy(&bytes, &time_component, 4); | ||
memcpy(&bytes[4], random_component, 5); | ||
memcpy(&bytes[9], &counter_component, 3); | ||
|
||
c = BSON_UINT32_TO_BE(rb_bson_object_id_counter << 8); | ||
rb_bson_object_id_counter = (rb_bson_object_id_counter + 1) % 0x1000000; | ||
|
||
memcpy(&bytes, &t, 4); | ||
memcpy(&bytes[4], rb_bson_machine_id_hash, 3); | ||
memcpy(&bytes[7], &pid, 2); | ||
memcpy(&bytes[9], &c, 3); | ||
rb_bson_object_id_counter++; | ||
return rb_str_new(bytes, 12); | ||
} | ||
|
||
/** | ||
* Reset the counter. This is purely as an aid for testing. | ||
* | ||
* @param [ Integer ] i the value to set the counter to (default is 0) | ||
*/ | ||
VALUE rb_bson_object_id_generator_reset_counter(int argc, VALUE* args, VALUE self) { | ||
switch(argc) { | ||
case 0: rb_bson_object_id_counter = 0; break; | ||
case 1: rb_bson_object_id_counter = FIX2INT(args[0]); break; | ||
default: rb_raise(rb_eArgError, "Expected 0 or 1 arguments, got %d", argc); | ||
} | ||
|
||
return T_NIL; | ||
} | ||
|
||
/** | ||
* Returns a Ruby constant nested one level, e.g. BSON::Document. | ||
*/ | ||
|
@@ -77,7 +115,7 @@ VALUE pvt_const_get_3(const char *c1, const char *c2, const char *c3) { | |
int pvt_get_mode_option(int argc, VALUE *argv) { | ||
VALUE opts; | ||
VALUE mode; | ||
|
||
rb_scan_args(argc, argv, ":", &opts); | ||
if (NIL_P(opts)) { | ||
return BSON_MODE_DEFAULT; | ||
|
@@ -93,3 +131,63 @@ int pvt_get_mode_option(int argc, VALUE *argv) { | |
} | ||
} | ||
} | ||
|
||
/** | ||
* Returns the random number associated with this host and process. If the | ||
* process ID changes (e.g. via fork), this will detect the change and | ||
* generate another random number. | ||
*/ | ||
uint8_t* pvt_get_object_id_random_value() { | ||
static pid_t remembered_pid = 0; | ||
static uint8_t remembered_value[BSON_OBJECT_ID_RANDOM_VALUE_LENGTH] = {0}; | ||
pid_t pid = getpid(); | ||
|
||
if (remembered_pid != pid) { | ||
remembered_pid = pid; | ||
pvt_rand_buf(remembered_value, BSON_OBJECT_ID_RANDOM_VALUE_LENGTH, pid); | ||
} | ||
|
||
return remembered_value; | ||
} | ||
|
||
/**
 * Fills the buffer with random bytes. If arc4random is available, it is used,
 * otherwise a less-ideal fallback is used.
 *
 * @param bytes the buffer to fill
 * @param len the number of bytes to write into the buffer
 * @param pid the process id, mixed into the seed of the fallback generator
 */
void pvt_rand_buf(uint8_t* bytes, int len, int pid) {
#if HAVE_ARC4RANDOM
  arc4random_buf(bytes, len);
#else
  time_t t;
  uint32_t seed;
  int ofs;

  /* TODO: spec says to include hostname as part of the seed */
  t = time(NULL);
  seed = ((uint32_t)t << 16) + ((uint32_t)pid % 0xFFFF);
  srand(seed);

  /* Take a single byte from each rand() call. RAND_MAX is only guaranteed
   * to be 32767, so copying whole ints would leave their upper bytes
   * partly or entirely zero. Shifting by 7 keeps the top 8 of the 15
   * guaranteed bits. */
  for (ofs = 0; ofs < len; ofs++) {
    bytes[ofs] = (uint8_t)((rand() >> 7) & 0xFF);
  }
#endif
}
|
||
/**
 * Returns a random integer between 0 and INT_MAX. If arc4random is available,
 * it is used, otherwise a less-ideal fallback is used.
 *
 * @return a non-negative random integer
 */
int pvt_rand() {
#if HAVE_ARC4RANDOM
  /* arc4random() produces a full 32-bit unsigned value; drop the low bit so
   * the result stays non-negative when converted to int (assuming int is at
   * least 32 bits wide). */
  return (int)(arc4random() >> 1);
#else
  /* Seed only on the first call: reseeding from the clock every time makes
   * all calls within the same second return the same value. */
  static int seeded = 0;

  if (!seeded) {
    srand((unsigned)time(NULL));
    seeded = 1;
  }
  return rand();
#endif
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,11 +1,58 @@ | ||
# rubocop:todo all | ||
# Helper methods shared by the specs.
module Utils
  extend self

  # JRuby chokes when strings like "\xfe\x00\xff", which are not valid UTF-8,
  # appear in the source. Use this method to build such strings.
  #
  # @param [ Array<Integer> ] char_array The byte values to use for the string
  # @param [ String ] encoding The encoding to tag the resulting string with
  #
  # @return [ String ] the string made of the given bytes
  def make_byte_string(char_array, encoding = 'BINARY')
    char_array.map do |char|
      char.chr.force_encoding('BINARY')
    end.join.force_encoding(encoding)
  end

  # Forks the current process and executes the given block in the child.
  # The value returned by the block is then returned in the parent process
  # by this method.
  #
  # @return [ Object ] the value returned by the block
  #
  # @raise [ RuntimeError ] if the block raised in the child, or the child
  #   exited without reporting a result
  def perform_in_child(&block)
    reader, writer = IO.pipe

    if (child_pid = fork)
      parent_worker(reader, writer, child_pid)
    else
      child_worker(reader, writer, &block)
    end
  end

  private

  # A utility method for #perform_in_child, to handle tasks for the parent
  # side of the fork.
  #
  # @param [ IO ] reader The reader IO for the pipe
  # @param [ IO ] writer The writer IO for the pipe
  # @param [ Integer | nil ] child_pid The pid of the forked child to wait
  #   for; when nil, waits for any child
  #
  # @return [ Object ] the value returned by the child process
  def parent_worker(reader, writer, child_pid = nil)
    writer.close
    # Read until EOF before waiting, so the child never blocks on a full pipe.
    blob = reader.read
    reader.close
    child_pid ? Process.wait(child_pid) : Process.wait

    raise 'child process exited without returning a result' if blob.empty?

    # The blob was written by our own forked child, so it is trusted input.
    status, value = Marshal.load(blob)
    raise "child process failed with #{value}" if status == :error

    value
  end

  # A utility method for #perform_in_child, to handle tasks for the child
  # side of the fork. Never returns: the child always exits, even when the
  # block raises, so it cannot continue running the parent's code.
  #
  # @param [ IO ] reader The reader IO for the pipe
  # @param [ IO ] writer The writer IO for the pipe
  def child_worker(reader, writer, &block)
    reader.close
    payload =
      begin
        Marshal.dump([ :ok, block.call ])
      rescue Exception => e
        # Deliberately broad: the child is about to exit, and any failure
        # (including non-StandardError ones) must be reported to the parent.
        Marshal.dump([ :error, "#{e.class}: #{e.message}" ])
      end
    writer.write(payload)
    writer.close
  ensure
    exit!(0)
  end
end
Oops, something went wrong.
Comment from the sidelines: I think using `getentropy` here would yield the same result, but would expand the covered user base.

From what I could gather, `arc4random_buf` was added in glibc 2.36 (2022), while `getentropy` has been available since 2.25 (2017).

Context: I was in the process of temporarily backporting this commit locally so we could stop having such a large number of id collisions when I noticed that our Ruby image is based on Debian bullseye, which ships glibc 2.31, so it would have fallen back to `rand` seeded with the time and pid. `getentropy` is available in that libc, so I ended up using that.

(In our case it is pretty bad since the pid is always 1 -- container fun! -- meaning that two containers started in the same second have a high chance of generating collisions.)
cc @jamis