From 030d029195869c2191f5aa0d2b9213dcf3797028 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Tue, 21 Feb 2017 17:18:12 +0100
Subject: [PATCH 01/18] Move aggregate tests to a new file

For the upcoming tests, I need a third column in the 'foo' table -- or
alternatively a second table. Extending the existing table in
`test_integration.rb` let some other tests fail. Given that
`test_integration.rb` is already lengthy, I thought it would make
things clearer if I separate the aggregate tests into their own file
with their own setup/teardown methods.
---
 test/test_integration.rb           | 81 ------------------------
 test/test_integration_aggregate.rb | 98 ++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 81 deletions(-)
 create mode 100644 test/test_integration_aggregate.rb

diff --git a/test/test_integration.rb b/test/test_integration.rb
index 2f94691b..d5ea3a53 100644
--- a/test/test_integration.rb
+++ b/test/test_integration.rb
@@ -499,87 +499,6 @@ def test_create_function
     assert_match( />>>.*<<</, value )
   end
 
-  def test_create_aggregate_without_block
-    step = proc do |ctx,a|
-      ctx[:sum] ||= 0
-      ctx[:sum] += a.to_i
-    end
-
-    final = proc { |ctx| ctx.result = ctx[:sum] }
-
-    @db.create_aggregate( "accumulate", 1, step, final )
-
-    value = @db.get_first_value( "select accumulate(a) from foo" )
-    assert_equal 6, value
-
-    # calling #get_first_value twice don't add up to the latest result
-    value = @db.get_first_value( "select accumulate(a) from foo" )
-    assert_equal 6, value
-  end
-
-  def test_create_aggregate_with_block
-    @db.create_aggregate( "accumulate", 1 ) do
-      step do |ctx,a|
-        ctx[:sum] ||= 0
-        ctx[:sum] += a.to_i
-      end
-
-      finalize { |ctx| ctx.result = ctx[:sum] }
-    end
-
-    value = @db.get_first_value( "select accumulate(a) from foo" )
-    assert_equal 6, value
-  end
-
-  def test_create_aggregate_with_no_data
-    @db.create_aggregate( "accumulate", 1 ) do
-      step do |ctx,a|
-        ctx[:sum] ||= 0
-        ctx[:sum] += a.to_i
-      end
-
-      finalize { |ctx| ctx.result = ctx[:sum] || 0 }
-    end
-
-    value = @db.get_first_value(
-      "select accumulate(a) from foo where a = 100" )
-    assert_equal 0, value
-  end
-
-  class AggregateHandler
-    class << self
-      def arity; 1; end
-      def text_rep; SQLite3::Constants::TextRep::ANY; end
-      def name; "multiply"; end
-    end
-    def step(ctx, a)
-      ctx[:buffer] ||= 1
-      ctx[:buffer] *= a.to_i
-    end
-    def finalize(ctx); ctx.result = ctx[:buffer]; end
-  end
-
-  def test_aggregate_initialized_twice
-    initialized = 0
-    handler = Class.new(AggregateHandler) do
-      define_method(:initialize) do
-        initialized += 1
-        super()
-      end
-    end
-
-    @db.create_aggregate_handler handler
-    @db.get_first_value( "select multiply(a) from foo" )
-    @db.get_first_value( "select multiply(a) from foo" )
-    assert_equal 2, initialized
-  end
-
-  def test_create_aggregate_handler
-    @db.create_aggregate_handler AggregateHandler
-    value = @db.get_first_value( "select multiply(a) from foo" )
-    assert_equal 6, value
-  end
-
   def test_bind_array_parameter
     result = @db.get_first_value( "select b from foo where a=? and b=?",
       [ 1, "foo" ] )
diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
new file mode 100644
index 00000000..75ca0466
--- /dev/null
+++ b/test/test_integration_aggregate.rb
@@ -0,0 +1,98 @@
+require 'helper'
+
+class TC_Integration_Aggregate < SQLite3::TestCase
+  def setup
+    @db = SQLite3::Database.new(":memory:")
+    @db.transaction do
+      @db.execute "create table foo ( a integer primary key, b text, c integer )"
+      @db.execute "insert into foo ( b, c ) values ( 'foo', 10 )"
+      @db.execute "insert into foo ( b, c ) values ( 'bar', 11 )"
+      @db.execute "insert into foo ( b, c ) values ( 'baz', 12 )"
+    end
+  end
+
+  def teardown
+    @db.close
+  end
+
+  def test_create_aggregate_without_block
+    step = proc do |ctx,a|
+      ctx[:sum] ||= 0
+      ctx[:sum] += a.to_i
+    end
+
+    final = proc { |ctx| ctx.result = ctx[:sum] }
+
+    @db.create_aggregate( "accumulate", 1, step, final )
+
+    value = @db.get_first_value( "select accumulate(a) from foo" )
+    assert_equal 6, value
+
+    # calling #get_first_value twice don't add up to the latest result
+    value = @db.get_first_value( "select accumulate(a) from foo" )
+    assert_equal 6, value
+  end
+
+  def test_create_aggregate_with_block
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 0
+        ctx[:sum] += a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    value = @db.get_first_value( "select accumulate(a) from foo" )
+    assert_equal 6, value
+  end
+
+  def test_create_aggregate_with_no_data
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 0
+        ctx[:sum] += a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] || 0 }
+    end
+
+    value = @db.get_first_value(
+      "select accumulate(a) from foo where a = 100" )
+    assert_equal 0, value
+  end
+
+  class AggregateHandler
+    class << self
+      def arity; 1; end
+      def text_rep; SQLite3::Constants::TextRep::ANY; end
+      def name; "multiply"; end
+    end
+    def step(ctx, a)
+      ctx[:buffer] ||= 1
+      ctx[:buffer] *= a.to_i
+    end
+    def finalize(ctx); ctx.result = ctx[:buffer]; end
+  end
+
+  def test_aggregate_initialized_twice
+    initialized = 0
+    handler = Class.new(AggregateHandler) do
+      define_method(:initialize) do
+        initialized += 1
+        super()
+      end
+    end
+
+    @db.create_aggregate_handler handler
+    @db.get_first_value( "select multiply(a) from foo" )
+    @db.get_first_value( "select multiply(a) from foo" )
+    assert_equal 2, initialized
+  end
+
+  def test_create_aggregate_handler
+    @db.create_aggregate_handler AggregateHandler
+    value = @db.get_first_value( "select multiply(a) from foo" )
+    assert_equal 6, value
+  end
+end

From ae89dea20a2bb17846f83b3d71d112ead671b171 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Tue, 21 Feb 2017 20:57:18 +0100
Subject: [PATCH 02/18] tests to demonstrate issues with aggregate function
 handling

Added tests to demonstrate three issues with the way sqlite3-ruby
currently handles aggregate functions:

(1) Defining a second aggregate function and using both results in
memory violation. The problem is that `rb_iv_set(self, "@agregator",
aggregator);`   (database.c:399) just overwrites the last reference to
the first  AggregateHandler, but SQLite still holds a pointer to it and
will follow   the pointer if the first aggregate function is used in a
query.
  The most straight-forward fix is to insert the aggregator in a ruby
array and never release it -- similar to how its done for functions.

(2) Using the same aggregate function twice in a query mixes up values
from both  columns. For example:
  `SELECT MYAGGREG(a), MYAGGREG(b) FROM foo;`
  Leads to: Myaggreg.step(a1); Myaggreg.step(b1); Myaggreg.step(a2);
  Myaggreg.step(b2); ... ; Myaggreg.finalize(), Myaggreg.finalize()
  The SQLite API expects the caller to differentiate between these
chains  of invocation via `sqlite3_aggregate_context()`, but current
sqlite3-ruby  does not account for that.
  #44 has been a work around for this in the special case that the first
  aggregation is finalized before the second started (separate queries).
  #161 does the analog for the function Proxy.

(3) Documentation implies that library users shall explicitly set the
arity of the function. Consequently the library user is likely to
expect that this is passed down to SQLite, so he may define multiple
functions with the same name but different arity and SQLite to use the
most appropriate one  (documented sqlite feature). Unfortunately,
sqlite3-ruby does not pass the arity to SQLite, which is surprising
given that different arity-values  are accounted for in the C code. The
problem is that sqlite3-ruby does not call the AggregateHandlers
"arity" method but instead queries the arity of
  the "step" method from Ruby (sqlite3_obj_method_arity). Confusingly,
this  is not the "step" provided by the library user, but the "step" of
the  Proxy Wrapper classes introduced by Database.create_aggregate or
Database.create_aggregate_handler. Both of these are just
`step(*args)`,  so the arity reported to SQLite is always -1.

Things not addressed:
  - #164 impossible to call FunctionProxy.set_error (and
FunctionProxy.count).  because @driver does not exist (anymore)
  - Database.create_aggregate and the tests contain a 'text_rep' field,
that  is never passed down to SQLite. It is not advertised in the
documentation, though.
---
 test/test_integration_aggregate.rb | 148 +++++++++++++++++++++++++++++
 1 file changed, 148 insertions(+)

diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index 75ca0466..ed722d22 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -47,6 +47,127 @@ def test_create_aggregate_with_block
     assert_equal 6, value
   end
 
+  def test_create_aggregate_with_the_same_function_twice_in_a_query
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 0
+        ctx[:sum] += a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    values = @db.get_first_row( "select accumulate(a), accumulate(c) from foo" )
+    assert_equal 6, values[0]
+    assert_equal 33, values[1]
+  end
+
+  def test_create_aggregate_with_two_different_functions
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 0
+        ctx[:sum] += a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    @db.create_aggregate( "multiply", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 1
+        ctx[:sum] *= a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    # This is likely to crash, as ruby-sqlite overwrites its only reference
+    # to the "accumulate" handler with the "multiply" handler, although
+    # SQLite still holds a pointer which it follows on next select.
+    #GC.start
+
+    values = @db.get_first_row( "select accumulate(a), multiply(c) from foo" )
+    assert_equal 6, values[0]
+    assert_equal 1320, values[1]
+
+    value = @db.get_first_value( "select accumulate(c) from foo")
+    assert_equal 33, value
+
+    value = @db.get_first_value( "select multiply(a) from foo")
+    assert_equal 6, value
+  end
+
+  def test_create_aggregate_overwrite_function
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 0
+        ctx[:sum] += a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    value = @db.get_first_value( "select accumulate(c) from foo")
+    assert_equal 33, value
+
+    GC.start
+
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 1
+        ctx[:sum] *= a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    value = @db.get_first_value( "select accumulate(c) from foo")
+    assert_equal 1320, value
+  end
+
+  def test_create_aggregate_overwrite_function_with_different_arity
+    @db.create_aggregate( "accumulate", -1 ) do
+      step do |ctx,*args|
+        ctx[:sum] ||= 0
+        args.each { |a| ctx[:sum] += a.to_i }
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    @db.create_aggregate( "accumulate", 2 ) do
+      step do |ctx,a,b|
+        ctx[:sum] ||= 1
+        ctx[:sum] *= (a.to_i + b.to_i)
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    GC.start
+
+    values = @db.get_first_row( "select accumulate(c), accumulate(a,c) from foo")
+    assert_equal 33, values[0]
+    assert_equal 39, values[1]
+  end
+
+  class CustomException < Exception
+  end
+
+  def test_create_aggregate_with_exception
+    @db.create_aggregate( "raiseexception", 1 ) do
+      step do |ctx,a|
+        raise CustomException.new( "bogus aggregate handler" )
+      end
+
+      finalize { |ctx| ctx.result = 42 }
+    end
+
+    assert_raise CustomException do
+      @db.get_first_value( "select raiseexception(a) from foo")
+    end
+  end
+
   def test_create_aggregate_with_no_data
     @db.create_aggregate( "accumulate", 1 ) do
       step do |ctx,a|
@@ -90,6 +211,33 @@ def test_aggregate_initialized_twice
     assert_equal 2, initialized
   end
 
+  def test_create_aggregate_handler_call_with_wrong_arity
+    @db.create_aggregate_handler AggregateHandler
+
+    assert_raise (SQLite3::SQLException) do
+     @db.get_first_value( "select multiply(a,c) from foo" )
+   end
+  end
+
+  class RaiseExceptionAggregateHandler
+    class << self
+      def arity; 1; end
+      def text_rep; SQLite3::Constants::TextRep::ANY; end
+      def name; "raiseexception"; end
+    end
+    def step(ctx, a)
+      raise CustomException.new( "bogus aggregate handler" )
+    end
+    def finalize(ctx); ctx.result = nil; end
+  end
+
+  def test_create_aggregate_handler_with_exception
+    @db.create_aggregate_handler RaiseExceptionAggregateHandler
+    assert_raise CustomException do
+      @db.get_first_value( "select raiseexception(a) from foo")
+    end
+  end
+
   def test_create_aggregate_handler
     @db.create_aggregate_handler AggregateHandler
     value = @db.get_first_value( "select multiply(a) from foo" )

From 39e553b34bc9fa7fde2307859a7584ed2af90717 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Sat, 25 Feb 2017 20:02:44 +0100
Subject: [PATCH 03/18] simple linked list implementation

To manage the active references to the aggregate handler and its
instances, I need some list/array/vector. I have used Ruby's array for
that, but wrapping the rb_sqlite3_aggregator_wrapper with a new, hidden
Ruby class turned out more complex than I initially expected.
Especially the undefined finalization order of the GC posed problems on
shutdown. Consequently I decided that it would be better to handle the
datastructures completely in C there I have full control over
the destruction order.
---
 ext/sqlite3/list.h | 188 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 ext/sqlite3/list.h

diff --git a/ext/sqlite3/list.h b/ext/sqlite3/list.h
new file mode 100644
index 00000000..043de81b
--- /dev/null
+++ b/ext/sqlite3/list.h
@@ -0,0 +1,188 @@
+#ifndef SQLITE3_LIST_RUBY
+#define SQLITE3_LIST_RUBY
+
+#include <assert.h>
+
+/* A simple doubly-linked list implementation with a sentinel node.
+ *
+ * Linked-Lists are tricky and this implementation aims to be easy for a
+ * programmer to check and protect against most kinds of API-abuse with
+ * assertions. It is inspired by the linux/list.h, but avoids macros and
+ * offsetof(). Iteration is done via an explicti iterator instead of a
+ * FOREACH macro.
+ *
+ * General use:
+ * You have some "owner" or "parent" structure that holds/owns the list:
+ *
+ * struct parent {
+ *   // other stuff
+ *   rb_sqlite3_list_head_t foo_list;
+ *   // more other stuff
+ * };
+ *
+ * and a struct for the "foo" list (child) elements
+ *
+ * struct foo {
+ *    rb_sqlite3_list_elem list;
+ *    // other stuff
+ * };
+ *
+ * I recommend putting the rb_sqlite3_list_elem first in the child so you can
+ * freely cast between struct foo* and rb_sqlite3_list_elem*. If you put
+ * rb_sqlite3_list_elem somewhere else you need to do the offsetof calculation
+ * yourself.
+ *
+ * Make sure that both struct parent and struct foo do not move in memory, e.g.
+ * allocate on the heap.
+ * Also make sure that you initialize both the rb_sqlite3_list_head_t and
+ * the rb_sqlite3_list_elem_t before doing anything else with them.
+ *
+ * The generic clear_list operation looks like:
+ *
+ * rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&parent->head);
+ * rb_sqlite3_list_elem_t *e;
+ * while ((e = rb_sqlite3_list_iter_step(&iter))) {
+ *   rb_sqlite3_list_remove(e);
+ *   free(e);
+ * }
+ *
+ * The rest should be clear from the per-function comments below.
+ */
+
+typedef struct rb_sqlite3_list_elem
+{
+  struct rb_sqlite3_list_elem* next;
+  struct rb_sqlite3_list_elem* prev;
+} rb_sqlite3_list_elem_t;
+
+/* the head is a sentinel node, it dos not hold any data and should be placed
+ * into the object that "owns" the linked list. This struct exists, so that
+ * C's type system provides minimal protection against mixing up the head
+ * and the ordinary element nodes */
+typedef struct rb_sqlite3_list_head
+{
+  struct rb_sqlite3_list_elem elem;
+} rb_sqlite3_list_head_t;
+
+/* list iterators private data. Obtain a new iterator from
+ * rb_sqlite3_list_iter_new(). Now call rb_sqlite3_list_iter_step() on it
+ * until it returns NULL */
+typedef struct rb_sqlite3_list_iter
+{
+  struct rb_sqlite3_list_head* head; /* immutable. Always points at head */
+  struct rb_sqlite3_list_elem* next; /* returned by next invocation of step() */
+} rb_sqlite3_list_iter_t;
+
+/* initialize an element to the unconnected state. Should aways be the first
+ * thing to do with a rb_sqlite3_list_elem_t. */
+static inline void
+rb_sqlite3_list_elem_init(rb_sqlite3_list_elem_t* elem)
+{
+  elem->prev = elem;
+  elem->next = elem;
+}
+
+/* initialize a list head to the empty state. Should aways be the first
+ * thing to do with a rb_sqlite3_list_head_t */
+static inline void
+rb_sqlite3_list_head_init(rb_sqlite3_list_head_t* head)
+{
+  rb_sqlite3_list_elem_init(&head->elem);
+}
+
+/* return true iff the the list is empty. That is the only member of the
+ * list is the sentinel */
+static inline int
+rb_sqlite3_list_empty(rb_sqlite3_list_head_t* head)
+{
+  /* an unitialized list is neither empty nor occupied, so you may not call
+   * list_empty on it */
+  assert(head->elem.prev && head->elem.next);
+  return head->elem.next == &head->elem;
+}
+
+/* consider this an internel helper function. Rather define additional
+ * functions like rb_sqlite3_list_insert_after() or something like that
+ * if you need this functionaliy */
+static inline void
+rb_sqlite3_list_insert_between(rb_sqlite3_list_elem_t* prev_element,
+                               rb_sqlite3_list_elem_t* new_element,
+                               rb_sqlite3_list_elem_t* next_element)
+{
+  /* detect uninitialized prev_element or next_element in the common case
+   * that it is zero filled */
+  assert(prev_element->prev && prev_element->next);
+  assert(next_element->prev && next_element->next);
+
+  /* next_element must actually be the successor of previous element */
+  assert(prev_element->next == next_element);
+  assert(next_element->prev == prev_element);
+
+  /* our new_element must be in the initialized, unconnected state. This
+   * is joust to protect from the mistake of inserting an already inserted
+   * element into a second list, which would corrupt the first list */
+  assert(new_element->prev == new_element);
+
+  new_element->prev = prev_element;
+  new_element->next = next_element;
+  prev_element->next = new_element;
+  next_element->prev = new_element;
+}
+
+/* insert new_element at the tail of the list. In other words insert
+ * it before the head */
+static inline void
+rb_sqlite3_list_insert_tail(rb_sqlite3_list_head_t* head,
+                            rb_sqlite3_list_elem_t* new_element)
+{
+  rb_sqlite3_list_insert_between(head->elem.prev, new_element, &head->elem);
+}
+
+/* remove this element from a list. Thanks to this being a doubly-linked list,
+ * you don't need the head element of the list. Only elem itself and its
+ * direct neighbours are involved */
+static inline void
+rb_sqlite3_list_remove(rb_sqlite3_list_elem_t* elem)
+{
+  rb_sqlite3_list_elem_t* prev = elem->prev;
+  rb_sqlite3_list_elem_t* next = elem->next;
+
+  /* this assertion triggers if elem is not part of a linked list and elso
+   * detetcs zero-filled elem pointers */
+  assert(prev && prev != elem);
+
+  prev->next = next;
+  next->prev = prev;
+
+  /* avoid the common bug of reinserting uninitialized elems */
+  rb_sqlite3_list_elem_init(elem);
+}
+
+/* obtain a new iterator. */
+static inline rb_sqlite3_list_iter_t
+rb_sqlite3_list_iter_new(rb_sqlite3_list_head_t* head)
+{
+  /* again check if head looks initalized */
+  assert(head->elem.prev && head->elem.next);
+  rb_sqlite3_list_iter_t iter = {.head = head, .next = head->elem.next };
+  return iter;
+}
+
+/* Yield the next element from the iterator or NULL if at the end of the
+ * list. Once iteration is complete, will always return NULL.
+ * list step is safe in the sense that you may remove the returned element
+ * from the list and free() it. DO NOT MODIFY THE LIST IN ANY OTHER WAY. */
+static inline rb_sqlite3_list_elem_t*
+rb_sqlite3_list_iter_step(rb_sqlite3_list_iter_t* iter)
+{
+  rb_sqlite3_list_elem_t* tmp = iter->next;
+
+  if (iter->next == &iter->head->elem) {
+    return NULL;
+  }
+
+  iter->next = iter->next->next;
+  return tmp;
+}
+
+#endif

From e30a96b02a531e94aff4fd653a3965783891b2b3 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Sun, 26 Feb 2017 22:06:17 +0100
Subject: [PATCH 04/18] fix invalid memory access

As I already wrote in an earlier commit message, the memory access
violation occurs after the library user installs a second aggregate
handler. In that case, Database.@aggregate is overwritten and
therefore no references to the AggregateHandler remain. The simplest
fix would be to change @aggregator (or @agregator) into a Ruby array of
all AggregateHandlers ever installed. I decided against that because I
think its cleaner when SQLite controls the lifetime of the handlers. To
that end the sqlite developers extended the sqlite3_create_function API
with the xDestroy callback. The extended "v2" Version is available
since 3.7.3 (2010-10-08). I installed a fallback to the old version,
should the new function be unavailable. While it is possible to remove
elements from an @aggregator ruby array while in xDestroy callback,
this was clumsy and difficult to do correctly under undefined
finalization order. I came to the conclusion that its easier to manage
the lifetime completely in C and only provide the appropriate
rb_gc_mark function to SQLite3::Database. The remaining issue is to
make the VALUEs for the AggregateHandlers (and later their instances)
visible to the mark function. To that end, I introduced the
aggregator_wrapper struct that holds the VALUE and the linked list
pointers. I replaced @aggregator with the _sqlite3Ruby.aggregators
linked list head.

I used valgrind to check that memory accesses are sane now. Before this
patch, I saw loads from previously free'd memory that disappeared.
There are still lots of warnings about use initialized values, but this
is unrelated and most likely an internal issue with MRI 2.0.0.

As part of this patch, I moved the aggregate function related code into
its own files "aggregator.[ch]".
---
 ext/sqlite3/aggregator.c           | 165 +++++++++++++++++++++++++++++
 ext/sqlite3/aggregator.h           |  35 ++++++
 ext/sqlite3/database.c             |  85 +++------------
 ext/sqlite3/database.h             |   4 +
 ext/sqlite3/extconf.rb             |   1 +
 test/test_integration_aggregate.rb |   7 +-
 6 files changed, 222 insertions(+), 75 deletions(-)
 create mode 100644 ext/sqlite3/aggregator.c
 create mode 100644 ext/sqlite3/aggregator.h

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
new file mode 100644
index 00000000..3d339c7a
--- /dev/null
+++ b/ext/sqlite3/aggregator.c
@@ -0,0 +1,165 @@
+#include <aggregator.h>
+#include <database.h>
+
+static int
+rb_sqlite3_aggregator_obj_method_arity(VALUE obj, ID id)
+{
+  VALUE method = rb_funcall(obj, rb_intern("method"), 1, ID2SYM(id));
+  VALUE arity  = rb_funcall(method, rb_intern("arity"), 0);
+
+  return (int)NUM2INT(arity);
+}
+
+static void
+rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv)
+{
+  aggregator_wrapper_t *aw = sqlite3_user_data(ctx);
+  VALUE * params = NULL;
+  int i;
+
+  if (argc > 0) {
+    params = xcalloc((size_t)argc, sizeof(VALUE *));
+    for(i = 0; i < argc; i++) {
+      params[i] = sqlite3val2rb(argv[i]);
+    }
+  }
+  rb_funcall2(aw->handler_klass, rb_intern("step"), argc, params);
+  xfree(params);
+}
+
+static void
+rb_sqlite3_aggregator_final(sqlite3_context * ctx)
+{
+  aggregator_wrapper_t *aw = sqlite3_user_data(ctx);
+  VALUE result = rb_funcall(aw->handler_klass, rb_intern("finalize"), 0);
+  set_sqlite3_func_result(ctx, result);
+}
+
+/* called both by sqlite3_create_function_v2's xDestroy-callback and
+ * rb_sqlite3_aggregator_destory_all. Unlinks an aggregate_wrapper and all
+ * its instances. The effect is that on the next run of
+ * rb_sqlite3_aggregator_mark() will not find the VALUEs for the
+ * AggregateHandler class and its instances anymore. */
+static void
+rb_sqlite3_aggregator_destroy(void *void_aw)
+{
+  aggregator_wrapper_t *aw = void_aw;
+  rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&aw->instances);
+  rb_sqlite3_list_elem_t *e;
+
+  while ((e = rb_sqlite3_list_iter_step(&iter))) {
+    rb_sqlite3_list_remove(e);
+    xfree(e);
+  }
+  rb_sqlite3_list_remove(&aw->list);
+  aw->handler_klass = Qnil;
+  xfree(aw);
+}
+
+/* called by rb_sqlite3_mark(), the mark function of Sqlite3::Database.
+ * Marks all the AggregateHandler classes and their instances that are
+ * currently in use. */
+void
+rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx)
+{
+  rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&ctx->aggregators);
+  aggregator_wrapper_t *aw;
+
+  while ((aw = (aggregator_wrapper_t*)rb_sqlite3_list_iter_step(&iter))) {
+    rb_gc_mark(aw->handler_klass);
+    /* TODO: mark instances */
+  }
+}
+
+/* called by sqlite3_rb_close or deallocate after sqlite3_close().
+ * At that point the library user can not invoke SQLite APIs any more, so
+ * SQLite can not call the AggregateHandlers callbacks any more. Consequently,
+ * Ruby's GC is free to release them. To us, this means we may drop the
+ * VALUE references by destorying all the remaining warppers
+ *
+ * Normally, the aggregators list should  be empty at that point
+ * because SQLIite should have already called sqlite3_create_function_v2's
+ * destroy callback of all registered aggregate functions. */
+void
+rb_sqlite3_aggregator_destory_all(sqlite3RubyPtr ctx)
+{
+  rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&ctx->aggregators);
+  aggregator_wrapper_t *aw;
+
+  while ((aw = (aggregator_wrapper_t*)rb_sqlite3_list_iter_step(&iter))) {
+    rb_sqlite3_aggregator_destroy(aw);
+  }
+}
+
+/* sqlite3_create_function_v2 is available since version 3.7.3 (2010-10-08).
+ * It features an additional xDestroy() callback that fires when sqlite does
+ * not need some user defined function anymore, e.g. when overwritten by
+ * another function with the same name.
+ * As this is just a memory optimization, we fall back to the old
+ * sqlite3_create_function if the new one is missing */
+int rb_sqlite3_create_function_v1or2(sqlite3 *db, const char *zFunctionName,
+  int nArg, int eTextRep, void *pApp,
+  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
+  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
+  void (*xFinal)(sqlite3_context*),
+  void(*xDestroy)(void*)
+)
+{
+#ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
+  return sqlite3_create_function_v2(db, zFunctionName, nArg, eTextRep, pApp,
+    xFunc, xStep, xFinal, xDestroy);
+#else
+  (void)xDestroy;
+  return sqlite3_create_function(db, zFunctionName, nArg, eTextRep, pApp,
+    xFunc, xStep, xFinal);
+#endif
+}
+
+/* call-seq: define_aggregator(name, aggregator)
+ *
+ * Define an aggregate function named +name+ using the object +aggregator+.
+ * +aggregator+ must respond to +step+ and +finalize+.  +step+ will be called
+ * with row information and +finalize+ must return the return value for the
+ * aggregator function.
+ */
+VALUE
+rb_sqlite3_define_aggregator(VALUE self, VALUE name, VALUE aggregator)
+{
+  /* define_aggregator is added as a method to SQLite3::Database in database.c */
+  sqlite3RubyPtr ctx;
+  int arity, status;
+
+  Data_Get_Struct(self, sqlite3Ruby, ctx);
+  if (!ctx->db) {
+    rb_raise(rb_path2class("SQLite3::Exception"), "cannot use a closed database");
+  }
+
+  arity = rb_sqlite3_aggregator_obj_method_arity(aggregator, rb_intern("step"));
+
+  aggregator_wrapper_t *aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
+  aw->handler_klass = aggregator;
+  rb_sqlite3_list_elem_init(&aw->list);
+  rb_sqlite3_list_head_init(&aw->instances);
+
+  status = rb_sqlite3_create_function_v1or2(
+    ctx->db,
+    StringValuePtr(name),
+    arity,
+    SQLITE_UTF8,
+    aw,
+    NULL,
+    rb_sqlite3_aggregator_step,
+    rb_sqlite3_aggregator_final,
+    rb_sqlite3_aggregator_destroy
+  );
+
+  if (status != SQLITE_OK) {
+    xfree(aw);
+    rb_sqlite3_raise(ctx->db, status);
+    return self; // just in case rb_sqlite3_raise returns.
+  }
+
+  rb_sqlite3_list_insert_tail(&ctx->aggregators, &aw->list);
+
+  return self;
+}
diff --git a/ext/sqlite3/aggregator.h b/ext/sqlite3/aggregator.h
new file mode 100644
index 00000000..de219f9b
--- /dev/null
+++ b/ext/sqlite3/aggregator.h
@@ -0,0 +1,35 @@
+#ifndef SQLITE3_AGGREGATOR_RUBY
+#define SQLITE3_AGGREGATOR_RUBY
+
+#include <sqlite3_ruby.h>
+
+/* the aggregator_wrapper holds a reference to the AggregateHandler class
+ * and a list to all instances of that class. The aggregator_wrappers
+ * themselves form the _sqlite3Ruby.aggregators list. The purpose of this
+ * construct is to make all the AggregateHandlers and their instances visible
+ * to the GC mark() function of SQLite3::Database.
+ * We remove aggregate handlers from this list once
+ * sqlite3_create_function_v2's xDestory() callback fires. On close of the
+ * database, we remove all remaining aggregate_wrappers. */
+typedef struct rb_sqlite3_aggregator_wrapper {
+  /* my node in the linked list of all aggregator wrappers. Relevant for
+   * the gc_mark function to find the handler_klass */
+  rb_sqlite3_list_elem_t list;
+
+  /* the AggregateHandler class we are wrapping here */
+  VALUE handler_klass;
+
+  /* linked list of all in-flight instances of the AggregateHandler klass. */
+  rb_sqlite3_list_head_t instances;
+} aggregator_wrapper_t;
+
+VALUE
+rb_sqlite3_define_aggregator(VALUE self, VALUE name, VALUE aggregator);
+
+void
+rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx);
+
+void
+rb_sqlite3_aggrgegator_destory_all(sqlite3RubyPtr ctx);
+
+#endif
diff --git a/ext/sqlite3/database.c b/ext/sqlite3/database.c
index 67d34b57..dfd358a3 100644
--- a/ext/sqlite3/database.c
+++ b/ext/sqlite3/database.c
@@ -1,4 +1,5 @@
 #include <sqlite3_ruby.h>
+#include <aggregator.h>
 
 #define REQUIRE_OPEN_DB(_ctxt) \
   if(!_ctxt->db) \
@@ -12,13 +13,20 @@ static void deallocate(void * ctx)
   sqlite3 * db     = c->db;
 
   if(db) sqlite3_close(db);
+  rb_sqlite3_aggregator_destory_all(ctx);
   xfree(c);
 }
 
+static void rb_sqlite3_mark(sqlite3RubyPtr ctx)
+{
+  rb_sqlite3_aggregator_mark(ctx);
+}
+
 static VALUE allocate(VALUE klass)
 {
   sqlite3RubyPtr ctx = xcalloc((size_t)1, sizeof(sqlite3Ruby));
-  return Data_Wrap_Struct(klass, NULL, deallocate, ctx);
+  rb_sqlite3_list_head_init(&ctx->aggregators);
+  return Data_Wrap_Struct(klass, rb_sqlite3_mark, deallocate, ctx);
 }
 
 static char *
@@ -73,6 +81,8 @@ static VALUE sqlite3_rb_close(VALUE self)
 
   ctx->db = NULL;
 
+  rb_sqlite3_aggregator_destory_all(ctx);
+
   return self;
 }
 
@@ -200,7 +210,7 @@ static VALUE last_insert_row_id(VALUE self)
   return LL2NUM(sqlite3_last_insert_rowid(ctx->db));
 }
 
-static VALUE sqlite3val2rb(sqlite3_value * val)
+VALUE sqlite3val2rb(sqlite3_value * val)
 {
   switch(sqlite3_value_type(val)) {
     case SQLITE_INTEGER:
@@ -230,7 +240,7 @@ static VALUE sqlite3val2rb(sqlite3_value * val)
   }
 }
 
-static void set_sqlite3_func_result(sqlite3_context * ctx, VALUE result)
+void set_sqlite3_func_result(sqlite3_context * ctx, VALUE result)
 {
   switch(TYPE(result)) {
     case T_NIL:
@@ -347,72 +357,6 @@ static VALUE define_function(VALUE self, VALUE name)
   return define_function_with_flags(self, name, SQLITE_UTF8);
 }
 
-static int sqlite3_obj_method_arity(VALUE obj, ID id)
-{
-  VALUE method = rb_funcall(obj, rb_intern("method"), 1, ID2SYM(id));
-  VALUE arity  = rb_funcall(method, rb_intern("arity"), 0);
-
-  return (int)NUM2INT(arity);
-}
-
-static void rb_sqlite3_step(sqlite3_context * ctx, int argc, sqlite3_value **argv)
-{
-  VALUE callable = (VALUE)sqlite3_user_data(ctx);
-  VALUE * params = NULL;
-  int i;
-
-  if (argc > 0) {
-    params = xcalloc((size_t)argc, sizeof(VALUE *));
-    for(i = 0; i < argc; i++) {
-      params[i] = sqlite3val2rb(argv[i]);
-    }
-  }
-  rb_funcall2(callable, rb_intern("step"), argc, params);
-  xfree(params);
-}
-
-static void rb_sqlite3_final(sqlite3_context * ctx)
-{
-  VALUE callable = (VALUE)sqlite3_user_data(ctx);
-  VALUE result = rb_funcall(callable, rb_intern("finalize"), 0);
-  set_sqlite3_func_result(ctx, result);
-}
-
-/* call-seq: define_aggregator(name, aggregator)
- *
- * Define an aggregate function named +name+ using the object +aggregator+.
- * +aggregator+ must respond to +step+ and +finalize+.  +step+ will be called
- * with row information and +finalize+ must return the return value for the
- * aggregator function.
- */
-static VALUE define_aggregator(VALUE self, VALUE name, VALUE aggregator)
-{
-  sqlite3RubyPtr ctx;
-  int arity, status;
-
-  Data_Get_Struct(self, sqlite3Ruby, ctx);
-  REQUIRE_OPEN_DB(ctx);
-
-  arity = sqlite3_obj_method_arity(aggregator, rb_intern("step"));
-
-  status = sqlite3_create_function(
-    ctx->db,
-    StringValuePtr(name),
-    arity,
-    SQLITE_UTF8,
-    (void *)aggregator,
-    NULL,
-    rb_sqlite3_step,
-    rb_sqlite3_final
-  );
-
-  rb_iv_set(self, "@agregator", aggregator);
-
-  CHECK(ctx->db, status);
-
-  return self;
-}
-
 /* call-seq: interrupt
  *
  * Interrupts the currently executing operation, causing it to abort.
@@ -856,7 +800,8 @@ void init_sqlite3_database()
   rb_define_method(cSqlite3Database, "last_insert_row_id", last_insert_row_id, 0);
   rb_define_method(cSqlite3Database, "define_function", define_function, 1);
   rb_define_method(cSqlite3Database, "define_function_with_flags", define_function_with_flags, 2);
-  rb_define_method(cSqlite3Database, "define_aggregator", define_aggregator, 2);
+  /* rb_sqlite3_define_aggregator() moved to aggregator.c */
+  rb_define_method(cSqlite3Database, "define_aggregator", rb_sqlite3_define_aggregator, 2);
   rb_define_method(cSqlite3Database, "interrupt", interrupt, 0);
   rb_define_method(cSqlite3Database, "errmsg", errmsg, 0);
   rb_define_method(cSqlite3Database, "errcode", errcode_, 0);
diff --git a/ext/sqlite3/database.h b/ext/sqlite3/database.h
index 63e5e961..b48be848 100644
--- a/ext/sqlite3/database.h
+++ b/ext/sqlite3/database.h
@@ -2,14 +2,18 @@
 #define SQLITE3_DATABASE_RUBY
 
 #include <sqlite3_ruby.h>
+#include <list.h>
 
 struct _sqlite3Ruby {
   sqlite3 *db;
+  rb_sqlite3_list_head_t aggregators;
 };
 
 typedef struct _sqlite3Ruby sqlite3Ruby;
 typedef sqlite3Ruby * sqlite3RubyPtr;
 
 void init_sqlite3_database();
+void set_sqlite3_func_result(sqlite3_context * ctx, VALUE result);
+VALUE sqlite3val2rb(sqlite3_value * val);
 
 #endif
diff --git a/ext/sqlite3/extconf.rb b/ext/sqlite3/extconf.rb
index 3fafa8d3..5d536840 100644
--- a/ext/sqlite3/extconf.rb
+++ b/ext/sqlite3/extconf.rb
@@ -63,6 +63,7 @@ def asplode missing
 have_func('sqlite3_column_database_name')
 have_func('sqlite3_enable_load_extension')
 have_func('sqlite3_load_extension')
+have_func('sqlite3_create_function_v2')
 
 unless have_func('sqlite3_open_v2')
   abort "Please use a newer version of SQLite3"
diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index ed722d22..83abfdd6 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -81,11 +81,8 @@ def test_create_aggregate_with_two_different_functions
       finalize { |ctx| ctx.result = ctx[:sum] }
     end
 
-    # This is likely to crash, as ruby-sqlite overwrites its only reference
-    # to the "accumulate" handler with the "multiply" handler, although
-    # SQLite still holds a pointer which it follows on next select.
-    #GC.start
-
+    GC.start
+    
     values = @db.get_first_row( "select accumulate(a), multiply(c) from foo" )
     assert_equal 6, values[0]
     assert_equal 1320, values[1]

From de218513a18eab57a6df47c95ff06832156acfb8 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Sun, 26 Feb 2017 22:15:44 +0100
Subject: [PATCH 05/18] test for aggregate functions used with group by

I missed this in the initial set of unit tests. #44 handles this case
already. Keep in mind though that is just the effect of the order in
which SQLite calls the step() and finalize() functions and could
theoretically change.
---
 test/test_integration_aggregate.rb | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index 83abfdd6..94d29cd3 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -7,7 +7,7 @@ def setup
       @db.execute "create table foo ( a integer primary key, b text, c integer )"
       @db.execute "insert into foo ( b, c ) values ( 'foo', 10 )"
       @db.execute "insert into foo ( b, c ) values ( 'bar', 11 )"
-      @db.execute "insert into foo ( b, c ) values ( 'baz', 12 )"
+      @db.execute "insert into foo ( b, c ) values ( 'bar', 12 )"
     end
   end
 
@@ -47,6 +47,23 @@ def test_create_aggregate_with_block
     assert_equal 6, value
   end
 
+  def test_create_aggregate_with_group_by
+    @db.create_aggregate( "accumulate", 1 ) do
+      step do |ctx,a|
+        ctx[:sum] ||= 0
+        ctx[:sum] += a.to_i
+      end
+
+      finalize { |ctx| ctx.result = ctx[:sum] }
+    end
+
+    values = @db.execute( "select b, accumulate(c) from foo group by b order by b" )
+    assert_equal "bar", values[0][0]
+    assert_equal 23, values[0][1]
+    assert_equal "foo", values[1][0]
+    assert_equal 10, values[1][1]
+  end
+
   def test_create_aggregate_with_the_same_function_twice_in_a_query
     @db.create_aggregate( "accumulate", 1 ) do
       step do |ctx,a|

From e07fc330e1d4447106cbba9fbba5e6606040e43a Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Mon, 27 Feb 2017 16:14:25 +0100
Subject: [PATCH 06/18] remove misplaced asterisk from xcalloc call

This is clearly an array of VALUE, not of VALUE*. As it should hold
sizeof(VALUE) == sizeof(VALUE*) this does not really matter though.
---
 ext/sqlite3/aggregator.c           | 4 ++--
 ext/sqlite3/aggregator.h           | 2 +-
 ext/sqlite3/database.c             | 4 ++--
 test/test_integration_aggregate.rb | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 3d339c7a..ee20b28b 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -18,7 +18,7 @@ rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv
   int i;
 
   if (argc > 0) {
-    params = xcalloc((size_t)argc, sizeof(VALUE *));
+    params = xcalloc((size_t)argc, sizeof(VALUE));
     for(i = 0; i < argc; i++) {
       params[i] = sqlite3val2rb(argv[i]);
     }
@@ -81,7 +81,7 @@ rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx)
  * because SQLIite should have already called sqlite3_create_function_v2's
  * destroy callback of all registered aggregate functions. */
 void
-rb_sqlite3_aggregator_destory_all(sqlite3RubyPtr ctx)
+rb_sqlite3_aggregator_destroy_all(sqlite3RubyPtr ctx)
 {
   rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&ctx->aggregators);
   aggregator_wrapper_t *aw;
diff --git a/ext/sqlite3/aggregator.h b/ext/sqlite3/aggregator.h
index de219f9b..26c4d160 100644
--- a/ext/sqlite3/aggregator.h
+++ b/ext/sqlite3/aggregator.h
@@ -30,6 +30,6 @@ void
 rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx);
 
 void
-rb_sqlite3_aggrgegator_destory_all(sqlite3RubyPtr ctx);
+rb_sqlite3_aggregator_destroy_all(sqlite3RubyPtr ctx);
 
 #endif
diff --git a/ext/sqlite3/database.c b/ext/sqlite3/database.c
index dfd358a3..28294529 100644
--- a/ext/sqlite3/database.c
+++ b/ext/sqlite3/database.c
@@ -13,7 +13,7 @@ static void deallocate(void * ctx)
   sqlite3 * db     = c->db;
 
   if(db) sqlite3_close(db);
-  rb_sqlite3_aggregator_destory_all(ctx);
+  rb_sqlite3_aggregator_destroy_all(ctx);
   xfree(c);
 }
 
@@ -81,7 +81,7 @@ static VALUE sqlite3_rb_close(VALUE self)
 
   ctx->db = NULL;
 
-  rb_sqlite3_aggregator_destory_all(ctx);
+  rb_sqlite3_aggregator_destroy_all(ctx);
 
   return self;
 }
diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index 94d29cd3..d64a9b85 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -162,7 +162,7 @@ def test_create_aggregate_overwrite_function_with_different_arity
 
     values = @db.get_first_row( "select accumulate(c), accumulate(a,c) from foo")
     assert_equal 33, values[0]
-    assert_equal 39, values[1]
+    assert_equal 2145, values[1]
   end
 
   class CustomException < Exception

From 493baf56232b68a7f55c7a84b221d2a05f20941c Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Mon, 27 Feb 2017 20:04:19 +0100
Subject: [PATCH 07/18] use sqlite3_aggregate_context()

This commit addresses the issue that using the same aggregate function
twice in a select statement mixes up the execution contexts in the
AggregateHandler. While resetting the aggregate Handler in the
finalize() method emulates the context lifetime in the other cases
(group by, independent select), resetting is a fragile approach.
Instead sqlite3_aggregate_context() is the function in the SQLite API
that should be used to separate contexts.

sqlite3_create_context() is a bit weird though: it is memory allocator
that returns the same memory when called from a context that already
called sqlite3_create_context once. Equally strange is that there is no
destroy() callback -- sqlite will clear up the memory when it does not
need it any more and won't tell us. Because a second allocator is
difficult to combine with Ruby's GC and because I do not have the
lifetime control, I use it only to store a pointer to a heap allocated
aggregator_instance_t structure. This structure holds the VALUE for the
Ruby instance that belongs to the aggregate function's execution
context. The structure forms a linked list rooted in the
aggregate_wrapper_t for the function it as an instance of, so the
instance can be GC marked.

SQLite's API left me with no choice but to assume that every execution
context ends with a single call to finalize(). At that point I unlink
and destroy the aggregate_instance_t. I never observed SQLite doing
anything else. There are checks in place to catch if SQLite does call
finalize() a second time. If it does not ever call finalize(), we leak
memory. If someone wants to use sqlite3_result_error, he or she should
check if the invocation of sqlite3_result_error is also the right place
to destroy the aggregate_instance_t.

This commit comes with one rather big change: now sqlite3-ruby
allocates the instances of the aggregate handler in the C code. This
forced me to change the interface, because the existing interface
passed only the instance object, not the factory, to the C code. To
that end, I introduced a new private method Database.define_aggregator2
that takes the factory as it sole parameter. Its like
Database.create_aggregate_handler but without the FunctionProxy "ctx"
object that serves no useful purpose. I wrote a new Ruby shim for the
old Database.define_aggregator and adopted Database.create_aggregate
and Database.create_aggregate_handler to the new interface between Ruby
and C. Generally I was careful to keep the interface as it is.

There is a tiny API change that I could not help avoid:
As Database.define_aggregator only receives one object, but it needs
one per execution context, the only option i saw was to use the
provided one as template and give every new instance a clone. This
means, however, that the template must provide a suitable clone. As
every ordinary ruby object already provides one, this should not pose
problems in practice, though.

And of course, people who were getting wrong results from their
aggregate functions suddenly get the right one, which may also come as
a surprise to them.
---
 ext/sqlite3/aggregator.c | 151 ++++++++++++++++++++++++++++++++-------
 ext/sqlite3/aggregator.h |  13 +++-
 ext/sqlite3/database.c   |   5 +-
 lib/sqlite3/database.rb  | 110 ++++++++++++++++++++--------
 4 files changed, 222 insertions(+), 57 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index ee20b28b..7e8d35f8 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -1,19 +1,73 @@
 #include <aggregator.h>
 #include <database.h>
 
-static int
-rb_sqlite3_aggregator_obj_method_arity(VALUE obj, ID id)
+typedef rb_sqlite3_aggregator_wrapper_t aggregator_wrapper_t;
+typedef rb_sqlite3_aggregator_instance_t aggregator_instance_t;
+
+static aggregator_instance_t already_destroyed_aggregator_instance;
+
+/* called in rb_sqlite3_aggregator_step and rb_sqlite3_aggregator_final. It
+ * checks if the exection context already has an associated instance. If it
+ * has one, it returns it. If there is no instance yet, it creates one and
+ * associates it with the context. */
+static aggregator_instance_t*
+rb_sqlite3_aggregate_instance(sqlite3_context *ctx)
 {
-  VALUE method = rb_funcall(obj, rb_intern("method"), 1, ID2SYM(id));
-  VALUE arity  = rb_funcall(method, rb_intern("arity"), 0);
+  aggregator_wrapper_t *aw = sqlite3_user_data(ctx);
+  aggregator_instance_t *inst;
+  aggregator_instance_t **inst_ptr =
+    sqlite3_aggregate_context(ctx, (int)sizeof(aggregator_instance_t*));
+
+  if (!inst_ptr) {
+    rb_fatal("SQLite is out-of-merory");
+  }
+
+  inst = *inst_ptr;
+  if (!inst) {
+    inst = xcalloc((size_t)1, sizeof(aggregator_instance_t));
+    // just so we know should rb_funcall raise and we get here a second time.
+    *inst_ptr = &already_destroyed_aggregator_instance;
+    rb_sqlite3_list_elem_init(&inst->list);
+    inst->handler_instance = rb_funcall(aw->handler_klass, rb_intern("new"), 0);
+    rb_sqlite3_list_insert_tail(&aw->instances, &inst->list);
+    *inst_ptr = inst;
+  }
+
+  if (inst == &already_destroyed_aggregator_instance) {
+    rb_fatal("SQLite called us back on an already destroyed aggregate instance");
+  }
+
+  return inst;
+}
+
+/* called by rb_sqlite3_aggregator_final. Unlinks and frees the
+ * aggregator_instance_t, so the handler_instance won't be marked any more
+ * and Ruby's GC may free it. */
+static void
+rb_sqlite3_aggregate_instance_destroy(sqlite3_context *ctx)
+{
+  aggregator_instance_t *inst;
+  aggregator_instance_t **inst_ptr = sqlite3_aggregate_context(ctx, 0);
+
+  if (!inst_ptr || (inst = *inst_ptr)) {
+    return;
+  }
+
+  if (inst == &already_destroyed_aggregator_instance) {
+    rb_fatal("attempt to destroy aggregate instance twice");
+  }
+
+  inst->handler_instance = Qnil; // may catch use-after-free
+  rb_sqlite3_list_remove(&inst->list);
+  xfree(inst);
 
-  return (int)NUM2INT(arity);
+  *inst_ptr = &already_destroyed_aggregator_instance;
 }
 
 static void
 rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv)
 {
-  aggregator_wrapper_t *aw = sqlite3_user_data(ctx);
+  aggregator_instance_t *inst = rb_sqlite3_aggregate_instance(ctx);
   VALUE * params = NULL;
   int i;
 
@@ -23,16 +77,18 @@ rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv
       params[i] = sqlite3val2rb(argv[i]);
     }
   }
-  rb_funcall2(aw->handler_klass, rb_intern("step"), argc, params);
+  rb_funcall2(inst->handler_instance, rb_intern("step"), argc, params);
   xfree(params);
 }
 
+/* we assume that this function is only called once per execution context */
 static void
 rb_sqlite3_aggregator_final(sqlite3_context * ctx)
 {
-  aggregator_wrapper_t *aw = sqlite3_user_data(ctx);
-  VALUE result = rb_funcall(aw->handler_klass, rb_intern("finalize"), 0);
+  aggregator_instance_t *inst = rb_sqlite3_aggregate_instance(ctx);
+  VALUE result = rb_funcall(inst->handler_instance, rb_intern("finalize"), 0);
   set_sqlite3_func_result(ctx, result);
+  rb_sqlite3_aggregate_instance_destroy(ctx);
 }
 
 /* called both by sqlite3_create_function_v2's xDestroy-callback and
@@ -45,13 +101,15 @@ rb_sqlite3_aggregator_destroy(void *void_aw)
 {
   aggregator_wrapper_t *aw = void_aw;
   rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&aw->instances);
-  rb_sqlite3_list_elem_t *e;
-
-  while ((e = rb_sqlite3_list_iter_step(&iter))) {
-    rb_sqlite3_list_remove(e);
-    xfree(e);
+  aggregator_instance_t *inst;
+  
+  while ((inst = (aggregator_instance_t*)rb_sqlite3_list_iter_step(&iter))) {
+    rb_sqlite3_list_remove(&inst->list);
+    inst->handler_instance = Qnil;
+    xfree(inst);
   }
   rb_sqlite3_list_remove(&aw->list);
+  // chances are we see this in a use-after-free
   aw->handler_klass = Qnil;
   xfree(aw);
 }
@@ -64,10 +122,16 @@ rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx)
 {
   rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&ctx->aggregators);
   aggregator_wrapper_t *aw;
-
+  
   while ((aw = (aggregator_wrapper_t*)rb_sqlite3_list_iter_step(&iter))) {
+    rb_sqlite3_list_iter_t iter2 = rb_sqlite3_list_iter_new(&aw->instances);
+    aggregator_instance_t *inst;
+
     rb_gc_mark(aw->handler_klass);
-    /* TODO: mark instances */
+
+    while ((inst = (aggregator_instance_t*)rb_sqlite3_list_iter_step(&iter2))) {
+      rb_gc_mark(inst->handler_instance);
+    }
   }
 }
 
@@ -115,27 +179,66 @@ int rb_sqlite3_create_function_v1or2(sqlite3 *db, const char *zFunctionName,
 #endif
 }
 
-/* call-seq: define_aggregator(name, aggregator)
+/* call-seq: define_aggregator2(aggregator)
  *
- * Define an aggregate function named +name+ using the object +aggregator+.
- * +aggregator+ must respond to +step+ and +finalize+.  +step+ will be called
- * with row information and +finalize+ must return the return value for the
- * aggregator function.
+ * Define an aggregrate function according to a factory object (the "handler")
+ * that knows how to obtain to all the information. The handler must provide
+ * the following class methods:
+ *
+ * +arity+:: corresponds to the +arity+ parameter of #create_aggregate. This
+ *           message is optional, and if the handler does not respond to it,
+ *           the function will have an arity of -1.
+ * +name+:: this is the name of the function. The handler _must_ implement
+ *          this message.
+ * +new+:: this must be implemented by the handler. It should return a new
+ *         instance of the object that will handle a specific invocation of
+ *         the function.
+ *
+ * The handler instance (the object returned by the +new+ message, described
+ * above), must respond to the following messages:
+ *
+ * +step+:: this is the method that will be called for each step of the
+ *          aggregate function's evaluation. It should take parameters according
+ *          to the *arity* definition.
+ * +finalize+:: this is the method that will be called to finalize the
+ *              aggregate function's evaluation. It should not take arguments.
+ *
+ * Note the difference between this function and #create_aggregate_handler
+ * is that no FunctionProxy ("ctx") object is involved. This manifests in two
+ * ways: The return value of the aggregate function is the return value of
+ * +finalize+ and neither +step+ nor +finalize+ take an additional "ctx"
+ * parameter.
  */
 VALUE
-rb_sqlite3_define_aggregator(VALUE self, VALUE name, VALUE aggregator)
+rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
 {
   /* define_aggregator is added as a method to SQLite3::Database in database.c */
   sqlite3RubyPtr ctx;
   int arity, status;
+  VALUE ruby_name;
 
   Data_Get_Struct(self, sqlite3Ruby, ctx);
   if (!ctx->db) {
     rb_raise(rb_path2class("SQLite3::Exception"), "cannot use a closed database");
   }
 
-  arity = rb_sqlite3_aggregator_obj_method_arity(aggregator, rb_intern("step"));
+  /* aggregator is typically a class and testing for :name or :new in class
+   * is a bit pointless */
 
+  ruby_name = rb_funcall(aggregator, rb_intern("name"), 0);
+
+  if (rb_respond_to(aggregator, rb_intern("arity"))) {
+    VALUE ruby_arity = rb_funcall(aggregator, rb_intern("arity"), 0);
+    arity = NUM2INT(ruby_arity);
+  } else {
+    arity = -1;
+  }
+
+  if (arity < -1 || arity > 127) {
+    rb_raise(rb_eArgError,"%+"PRIsVALUE" arity=%d outside range -1..127",
+            self, arity);
+  }
+  
   aggregator_wrapper_t *aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
   aw->handler_klass = aggregator;
   rb_sqlite3_list_elem_init(&aw->list);
@@ -143,7 +246,7 @@ rb_sqlite3_define_aggregator(VALUE self, VALUE name, VALUE aggregator)
 
   status = rb_sqlite3_create_function_v1or2(
     ctx->db,
-    StringValuePtr(name),
+    StringValueCStr(ruby_name),
     arity,
     SQLITE_UTF8,
     aw,
diff --git a/ext/sqlite3/aggregator.h b/ext/sqlite3/aggregator.h
index 26c4d160..3a191fcc 100644
--- a/ext/sqlite3/aggregator.h
+++ b/ext/sqlite3/aggregator.h
@@ -21,10 +21,19 @@ typedef struct rb_sqlite3_aggregator_wrapper {
 
   /* linked list of all in-flight instances of the AggregateHandler klass. */
   rb_sqlite3_list_head_t instances;
-} aggregator_wrapper_t;
+} rb_sqlite3_aggregator_wrapper_t;
+
+typedef struct rb_sqlite3_aggregator_instance {
+  /* my node in the aggregator_wrapper_t.instances linked ist. Relevent for
+   * the gc_mark function to find this handler_instance */
+   rb_sqlite3_list_elem_t list;
+
+   /* the AggragateHandler instance we are wrappeng here */
+   VALUE handler_instance;
+} rb_sqlite3_aggregator_instance_t;
 
 VALUE
-rb_sqlite3_define_aggregator(VALUE self, VALUE name, VALUE aggregator);
+rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator);
 
 void
 rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx);
diff --git a/ext/sqlite3/database.c b/ext/sqlite3/database.c
index 28294529..d398a916 100644
--- a/ext/sqlite3/database.c
+++ b/ext/sqlite3/database.c
@@ -800,8 +800,9 @@ void init_sqlite3_database()
   rb_define_method(cSqlite3Database, "last_insert_row_id", last_insert_row_id, 0);
   rb_define_method(cSqlite3Database, "define_function", define_function, 1);
   rb_define_method(cSqlite3Database, "define_function_with_flags", define_function_with_flags, 2);
-  /* rb_sqlite3_define_aggregator() moved to aggregator.c */
-  rb_define_method(cSqlite3Database, "define_aggregator", rb_sqlite3_define_aggregator, 2);
+  /* public "define_aggregator" is now a shim around define_aggregator2
+   * implemented in Ruby */
+  rb_define_private_method(cSqlite3Database, "define_aggregator2", rb_sqlite3_define_aggregator2, 1);
   rb_define_method(cSqlite3Database, "interrupt", interrupt, 0);
   rb_define_method(cSqlite3Database, "errmsg", errmsg, 0);
   rb_define_method(cSqlite3Database, "errcode", errcode_, 0);
diff --git a/lib/sqlite3/database.rb b/lib/sqlite3/database.rb
index 304b10f0..98919b63 100644
--- a/lib/sqlite3/database.rb
+++ b/lib/sqlite3/database.rb
@@ -440,42 +440,52 @@ def create_function name, arity, text_rep=Constants::TextRep::UTF8, &block
     def create_aggregate( name, arity, step=nil, finalize=nil,
       text_rep=Constants::TextRep::ANY, &block )
 
-      factory = Class.new do
+      proxy = Class.new do
         def self.step( &block )
-          define_method(:step, &block)
+          define_method(:step_with_ctx, &block)
         end
 
         def self.finalize( &block )
-          define_method(:finalize, &block)
+          define_method(:finalize_with_ctx, &block)
         end
       end
 
       if block_given?
-        factory.instance_eval(&block)
+        proxy.instance_eval(&block)
       else
-        factory.class_eval do
-          define_method(:step, step)
-          define_method(:finalize, finalize)
+        proxy.class_eval do
+          define_method(:step_with_ctx, step)
+          define_method(:finalize_with_ctx, finalize)
         end
       end
 
-      proxy = factory.new
-      proxy.extend(Module.new {
-        attr_accessor :ctx
+      proxy.class_eval do
+        # class instance variables
+        @name = name
+        @arity = arity
+
+        def self.name
+          @name
+        end
+
+        def self.arity
+          @arity
+        end
+
+        def initialize
+          @ctx = FunctionProxy.new
+        end
 
         def step( *args )
-          super(@ctx, *args)
+          step_with_ctx(@ctx, *args)
         end
 
         def finalize
-          super(@ctx)
-          result = @ctx.result
-          @ctx = FunctionProxy.new
-          result
+          finalize_with_ctx(@ctx)
+          @ctx.result
         end
-      })
-      proxy.ctx = FunctionProxy.new
-      define_aggregator(name, proxy)
+      end
+      define_aggregator2(proxy)
     end
 
     # This is another approach to creating an aggregate function (see
@@ -526,29 +536,71 @@ def finalize
     #   db.create_aggregate_handler( LengthsAggregateHandler )
     #   puts db.get_first_value( "select lengths(name) from A" )
     def create_aggregate_handler( handler )
-      proxy = Class.new do
-        def initialize klass
-          @klass = klass
-          @fp    = FunctionProxy.new
+      # This is a compatiblity shim so the (basically pointless) FunctionProxy
+      # "ctx" object is passed as first argument to both step() and finalize().
+      # Now its up to the library user whether he prefers to store his
+      # temporaries as instance varibales or fields in the FunctionProxy.
+      # The library user still must set the result value with
+      # FunctionProxy.result= as there is no backwards compatible way to
+      # change this.
+      proxy = Class.new(handler) do
+        def initialize
+          super
+          @fp = FunctionProxy.new
         end
 
         def step( *args )
-          instance.step(@fp, *args)
+          super(@fp, *args)
         end
 
         def finalize
-          instance.finalize @fp
-          @instance = nil
+          super(@fp)
           @fp.result
         end
+      end
+      define_aggregator2(proxy)
+      self
+    end
+
+    # Define an aggregate function named +name+ using a object template
+    # object +aggregator+. +aggregator+ must respond to +step+ and +finalize+.
+    # +step+ will be called with row information and +finalize+ must return the
+    # return value for the aggregator function.
+    #
+    # _API Change:_ +aggregator+ must also implement +clone+. The provided
+    # +aggregator+ object will serve as template that is cloned to provide the
+    # individual instances of the aggregate function. Regular ruby objects
+    # already provide a suitable +clone+.
+    # The functions arity is -1 (veryable arity).
+    def define_aggregator( name, aggregator )
+      # Previously, this has been implemented in C. Now this is just yet
+      # another compatiblity shim
+      proxy = Class.new do
+        @template = aggregator
+        @name = name
+
+        def self.template
+          @template
+        end
 
-        private
+        def self.name
+          @name
+        end
+
+        def initialize
+          @klass = self.class.template.clone
+        end
 
-        def instance
-          @instance ||= @klass.new
+        def step(*args)
+          @klass.step(*args)
+        end
+
+        def finalize
+          @klass.finalize
         end
       end
-      define_aggregator(handler.name, proxy.new(handler))
+      # as proxy does not set arity, it will be -1
+      define_aggregator2(proxy)
       self
     end
 

From dda1563667e16fe8af73f587e9c9ab03d5ac0166 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Mon, 27 Feb 2017 20:08:54 +0100
Subject: [PATCH 08/18] test aggregate with invalid arity

Arities outside [-1,127] invoke undefined behavior.
---
 test/test_integration_aggregate.rb | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index d64a9b85..78bb45b5 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -165,6 +165,15 @@ def test_create_aggregate_overwrite_function_with_different_arity
     assert_equal 2145, values[1]
   end
 
+  def test_create_aggregate_with_invalid_arity
+    assert_raise ArgumentError do
+      @db.create_aggregate( "accumulate", 1000 ) do
+        step {|ctx,*args| }
+        finalize { |ctx| }
+      end
+    end
+  end
+
   class CustomException < Exception
   end
 

From 7c766cf5d4c3d6e147c234bef8e1286a55e9de9b Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Mon, 27 Feb 2017 20:13:24 +0100
Subject: [PATCH 09/18] avoid heap allocation for single VALUE

A tiny optimization for the common case that an aggregate function
takes only one value.
---
 ext/sqlite3/aggregator.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 7e8d35f8..100025cc 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -69,16 +69,23 @@ rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv
 {
   aggregator_instance_t *inst = rb_sqlite3_aggregate_instance(ctx);
   VALUE * params = NULL;
+  VALUE one_param;
   int i;
 
-  if (argc > 0) {
+  if (argc == 1) {
+    one_param = sqlite3val2rb(argv[i]);
+    params = &one_param;
+  }
+  if (argc > 1) {
     params = xcalloc((size_t)argc, sizeof(VALUE));
     for(i = 0; i < argc; i++) {
       params[i] = sqlite3val2rb(argv[i]);
     }
   }
   rb_funcall2(inst->handler_instance, rb_intern("step"), argc, params);
-  xfree(params);
+  if (argc > 1) {
+    xfree(params);
+  }
 }
 
 /* we assume that this function is only called once per execution context */

From 0fde2b6adf621239120b211ea5e68096819120e6 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Mon, 27 Feb 2017 22:03:29 +0100
Subject: [PATCH 10/18] test aggregate handlers raise in new/finalize

---
 test/test_integration_aggregate.rb | 42 +++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index 78bb45b5..382c73c7 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -177,7 +177,7 @@ def test_create_aggregate_with_invalid_arity
   class CustomException < Exception
   end
 
-  def test_create_aggregate_with_exception
+  def test_create_aggregate_with_exception_in_step
     @db.create_aggregate( "raiseexception", 1 ) do
       step do |ctx,a|
         raise CustomException.new( "bogus aggregate handler" )
@@ -191,6 +191,22 @@ def test_create_aggregate_with_exception
     end
   end
 
+  def test_create_aggregate_with_exception_in_finalize
+    @db.create_aggregate( "raiseexception", 1 ) do
+      step do |ctx,a|
+        raise CustomException.new( "bogus aggregate handler" )
+      end
+
+      finalize do |ctx|
+        raise CustomException.new( "bogus aggregate handler" )
+      end
+    end
+
+    assert_raise CustomException do
+      @db.get_first_value( "select raiseexception(a) from foo")
+    end
+  end
+
   def test_create_aggregate_with_no_data
     @db.create_aggregate( "accumulate", 1 ) do
       step do |ctx,a|
@@ -242,7 +258,7 @@ def test_create_aggregate_handler_call_with_wrong_arity
    end
   end
 
-  class RaiseExceptionAggregateHandler
+  class RaiseExceptionStepAggregateHandler
     class << self
       def arity; 1; end
       def text_rep; SQLite3::Constants::TextRep::ANY; end
@@ -254,8 +270,26 @@ def step(ctx, a)
     def finalize(ctx); ctx.result = nil; end
   end
 
-  def test_create_aggregate_handler_with_exception
-    @db.create_aggregate_handler RaiseExceptionAggregateHandler
+  def test_create_aggregate_handler_with_exception_step
+    @db.create_aggregate_handler RaiseExceptionStepAggregateHandler
+    assert_raise CustomException do
+      @db.get_first_value( "select raiseexception(a) from foo")
+    end
+  end
+
+  class RaiseExceptionNewAggregateHandler
+    class << self
+      def name; "raiseexception"; end
+    end
+    def initialize
+      raise CustomException.new( "bogus aggregate handler" )
+    end
+    def step(ctx, a); end
+    def finalize(ctx); ctx.result = nil; end
+  end
+
+  def test_create_aggregate_handler_with_exception_new
+    @db.create_aggregate_handler RaiseExceptionNewAggregateHandler
     assert_raise CustomException do
       @db.get_first_value( "select raiseexception(a) from foo")
     end

From 4a3e62e6e6959f095662c4dfe0edac1367042f1e Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Mon, 27 Feb 2017 22:21:33 +0100
Subject: [PATCH 11/18] protect funcalls in aggregate callbacks

We call +new+, +step+ and +finalize+ in callback context from
sqlite3_step.
If one of these function raises, we have to deal with the following
issues:
- all three functions (may) leak memory from xcalloc in this case
- the checks I inserted in 325ac74775bff4bef14d9b45e806e5b2f021baec
result in
  a rb_fatal() if +new+ raises.
- SQLite surely does not expect its callbacks to far-jump out of
sqlite3_step.
  I don't if that causes any inconsistencies in SQLite's internal data
structures.

We address these issues by protecting these three funcalls with
rb_protect, which suppresses the exceptions. Statement.step is the
point where suppressed exceptions should be re-raised. As I could not
find an easy way to transport the status value from rb_protect to
Statement.step (sqlite3_set_error_code would be possible but fragile),
I instead re-raise the exception from rb_errcode(). In principal, this
may raise any exception in $!, but I don't see how this would affect
normal client code.

I also set sqlite3_result_error in finalize to indicate an exception,
although the user should see the raised exception instead. It would
certainly be more consequent to call sqlite3_result_error as soon as we
catch an exception. I guess this would stop SQLite from doing any more
useless work and abort the query. But, as I already wrote, I am unsure
about the consequences for memory lifetime in that case, so it seems
safer to me to defer any error until finalize.
---
 ext/sqlite3/aggregator.c | 60 ++++++++++++++++++++++++++++++++++++----
 ext/sqlite3/aggregator.h |  8 ++++++
 ext/sqlite3/statement.c  |  9 ++++++
 3 files changed, 71 insertions(+), 6 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 100025cc..76d9171f 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -6,6 +6,33 @@ typedef rb_sqlite3_aggregator_instance_t aggregator_instance_t;
 
 static aggregator_instance_t already_destroyed_aggregator_instance;
 
+typedef struct rb_sqlite3_protected_funcall_args {
+    VALUE self;
+    ID method;
+    int argc;
+    VALUE *params;
+} protected_funcall_args_t;
+
+/* why isn't there something like this in the ruby API? */
+static VALUE
+rb_sqlite3_protected_funcall_body(VALUE protected_funcall_args_ptr)
+{
+  protected_funcall_args_t *args =
+    (protected_funcall_args_t*)protected_funcall_args_ptr;
+
+  return rb_funcall2(args->self, args->method, args->argc, args->params);
+}
+
+static VALUE
+rb_sqlite3_protected_funcall(VALUE self, ID method, int argc, VALUE *params,
+                             int* exc_status)
+{
+  protected_funcall_args_t args = {
+    .self = self, .method = method, .argc = argc, .params = params
+  };
+  return rb_protect(rb_sqlite3_protected_funcall_body, (VALUE)(&args), exc_status);
+}
+
 /* called in rb_sqlite3_aggregator_step and rb_sqlite3_aggregator_final. It
  * checks if the exection context already has an associated instance. If it
  * has one, it returns it. If there is no instance yet, it creates one and
@@ -25,11 +52,14 @@ rb_sqlite3_aggregate_instance(sqlite3_context *ctx)
   inst = *inst_ptr;
   if (!inst) {
     inst = xcalloc((size_t)1, sizeof(aggregator_instance_t));
-    // just so we know should rb_funcall raise and we get here a second time.
-    *inst_ptr = &already_destroyed_aggregator_instance;
     rb_sqlite3_list_elem_init(&inst->list);
-    inst->handler_instance = rb_funcall(aw->handler_klass, rb_intern("new"), 0);
+
+    inst->handler_instance = rb_sqlite3_protected_funcall(
+      aw->handler_klass, rb_intern("new"), 0, NULL, &inst->exc_status);
+    /* we create via instance even if new failed such that step() and finalize()
+     * can go on as usual */
     rb_sqlite3_list_insert_tail(&aw->instances, &inst->list);
+
     *inst_ptr = inst;
   }
 
@@ -72,6 +102,10 @@ rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv
   VALUE one_param;
   int i;
 
+  if (inst->exc_status) {
+    return;
+  }
+
   if (argc == 1) {
     one_param = sqlite3val2rb(argv[i]);
     params = &one_param;
@@ -82,7 +116,8 @@ rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv
       params[i] = sqlite3val2rb(argv[i]);
     }
   }
-  rb_funcall2(inst->handler_instance, rb_intern("step"), argc, params);
+  rb_sqlite3_protected_funcall(
+    inst->handler_instance, rb_intern("step"), argc, params, &inst->exc_status);
   if (argc > 1) {
     xfree(params);
   }
@@ -93,8 +128,21 @@ static void
 rb_sqlite3_aggregator_final(sqlite3_context * ctx)
 {
   aggregator_instance_t *inst = rb_sqlite3_aggregate_instance(ctx);
-  VALUE result = rb_funcall(inst->handler_instance, rb_intern("finalize"), 0);
-  set_sqlite3_func_result(ctx, result);
+  if (!inst->exc_status) {
+    VALUE result = rb_sqlite3_protected_funcall(
+      inst->handler_instance, rb_intern("finalize"), 0, NULL, &inst->exc_status);
+    if (!inst->exc_status) {
+      set_sqlite3_func_result(ctx, result);
+    }
+  }
+
+  if (inst->exc_status) {
+    /* the user should never see this, as Statement.step() will pick up the
+     * outstanding exception and raise it instead of generating a new one
+     * for SQLITE_ERROR with message "Ruby Exception occured" */
+    sqlite3_result_error(ctx, "Ruby Exception occured", -1);
+  }
+
   rb_sqlite3_aggregate_instance_destroy(ctx);
 }
 
diff --git a/ext/sqlite3/aggregator.h b/ext/sqlite3/aggregator.h
index 3a191fcc..b5c1bfed 100644
--- a/ext/sqlite3/aggregator.h
+++ b/ext/sqlite3/aggregator.h
@@ -23,6 +23,8 @@ typedef struct rb_sqlite3_aggregator_wrapper {
   rb_sqlite3_list_head_t instances;
 } rb_sqlite3_aggregator_wrapper_t;
 
+/* the aggregator_instance holds a refence to an instance of its
+ * AggregatorHandler class. */
 typedef struct rb_sqlite3_aggregator_instance {
   /* my node in the aggregator_wrapper_t.instances linked ist. Relevent for
    * the gc_mark function to find this handler_instance */
@@ -30,6 +32,12 @@ typedef struct rb_sqlite3_aggregator_instance {
 
    /* the AggragateHandler instance we are wrappeng here */
    VALUE handler_instance;
+
+   /* status as returned by rb_protect. If this is non-zero we already had an
+    * expception. From that point on, step() won't call into Ruby anymore
+    * and finalize() will just call sqlite3_result_error. The exception
+    * itself is carried via rb_errinfo up to Statement.step. */
+   int exc_status;
 } rb_sqlite3_aggregator_instance_t;
 
 VALUE
diff --git a/ext/sqlite3/statement.c b/ext/sqlite3/statement.c
index 0e5ef85d..2dab7a79 100644
--- a/ext/sqlite3/statement.c
+++ b/ext/sqlite3/statement.c
@@ -125,6 +125,15 @@ static VALUE step(VALUE self)
   stmt = ctx->st;
 
   value = sqlite3_step(stmt);
+  if (rb_errinfo() != Qnil) {
+    /* some user defined function was invoked as a callback during step and
+     * it raised an exception that has been suppressed until step returns.
+     * Now re-raise it. */
+    VALUE exception = rb_errinfo();
+    rb_set_errinfo(Qnil);
+    rb_exc_raise(exception);
+  }
+
   length = sqlite3_column_count(stmt);
   list = rb_ary_new2((long)length);
 

From 773775dfaccdd82896f0c6d7b48c21339112bce9 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Tue, 28 Feb 2017 00:43:14 +0100
Subject: [PATCH 12/18] parameters, not grouped expr

---
 test/test_integration_aggregate.rb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index 382c73c7..96d5c68d 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -253,7 +253,7 @@ def test_aggregate_initialized_twice
   def test_create_aggregate_handler_call_with_wrong_arity
     @db.create_aggregate_handler AggregateHandler
 
-    assert_raise (SQLite3::SQLException) do
+    assert_raise(SQLite3::SQLException) do
      @db.get_first_value( "select multiply(a,c) from foo" )
    end
   end

From 2e97356aaa9934b1fdd9525bab7b975c61d79cd5 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Tue, 28 Feb 2017 00:46:01 +0100
Subject: [PATCH 13/18] No PRIsVALUE in ruby 1.9.3

---
 ext/sqlite3/aggregator.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 76d9171f..885e400a 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -290,10 +290,14 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
   }
 
   if (arity < -1 || arity > 127) {
-    rb_raise(rb_eArgError,"%+"PRIsVALUE" arity=%d outside range -1..127",
+#ifdef PRIsVALUE
+    rb_raise(rb_eArgError, "%"PRIsVALUE" arity=%d out of range -1..127",
             self, arity);
+#else
+    rb_raise(rb_eArgError, "Aggregator arity=%d out of range -1..127", arity);
+#endif
   }
-  
+
   aggregator_wrapper_t *aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
   aw->handler_klass = aggregator;
   rb_sqlite3_list_elem_init(&aw->list);

From 387d06109adbeba68adf262c568076c60c68a893 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Tue, 28 Feb 2017 00:48:19 +0100
Subject: [PATCH 14/18] avoid mixed declarations

Silence some warnings.
---
 ext/sqlite3/aggregator.c | 3 ++-
 ext/sqlite3/list.h       | 5 ++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 885e400a..9ccf725b 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -271,6 +271,7 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
   sqlite3RubyPtr ctx;
   int arity, status;
   VALUE ruby_name;
+  aggregator_wrapper_t *aw;
 
   Data_Get_Struct(self, sqlite3Ruby, ctx);
   if (!ctx->db) {
@@ -298,7 +299,7 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
 #endif
   }
 
-  aggregator_wrapper_t *aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
+  aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
   aw->handler_klass = aggregator;
   rb_sqlite3_list_elem_init(&aw->list);
   rb_sqlite3_list_head_init(&aw->instances);
diff --git a/ext/sqlite3/list.h b/ext/sqlite3/list.h
index 043de81b..6a97b021 100644
--- a/ext/sqlite3/list.h
+++ b/ext/sqlite3/list.h
@@ -162,9 +162,12 @@ rb_sqlite3_list_remove(rb_sqlite3_list_elem_t* elem)
 static inline rb_sqlite3_list_iter_t
 rb_sqlite3_list_iter_new(rb_sqlite3_list_head_t* head)
 {
+  rb_sqlite3_list_iter_t iter;
+
   /* again check if head looks initalized */
   assert(head->elem.prev && head->elem.next);
-  rb_sqlite3_list_iter_t iter = {.head = head, .next = head->elem.next };
+  iter.head = head;
+  iter.next = head->elem.next;
   return iter;
 }
 

From c5a01a05f18892a772514179fa8efe7e320315f9 Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Wed, 1 Mar 2017 10:55:57 +0100
Subject: [PATCH 15/18] avoid leak in define_aggregator2

Call StringValueCStr before xcalloc but last of all calls into ruby.
This way we do not leak memory should name.to_s raise or return a
String with interior null.
---
 ext/sqlite3/aggregator.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 9ccf725b..14f5aadf 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -271,6 +271,7 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
   sqlite3RubyPtr ctx;
   int arity, status;
   VALUE ruby_name;
+  const char *name;
   aggregator_wrapper_t *aw;
 
   Data_Get_Struct(self, sqlite3Ruby, ctx);
@@ -299,6 +300,8 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
 #endif
   }
 
+  name = StringValueCStr(ruby_name);
+
   aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
   aw->handler_klass = aggregator;
   rb_sqlite3_list_elem_init(&aw->list);
@@ -306,7 +309,7 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
 
   status = rb_sqlite3_create_function_v1or2(
     ctx->db,
-    StringValueCStr(ruby_name),
+    name,
     arity,
     SQLITE_UTF8,
     aw,

From 0ea5ff3415cf67059512c52b957104d4f0d1f85e Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Wed, 1 Mar 2017 11:28:18 +0100
Subject: [PATCH 16/18] define_aggregator respects step's arity again

This has been undocumented behavior before: while create_aggregate and
create_aggregate_handler effectively used an arity of -1, the library
user could actually define functions with a different arity by calling
define_aggregator directly. I was not aware of this behavior and
therefore broke it by forcing the arity to -1. Now I added the required
shim and a test.
---
 lib/sqlite3/database.rb            |  8 ++++++--
 test/test_integration_aggregate.rb | 33 ++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/lib/sqlite3/database.rb b/lib/sqlite3/database.rb
index 98919b63..2657a1f5 100644
--- a/lib/sqlite3/database.rb
+++ b/lib/sqlite3/database.rb
@@ -571,7 +571,7 @@ def finalize
     # +aggregator+ object will serve as template that is cloned to provide the
     # individual instances of the aggregate function. Regular ruby objects
     # already provide a suitable +clone+.
-    # The functions arity is -1 (veryable arity).
+    # The functions arity is the arity of the +step+ method.
     def define_aggregator( name, aggregator )
       # Previously, this has been implemented in C. Now this is just yet
       # another compatiblity shim
@@ -587,6 +587,11 @@ def self.name
           @name
         end
 
+        def self.arity
+          # this is what sqlite3_obj_method_arity did before
+          @template.method(:step).arity
+        end
+
         def initialize
           @klass = self.class.template.clone
         end
@@ -599,7 +604,6 @@ def finalize
           @klass.finalize
         end
       end
-      # as proxy does not set arity, it will be -1
       define_aggregator2(proxy)
       self
     end
diff --git a/test/test_integration_aggregate.rb b/test/test_integration_aggregate.rb
index 96d5c68d..be9a9c37 100644
--- a/test/test_integration_aggregate.rb
+++ b/test/test_integration_aggregate.rb
@@ -300,4 +300,37 @@ def test_create_aggregate_handler
     value = @db.get_first_value( "select multiply(a) from foo" )
     assert_equal 6, value
   end
+
+  class AccumulateAggregator
+    def step(*args)
+      @sum ||= 0
+      args.each { |a| @sum += a.to_i }
+    end
+
+    def finalize
+      @sum
+    end
+  end
+
+  class AccumulateAggregator2
+    def step(a, b)
+      @sum ||= 1
+      @sum *= (a.to_i + b.to_i)
+    end
+
+    def finalize
+      @sum
+    end
+  end
+
+  def test_define_aggregator_with_two_different_arities
+    @db.define_aggregator( "accumulate", AccumulateAggregator.new )
+    @db.define_aggregator( "accumulate", AccumulateAggregator2.new )
+
+    GC.start
+
+    values = @db.get_first_row( "select accumulate(c), accumulate(a,c) from foo")
+    assert_equal 33, values[0]
+    assert_equal 2145, values[1]
+  end
 end

From 06ee4ac645e916f0f8c8d25596c1361e044baf6b Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Tue, 28 Mar 2017 22:45:38 +0200
Subject: [PATCH 17/18] Aggregators backed by ruby arrays, no xDestroy callback

All data is now managed in plain ruby objects referenced from ruby
arrays. Instance variables are prefixed with "-" to make them
inaccessible from scripts.

I ran into the finalization issue in `deallocate()`: When
`deallocate()` calls `sqlite3_close()` the object passed to
`sqlite3_create_function` must still be alive. Otherwise the
`xDestroy()` handler will attempt to destroy something that is not
around any more. Unfortunately, when `deallocate()` is invoked, the GC
might already have released the aggregator wrappers, because the
SQLite3::Database ruby object is already dead at that point. The only
way to circumvent this, that I came up with, would have been to
reference the aggregator wrappers from a global root, e.g. by
referencing a ruby array with `rb_gc_register_address()`. Given that
the aim was to cut code size, I considered it more appropriate to do
without the xDestroy callback all together. That is, I call the old
sqlite3_create_function API again.
---
 Manifest.txt             |   3 +
 ext/sqlite3/aggregator.c | 214 +++++++++++++++------------------------
 ext/sqlite3/aggregator.h |  42 +-------
 ext/sqlite3/database.c   |  13 +--
 ext/sqlite3/database.h   |   2 -
 ext/sqlite3/extconf.rb   |   1 -
 ext/sqlite3/list.h       | 191 ----------------------------------
 7 files changed, 90 insertions(+), 376 deletions(-)
 delete mode 100644 ext/sqlite3/list.h

diff --git a/Manifest.txt b/Manifest.txt
index 696c7f3e..01da3677 100644
--- a/Manifest.txt
+++ b/Manifest.txt
@@ -9,6 +9,8 @@ Manifest.txt
 README.rdoc
 Rakefile
 appveyor.yml
+ext/sqlite3/aggregator.c
+ext/sqlite3/aggregator.h
 ext/sqlite3/backup.c
 ext/sqlite3/backup.h
 ext/sqlite3/database.c
@@ -47,6 +49,7 @@ test/test_database_readwrite.rb
 test/test_deprecated.rb
 test/test_encoding.rb
 test/test_integration.rb
+test/test_integration_aggregate.rb
 test/test_integration_open_close.rb
 test/test_integration_pending.rb
 test/test_integration_resultset.rb
diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 14f5aadf..6d0eed09 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -1,10 +1,24 @@
 #include <aggregator.h>
 #include <database.h>
 
-typedef rb_sqlite3_aggregator_wrapper_t aggregator_wrapper_t;
-typedef rb_sqlite3_aggregator_instance_t aggregator_instance_t;
-
-static aggregator_instance_t already_destroyed_aggregator_instance;
+/* wraps a factory "handler" class. The "-aggregators" instance variable of
+ * the SQLite3::Database holds an array of all AggrogatorWrappers.
+ *
+ * An AggregatorWrapper holds the following instance variables:
+ * -handler_klass: the handler that creates the instances.
+ * -instances:     array of all the cAggregatorInstance objects currently
+ *                 in-flight for this aggregator. */
+static VALUE cAggregatorWrapper;
+
+/* wraps a intance of the "handler" class. Loses its reference at the end of
+ * the xFinal callback.
+ *
+ * An AggregatorInstance holds the following instnace variables:
+ * -handler_instance: the instance to call `step` and `finalize` on.
+ * -exc_status:       status returned by rb_protect.
+ *                    != 0 if an exception occurred. If an exception occured
+ *                    `step` and `finalize` won't be called any more. */
+static VALUE cAggregatorInstance;
 
 typedef struct rb_sqlite3_protected_funcall_args {
     VALUE self;
@@ -37,33 +51,35 @@ rb_sqlite3_protected_funcall(VALUE self, ID method, int argc, VALUE *params,
  * checks if the exection context already has an associated instance. If it
  * has one, it returns it. If there is no instance yet, it creates one and
  * associates it with the context. */
-static aggregator_instance_t*
+static VALUE
 rb_sqlite3_aggregate_instance(sqlite3_context *ctx)
 {
-  aggregator_wrapper_t *aw = sqlite3_user_data(ctx);
-  aggregator_instance_t *inst;
-  aggregator_instance_t **inst_ptr =
-    sqlite3_aggregate_context(ctx, (int)sizeof(aggregator_instance_t*));
+  VALUE aw = (VALUE) sqlite3_user_data(ctx);
+  VALUE handler_klass = rb_iv_get(aw, "-handler_klass");
+  VALUE inst;
+  VALUE *inst_ptr = sqlite3_aggregate_context(ctx, (int)sizeof(VALUE));
 
   if (!inst_ptr) {
     rb_fatal("SQLite is out-of-merory");
   }
 
   inst = *inst_ptr;
-  if (!inst) {
-    inst = xcalloc((size_t)1, sizeof(aggregator_instance_t));
-    rb_sqlite3_list_elem_init(&inst->list);
 
-    inst->handler_instance = rb_sqlite3_protected_funcall(
-      aw->handler_klass, rb_intern("new"), 0, NULL, &inst->exc_status);
-    /* we create via instance even if new failed such that step() and finalize()
-     * can go on as usual */
-    rb_sqlite3_list_insert_tail(&aw->instances, &inst->list);
+  if (inst == Qfalse) { /* Qfalse == 0 */
+    VALUE instances = rb_iv_get(aw, "-instances");
+    int exc_status;
+
+    inst = rb_class_new_instance(0, NULL, cAggregatorInstance);
+    rb_iv_set(inst, "-handler_instance", rb_sqlite3_protected_funcall(
+      handler_klass, rb_intern("new"), 0, NULL, &exc_status));
+    rb_iv_set(inst, "-exc_status", INT2NUM(exc_status));
+
+    rb_ary_push(instances, inst);
 
     *inst_ptr = inst;
   }
 
-  if (inst == &already_destroyed_aggregator_instance) {
+  if (inst == Qnil) {
     rb_fatal("SQLite called us back on an already destroyed aggregate instance");
   }
 
@@ -76,33 +92,38 @@ rb_sqlite3_aggregate_instance(sqlite3_context *ctx)
 static void
 rb_sqlite3_aggregate_instance_destroy(sqlite3_context *ctx)
 {
-  aggregator_instance_t *inst;
-  aggregator_instance_t **inst_ptr = sqlite3_aggregate_context(ctx, 0);
+  VALUE aw = (VALUE) sqlite3_user_data(ctx);
+  VALUE instances = rb_iv_get(aw, "-instances");
+  VALUE *inst_ptr = sqlite3_aggregate_context(ctx, 0);
+  VALUE inst;
 
   if (!inst_ptr || (inst = *inst_ptr)) {
     return;
   }
 
-  if (inst == &already_destroyed_aggregator_instance) {
+  if (inst == Qnil) {
     rb_fatal("attempt to destroy aggregate instance twice");
   }
 
-  inst->handler_instance = Qnil; // may catch use-after-free
-  rb_sqlite3_list_remove(&inst->list);
-  xfree(inst);
+  rb_iv_set(inst, "-handler_instance", Qnil); // may catch use-after-free
+  if (rb_ary_delete(instances, inst) == Qnil) {
+    rb_fatal("must be in instances at that point");
+  }
 
-  *inst_ptr = &already_destroyed_aggregator_instance;
+  *inst_ptr = Qnil;
 }
 
 static void
 rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv)
 {
-  aggregator_instance_t *inst = rb_sqlite3_aggregate_instance(ctx);
+  VALUE inst = rb_sqlite3_aggregate_instance(ctx);
+  VALUE handler_instance = rb_iv_get(inst, "-handler_instance");
   VALUE * params = NULL;
   VALUE one_param;
+  int exc_status = NUM2INT(rb_iv_get(inst, "-exc_status"));
   int i;
 
-  if (inst->exc_status) {
+  if (exc_status) {
     return;
   }
 
@@ -117,26 +138,31 @@ rb_sqlite3_aggregator_step(sqlite3_context * ctx, int argc, sqlite3_value **argv
     }
   }
   rb_sqlite3_protected_funcall(
-    inst->handler_instance, rb_intern("step"), argc, params, &inst->exc_status);
+    handler_instance, rb_intern("step"), argc, params, &exc_status);
   if (argc > 1) {
     xfree(params);
   }
+
+  rb_iv_set(inst, "-exc_status", INT2NUM(exc_status));
 }
 
 /* we assume that this function is only called once per execution context */
 static void
 rb_sqlite3_aggregator_final(sqlite3_context * ctx)
 {
-  aggregator_instance_t *inst = rb_sqlite3_aggregate_instance(ctx);
-  if (!inst->exc_status) {
+  VALUE inst = rb_sqlite3_aggregate_instance(ctx);
+  VALUE handler_instance = rb_iv_get(inst, "-handler_instance");
+  int exc_status = NUM2INT(rb_iv_get(inst, "-exc_status"));
+
+  if (!exc_status) {
     VALUE result = rb_sqlite3_protected_funcall(
-      inst->handler_instance, rb_intern("finalize"), 0, NULL, &inst->exc_status);
-    if (!inst->exc_status) {
+      handler_instance, rb_intern("finalize"), 0, NULL, &exc_status);
+    if (!exc_status) {
       set_sqlite3_func_result(ctx, result);
     }
   }
 
-  if (inst->exc_status) {
+  if (exc_status) {
     /* the user should never see this, as Statement.step() will pick up the
      * outstanding exception and raise it instead of generating a new one
      * for SQLITE_ERROR with message "Ruby Exception occured" */
@@ -146,94 +172,6 @@ rb_sqlite3_aggregator_final(sqlite3_context * ctx)
   rb_sqlite3_aggregate_instance_destroy(ctx);
 }
 
-/* called both by sqlite3_create_function_v2's xDestroy-callback and
- * rb_sqlite3_aggregator_destory_all. Unlinks an aggregate_wrapper and all
- * its instances. The effect is that on the next run of
- * rb_sqlite3_aggregator_mark() will not find the VALUEs for the
- * AggregateHandler class and its instances anymore. */
-static void
-rb_sqlite3_aggregator_destroy(void *void_aw)
-{
-  aggregator_wrapper_t *aw = void_aw;
-  rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&aw->instances);
-  aggregator_instance_t *inst;
-  
-  while ((inst = (aggregator_instance_t*)rb_sqlite3_list_iter_step(&iter))) {
-    rb_sqlite3_list_remove(&inst->list);
-    inst->handler_instance = Qnil;
-    xfree(inst);
-  }
-  rb_sqlite3_list_remove(&aw->list);
-  // chances are we see this in a use-after-free
-  aw->handler_klass = Qnil;
-  xfree(aw);
-}
-
-/* called by rb_sqlite3_mark(), the mark function of Sqlite3::Database.
- * Marks all the AggregateHandler classes and their instances that are
- * currently in use. */
-void
-rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx)
-{
-  rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&ctx->aggregators);
-  aggregator_wrapper_t *aw;
-  
-  while ((aw = (aggregator_wrapper_t*)rb_sqlite3_list_iter_step(&iter))) {
-    rb_sqlite3_list_iter_t iter2 = rb_sqlite3_list_iter_new(&aw->instances);
-    aggregator_instance_t *inst;
-
-    rb_gc_mark(aw->handler_klass);
-
-    while ((inst = (aggregator_instance_t*)rb_sqlite3_list_iter_step(&iter2))) {
-      rb_gc_mark(inst->handler_instance);
-    }
-  }
-}
-
-/* called by sqlite3_rb_close or deallocate after sqlite3_close().
- * At that point the library user can not invoke SQLite APIs any more, so
- * SQLite can not call the AggregateHandlers callbacks any more. Consequently,
- * Ruby's GC is free to release them. To us, this means we may drop the
- * VALUE references by destorying all the remaining warppers
- *
- * Normally, the aggregators list should  be empty at that point
- * because SQLIite should have already called sqlite3_create_function_v2's
- * destroy callback of all registered aggregate functions. */
-void
-rb_sqlite3_aggregator_destroy_all(sqlite3RubyPtr ctx)
-{
-  rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&ctx->aggregators);
-  aggregator_wrapper_t *aw;
-
-  while ((aw = (aggregator_wrapper_t*)rb_sqlite3_list_iter_step(&iter))) {
-    rb_sqlite3_aggregator_destroy(aw);
-  }
-}
-
-/* sqlite3_create_function_v2 is available since version 3.7.3 (2010-10-08).
- * It features an additional xDestroy() callback that fires when sqlite does
- * not need some user defined function anymore, e.g. when overwritten by
- * another function with the same name.
- * As this is just a memory optimization, we fall back to the old
- * sqlite3_create_function if the new one is missing */
-int rb_sqlite3_create_function_v1or2(sqlite3 *db, const char *zFunctionName,
-  int nArg, int eTextRep, void *pApp,
-  void (*xFunc)(sqlite3_context*,int,sqlite3_value**),
-  void (*xStep)(sqlite3_context*,int,sqlite3_value**),
-  void (*xFinal)(sqlite3_context*),
-  void(*xDestroy)(void*)
-)
-{
-#ifdef HAVE_SQLITE3_CREATE_FUNCTION_V2
-  return sqlite3_create_function_v2(db, zFunctionName, nArg, eTextRep, pApp,
-    xFunc, xStep, xFinal, xDestroy);
-#else
-  (void)xDestroy;
-  return sqlite3_create_function(db, zFunctionName, nArg, eTextRep, pApp,
-    xFunc, xStep, xFinal);
-#endif
-}
-
 /* call-seq: define_aggregator2(aggregator)
  *
  * Define an aggregrate function according to a factory object (the "handler")
@@ -272,7 +210,8 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
   int arity, status;
   VALUE ruby_name;
   const char *name;
-  aggregator_wrapper_t *aw;
+  VALUE aw;
+  VALUE aggregators;
 
   Data_Get_Struct(self, sqlite3Ruby, ctx);
   if (!ctx->db) {
@@ -300,32 +239,43 @@ rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator)
 #endif
   }
 
+  if (!rb_ivar_defined(self, rb_intern("-aggregators"))) {
+    rb_iv_set(self, "-aggregators", rb_ary_new());
+  }
+  aggregators = rb_iv_get(self, "-aggregators");
+
   name = StringValueCStr(ruby_name);
 
-  aw = xcalloc((size_t)1, sizeof(aggregator_wrapper_t));
-  aw->handler_klass = aggregator;
-  rb_sqlite3_list_elem_init(&aw->list);
-  rb_sqlite3_list_head_init(&aw->instances);
+  aw = rb_class_new_instance(0, NULL, cAggregatorWrapper);
+  rb_iv_set(aw, "-handler_klass", aggregator);
+  rb_iv_set(aw, "-instances", rb_ary_new());
 
-  status = rb_sqlite3_create_function_v1or2(
+  status = sqlite3_create_function(
     ctx->db,
     name,
     arity,
     SQLITE_UTF8,
-    aw,
+    (void*)aw,
     NULL,
     rb_sqlite3_aggregator_step,
-    rb_sqlite3_aggregator_final,
-    rb_sqlite3_aggregator_destroy
+    rb_sqlite3_aggregator_final
   );
 
   if (status != SQLITE_OK) {
-    xfree(aw);
     rb_sqlite3_raise(ctx->db, status);
     return self; // just in case rb_sqlite3_raise returns.
   }
 
-  rb_sqlite3_list_insert_tail(&ctx->aggregators, &aw->list);
+  rb_ary_push(aggregators, aw);
 
   return self;
 }
+
+void
+rb_sqlite3_aggregator_init(void)
+{
+  rb_gc_register_address(&cAggregatorWrapper);
+  rb_gc_register_address(&cAggregatorInstance);
+  cAggregatorWrapper = rb_class_new(rb_cObject);
+  cAggregatorInstance = rb_class_new(rb_cObject);
+}
diff --git a/ext/sqlite3/aggregator.h b/ext/sqlite3/aggregator.h
index b5c1bfed..e5b32e3d 100644
--- a/ext/sqlite3/aggregator.h
+++ b/ext/sqlite3/aggregator.h
@@ -3,50 +3,10 @@
 
 #include <sqlite3_ruby.h>
 
-/* the aggregator_wrapper holds a reference to the AggregateHandler class
- * and a list to all instances of that class. The aggregator_wrappers
- * themselves form the _sqlite3Ruby.aggregators list. The purpose of this
- * construct is to make all the AggregateHandlers and their instances visible
- * to the GC mark() function of SQLite3::Database.
- * We remove aggregate handlers from this list once
- * sqlite3_create_function_v2's xDestory() callback fires. On close of the
- * database, we remove all remaining aggregate_wrappers. */
-typedef struct rb_sqlite3_aggregator_wrapper {
-  /* my node in the linked list of all aggregator wrappers. Relevant for
-   * the gc_mark function to find the handler_klass */
-  rb_sqlite3_list_elem_t list;
-
-  /* the AggregateHandler class we are wrapping here */
-  VALUE handler_klass;
-
-  /* linked list of all in-flight instances of the AggregateHandler klass. */
-  rb_sqlite3_list_head_t instances;
-} rb_sqlite3_aggregator_wrapper_t;
-
-/* the aggregator_instance holds a refence to an instance of its
- * AggregatorHandler class. */
-typedef struct rb_sqlite3_aggregator_instance {
-  /* my node in the aggregator_wrapper_t.instances linked ist. Relevent for
-   * the gc_mark function to find this handler_instance */
-   rb_sqlite3_list_elem_t list;
-
-   /* the AggragateHandler instance we are wrappeng here */
-   VALUE handler_instance;
-
-   /* status as returned by rb_protect. If this is non-zero we already had an
-    * expception. From that point on, step() won't call into Ruby anymore
-    * and finalize() will just call sqlite3_result_error. The exception
-    * itself is carried via rb_errinfo up to Statement.step. */
-   int exc_status;
-} rb_sqlite3_aggregator_instance_t;
-
 VALUE
 rb_sqlite3_define_aggregator2(VALUE self, VALUE aggregator);
 
 void
-rb_sqlite3_aggregator_mark(sqlite3RubyPtr ctx);
-
-void
-rb_sqlite3_aggregator_destroy_all(sqlite3RubyPtr ctx);
+rb_sqlite3_aggregator_init(void);
 
 #endif
diff --git a/ext/sqlite3/database.c b/ext/sqlite3/database.c
index d398a916..a60c9f10 100644
--- a/ext/sqlite3/database.c
+++ b/ext/sqlite3/database.c
@@ -13,20 +13,13 @@ static void deallocate(void * ctx)
   sqlite3 * db     = c->db;
 
   if(db) sqlite3_close(db);
-  rb_sqlite3_aggregator_destroy_all(ctx);
   xfree(c);
 }
 
-static void rb_sqlite3_mark(sqlite3RubyPtr ctx)
-{
-  rb_sqlite3_aggregator_mark(ctx);
-}
-
 static VALUE allocate(VALUE klass)
 {
   sqlite3RubyPtr ctx = xcalloc((size_t)1, sizeof(sqlite3Ruby));
-  rb_sqlite3_list_head_init(&ctx->aggregators);
-  return Data_Wrap_Struct(klass, rb_sqlite3_mark, deallocate, ctx);
+  return Data_Wrap_Struct(klass, NULL, deallocate, ctx);
 }
 
 static char *
@@ -81,7 +74,7 @@ static VALUE sqlite3_rb_close(VALUE self)
 
   ctx->db = NULL;
 
-  rb_sqlite3_aggregator_destroy_all(ctx);
+  rb_iv_set(self, "-aggregators", Qnil);
 
   return self;
 }
@@ -825,4 +818,6 @@ void init_sqlite3_database()
 #endif
 
   rb_define_method(cSqlite3Database, "encoding", db_encoding, 0);
+
+  rb_sqlite3_aggregator_init();
 }
diff --git a/ext/sqlite3/database.h b/ext/sqlite3/database.h
index b48be848..e0877894 100644
--- a/ext/sqlite3/database.h
+++ b/ext/sqlite3/database.h
@@ -2,11 +2,9 @@
 #define SQLITE3_DATABASE_RUBY
 
 #include <sqlite3_ruby.h>
-#include <list.h>
 
 struct _sqlite3Ruby {
   sqlite3 *db;
-  rb_sqlite3_list_head_t aggregators;
 };
 
 typedef struct _sqlite3Ruby sqlite3Ruby;
diff --git a/ext/sqlite3/extconf.rb b/ext/sqlite3/extconf.rb
index 5d536840..3fafa8d3 100644
--- a/ext/sqlite3/extconf.rb
+++ b/ext/sqlite3/extconf.rb
@@ -63,7 +63,6 @@ def asplode missing
 have_func('sqlite3_column_database_name')
 have_func('sqlite3_enable_load_extension')
 have_func('sqlite3_load_extension')
-have_func('sqlite3_create_function_v2')
 
 unless have_func('sqlite3_open_v2')
   abort "Please use a newer version of SQLite3"
diff --git a/ext/sqlite3/list.h b/ext/sqlite3/list.h
deleted file mode 100644
index 6a97b021..00000000
--- a/ext/sqlite3/list.h
+++ /dev/null
@@ -1,191 +0,0 @@
-#ifndef SQLITE3_LIST_RUBY
-#define SQLITE3_LIST_RUBY
-
-#include <assert.h>
-
-/* A simple doubly-linked list implementation with a sentinel node.
- *
- * Linked-Lists are tricky and this implementation aims to be easy for a
- * programmer to check and protect against most kinds of API-abuse with
- * assertions. It is inspired by the linux/list.h, but avoids macros and
- * offsetof(). Iteration is done via an explicti iterator instead of a
- * FOREACH macro.
- *
- * General use:
- * You have some "owner" or "parent" structure that holds/owns the list:
- *
- * struct parent {
- *   // other stuff
- *   rb_sqlite3_list_head_t foo_list;
- *   // more other stuff
- * };
- *
- * and a struct for the "foo" list (child) elements
- *
- * struct foo {
- *    rb_sqlite3_list_elem list;
- *    // other stuff
- * };
- *
- * I recommend putting the rb_sqlite3_list_elem first in the child so you can
- * freely cast between struct foo* and rb_sqlite3_list_elem*. If you put
- * rb_sqlite3_list_elem somewhere else you need to do the offsetof calculation
- * yourself.
- *
- * Make sure that both struct parent and struct foo do not move in memory, e.g.
- * allocate on the heap.
- * Also make sure that you initialize both the rb_sqlite3_list_head_t and
- * the rb_sqlite3_list_elem_t before doing anything else with them.
- *
- * The generic clear_list operation looks like:
- *
- * rb_sqlite3_list_iter_t iter = rb_sqlite3_list_iter_new(&parent->head);
- * rb_sqlite3_list_elem_t *e;
- * while ((e = rb_sqlite3_list_iter_step(&iter))) {
- *   rb_sqlite3_list_remove(e);
- *   free(e);
- * }
- *
- * The rest should be clear from the per-function comments below.
- */
-
-typedef struct rb_sqlite3_list_elem
-{
-  struct rb_sqlite3_list_elem* next;
-  struct rb_sqlite3_list_elem* prev;
-} rb_sqlite3_list_elem_t;
-
-/* the head is a sentinel node, it dos not hold any data and should be placed
- * into the object that "owns" the linked list. This struct exists, so that
- * C's type system provides minimal protection against mixing up the head
- * and the ordinary element nodes */
-typedef struct rb_sqlite3_list_head
-{
-  struct rb_sqlite3_list_elem elem;
-} rb_sqlite3_list_head_t;
-
-/* list iterators private data. Obtain a new iterator from
- * rb_sqlite3_list_iter_new(). Now call rb_sqlite3_list_iter_step() on it
- * until it returns NULL */
-typedef struct rb_sqlite3_list_iter
-{
-  struct rb_sqlite3_list_head* head; /* immutable. Always points at head */
-  struct rb_sqlite3_list_elem* next; /* returned by next invocation of step() */
-} rb_sqlite3_list_iter_t;
-
-/* initialize an element to the unconnected state. Should aways be the first
- * thing to do with a rb_sqlite3_list_elem_t. */
-static inline void
-rb_sqlite3_list_elem_init(rb_sqlite3_list_elem_t* elem)
-{
-  elem->prev = elem;
-  elem->next = elem;
-}
-
-/* initialize a list head to the empty state. Should aways be the first
- * thing to do with a rb_sqlite3_list_head_t */
-static inline void
-rb_sqlite3_list_head_init(rb_sqlite3_list_head_t* head)
-{
-  rb_sqlite3_list_elem_init(&head->elem);
-}
-
-/* return true iff the the list is empty. That is the only member of the
- * list is the sentinel */
-static inline int
-rb_sqlite3_list_empty(rb_sqlite3_list_head_t* head)
-{
-  /* an unitialized list is neither empty nor occupied, so you may not call
-   * list_empty on it */
-  assert(head->elem.prev && head->elem.next);
-  return head->elem.next == &head->elem;
-}
-
-/* consider this an internel helper function. Rather define additional
- * functions like rb_sqlite3_list_insert_after() or something like that
- * if you need this functionaliy */
-static inline void
-rb_sqlite3_list_insert_between(rb_sqlite3_list_elem_t* prev_element,
-                               rb_sqlite3_list_elem_t* new_element,
-                               rb_sqlite3_list_elem_t* next_element)
-{
-  /* detect uninitialized prev_element or next_element in the common case
-   * that it is zero filled */
-  assert(prev_element->prev && prev_element->next);
-  assert(next_element->prev && next_element->next);
-
-  /* next_element must actually be the successor of previous element */
-  assert(prev_element->next == next_element);
-  assert(next_element->prev == prev_element);
-
-  /* our new_element must be in the initialized, unconnected state. This
-   * is joust to protect from the mistake of inserting an already inserted
-   * element into a second list, which would corrupt the first list */
-  assert(new_element->prev == new_element);
-
-  new_element->prev = prev_element;
-  new_element->next = next_element;
-  prev_element->next = new_element;
-  next_element->prev = new_element;
-}
-
-/* insert new_element at the tail of the list. In other words insert
- * it before the head */
-static inline void
-rb_sqlite3_list_insert_tail(rb_sqlite3_list_head_t* head,
-                            rb_sqlite3_list_elem_t* new_element)
-{
-  rb_sqlite3_list_insert_between(head->elem.prev, new_element, &head->elem);
-}
-
-/* remove this element from a list. Thanks to this being a doubly-linked list,
- * you don't need the head element of the list. Only elem itself and its
- * direct neighbours are involved */
-static inline void
-rb_sqlite3_list_remove(rb_sqlite3_list_elem_t* elem)
-{
-  rb_sqlite3_list_elem_t* prev = elem->prev;
-  rb_sqlite3_list_elem_t* next = elem->next;
-
-  /* this assertion triggers if elem is not part of a linked list and elso
-   * detetcs zero-filled elem pointers */
-  assert(prev && prev != elem);
-
-  prev->next = next;
-  next->prev = prev;
-
-  /* avoid the common bug of reinserting uninitialized elems */
-  rb_sqlite3_list_elem_init(elem);
-}
-
-/* obtain a new iterator. */
-static inline rb_sqlite3_list_iter_t
-rb_sqlite3_list_iter_new(rb_sqlite3_list_head_t* head)
-{
-  rb_sqlite3_list_iter_t iter;
-
-  /* again check if head looks initalized */
-  assert(head->elem.prev && head->elem.next);
-  iter.head = head;
-  iter.next = head->elem.next;
-  return iter;
-}
-
-/* Yield the next element from the iterator or NULL if at the end of the
- * list. Once iteration is complete, will always return NULL.
- * list step is safe in the sense that you may remove the returned element
- * from the list and free() it. DO NOT MODIFY THE LIST IN ANY OTHER WAY. */
-static inline rb_sqlite3_list_elem_t*
-rb_sqlite3_list_iter_step(rb_sqlite3_list_iter_t* iter)
-{
-  rb_sqlite3_list_elem_t* tmp = iter->next;
-
-  if (iter->next == &iter->head->elem) {
-    return NULL;
-  }
-
-  iter->next = iter->next->next;
-  return tmp;
-}
-
-#endif

From 930dc070b6d2bce1a45e52574417c3491b7d98ad Mon Sep 17 00:00:00 2001
From: Jonathan Metter <JMetter@gmx.de>
Date: Wed, 29 Mar 2017 15:15:07 +0200
Subject: [PATCH 18/18] fix aggregators in Ruby 1.9

rb_class_new generatos class with undefined allocator in ruby 1.9. Calling Class.new is also a one-liner and works in ruby 1.9 and 2.x.
---
 ext/sqlite3/aggregator.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ext/sqlite3/aggregator.c b/ext/sqlite3/aggregator.c
index 6d0eed09..2a1ebf71 100644
--- a/ext/sqlite3/aggregator.c
+++ b/ext/sqlite3/aggregator.c
@@ -276,6 +276,7 @@ rb_sqlite3_aggregator_init(void)
 {
   rb_gc_register_address(&cAggregatorWrapper);
   rb_gc_register_address(&cAggregatorInstance);
-  cAggregatorWrapper = rb_class_new(rb_cObject);
-  cAggregatorInstance = rb_class_new(rb_cObject);
+  /* rb_class_new generatos class with undefined allocator in ruby 1.9 */
+  cAggregatorWrapper = rb_funcall(rb_cClass, rb_intern("new"), 0);
+  cAggregatorInstance = rb_funcall(rb_cClass, rb_intern("new"), 0);
 }