Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
ae162ec
perf: fix ArenaAllocator reset logic and update benchmark to use stri…
poyrazK Apr 7, 2026
3bda4e3
feat(executor): integrate PMR-compliant ArenaAllocator for zero-alloc…
poyrazK Apr 8, 2026
70fb252
feat(parser): implement parameter binding support (? placeholders)
poyrazK Apr 8, 2026
7048e71
feat(executor): implement Prepared Statement API with hot-path caching
poyrazK Apr 8, 2026
64b8018
perf(storage,txn): implement last-page caching and optimized RID lookups
poyrazK Apr 8, 2026
4781951
bench: update sqlite comparison to use Prepared Statement API
poyrazK Apr 8, 2026
41fb871
style: automated clang-format fixes
poyrazK Apr 8, 2026
48fcf46
fix: resolve Tuple ambiguity and LockManager test compilation errors
poyrazK Apr 8, 2026
1d09a7a
style: automated clang-format fixes
poyrazK Apr 8, 2026
71f912a
fix: resolve Tuple constructor ambiguity for std::vector and initiali…
poyrazK Apr 8, 2026
378b9c2
style: automated clang-format fixes
poyrazK Apr 8, 2026
3f4f4c5
fix: resolve Tuple constructor ambiguity and update LockManager tests…
poyrazK Apr 8, 2026
c49bcb9
style: automated clang-format fixes
poyrazK Apr 8, 2026
9d165e0
fix: resolve new_page signature mismatch and update LockManager tests
poyrazK Apr 8, 2026
14294cd
fix: prevent SegFault in HeapTable destructor during test cleanup
poyrazK Apr 8, 2026
8ceaa93
fix: ensure correct HeapTable destruction order in StoragePersistence…
poyrazK Apr 8, 2026
54f6025
fix: resolve new_page signature mismatch and HeapTable destruction order
poyrazK Apr 8, 2026
548839c
style: automated clang-format fixes
poyrazK Apr 8, 2026
6cf093b
fix: resolve SegFaults via proper PMR propagation and record boundaries
poyrazK Apr 9, 2026
0dc042b
style: automated clang-format fixes
poyrazK Apr 9, 2026
e9a845e
chore: cleanup debug artifacts and accidental files
poyrazK Apr 9, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions benchmarks/sqlite_comparison_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,15 +108,31 @@ struct SQLiteContext {
/**
 * @brief Benchmark raw insert throughput through the prepared-statement API.
 *
 * Parsing/planning happens once via prepare(); the hot loop only rebinds the
 * changing value and re-executes, so the measurement reflects engine speed
 * rather than parser overhead. A single BEGIN/COMMIT brackets the whole run
 * to keep per-iteration transaction cost out of the numbers.
 */
static void BM_CloudSQL_Insert(benchmark::State& state) {
    CloudSQLContext ctx("./bench_cloudsql_insert_" + std::to_string(state.thread_index()));

    // Prepare the statement once outside the hot loop
    auto prepared = ctx.executor->prepare("INSERT INTO bench_table VALUES (?, ?, ?);");
    if (!prepared) {
        state.SkipWithError("Failed to prepare statement");
        return;
    }

    // Pre-allocate params to avoid heap allocations in the loop
    std::vector<common::Value> params;
    params.reserve(3);
    params.push_back(common::Value::make_int64(0));
    params.push_back(common::Value::make_float64(3.14));
    params.push_back(common::Value::make_text("some_payload_data"));

    // Use a single transaction for the whole benchmark to reveal raw engine speed
    ctx.executor->execute("BEGIN");

    int64_t i = 0;
    for (auto _ : state) {
        // Update only the changing value; the other two params are reused as-is
        params[0] = common::Value::make_int64(i++);
        ctx.executor->execute(*prepared, params);
    }

    ctx.executor->execute("COMMIT");
    state.SetItemsProcessed(state.iterations());
}
BENCHMARK(BM_CloudSQL_Insert);
Expand Down
119 changes: 119 additions & 0 deletions include/common/arena_allocator.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/**
* @file arena_allocator.hpp
* @brief High-performance bump allocator for execution-scoped data
*/

#ifndef CLOUDSQL_COMMON_ARENA_ALLOCATOR_HPP
#define CLOUDSQL_COMMON_ARENA_ALLOCATOR_HPP

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <memory_resource>
#include <vector>

namespace cloudsql::common {

/**
* @class ArenaAllocator
* @brief Manages memory chunks and provides fast, contiguous allocations.
*
* Implements std::pmr::memory_resource for compatibility with standard
* containers like std::pmr::vector.
*/
/**
 * @class ArenaAllocator
 * @brief Manages memory chunks and provides fast, contiguous bump allocations.
 *
 * Implements std::pmr::memory_resource for compatibility with standard
 * containers like std::pmr::vector. Individual deallocation is a no-op;
 * call reset() to reclaim everything at once.
 *
 * NOTE(review): not thread-safe — each arena must be confined to a single
 * execution context.
 */
class ArenaAllocator : public std::pmr::memory_resource {
public:
    static constexpr size_t DEFAULT_CHUNK_SIZE = 65536;  // 64KB

    explicit ArenaAllocator(size_t chunk_size = DEFAULT_CHUNK_SIZE)
        : chunk_size_(chunk_size), current_chunk_idx_(0), current_offset_(0) {}

    ~ArenaAllocator() override {
        for (auto* chunk : chunks_) {
            delete[] chunk;
        }
        // Oversized allocations live in a separate list; free those too.
        for (auto* chunk : large_chunks_) {
            delete[] chunk;
        }
    }

    // Disable copy
    ArenaAllocator(const ArenaAllocator&) = delete;
    ArenaAllocator& operator=(const ArenaAllocator&) = delete;

    /**
     * @brief Reset the arena, reclaiming all memory for reuse.
     *
     * Keeps all regular chunks (they are simply overwritten by the next
     * epoch) but releases dedicated oversized buffers, since the bump path
     * can never reuse them. All pointers handed out before reset() are
     * invalidated.
     */
    void reset() {
        current_chunk_idx_ = 0;
        current_offset_ = 0;
        for (auto* chunk : large_chunks_) {
            delete[] chunk;
        }
        large_chunks_.clear();
    }

protected:
    /**
     * @brief Internal allocation logic for PMR.
     * @param bytes     Requested size; 0 returns nullptr.
     * @param alignment Requested alignment; the memory_resource contract
     *                  guarantees this is a power of two, so mask arithmetic
     *                  is valid.
     */
    void* do_allocate(size_t bytes, size_t alignment) override {
        if (bytes == 0) return nullptr;

        const size_t mask = alignment - 1;

        // Oversized requests get a dedicated buffer that is kept OUT of the
        // reusable chunk list, so the bump cursor can never hand out memory
        // aliasing a still-live large allocation.
        if (bytes > chunk_size_) {
            // Over-allocate so we can honor alignments stricter than the
            // fundamental alignment new[] provides.
            auto* raw = new uint8_t[bytes + mask];
            large_chunks_.push_back(raw);
            auto addr = reinterpret_cast<uintptr_t>(raw);
            return reinterpret_cast<void*>((addr + mask) & ~static_cast<uintptr_t>(mask));
        }

        // Bump-allocate from existing chunks, advancing the cursor when the
        // current chunk cannot satisfy the request.
        // NOTE: offset alignment implies address alignment only while
        // alignment <= the fundamental alignment of the chunk base — the
        // common case for PMR containers.
        while (current_chunk_idx_ < chunks_.size()) {
            size_t aligned_offset = (current_offset_ + mask) & ~mask;
            if (aligned_offset + bytes <= chunk_size_) {
                void* result = chunks_[current_chunk_idx_] + aligned_offset;
                current_offset_ = aligned_offset + bytes;
                return result;
            }
            ++current_chunk_idx_;
            current_offset_ = 0;
        }

        // All existing chunks exhausted: grow the pool. The fresh chunk is
        // empty and bytes <= chunk_size_, so the request is guaranteed to fit
        // at offset 0.
        allocate_new_chunk();
        void* result = chunks_[current_chunk_idx_];
        current_offset_ = bytes;
        return result;
    }

    /**
     * @brief PMR deallocate is a no-op for bump allocators (we reset the whole arena).
     */
    void do_deallocate(void* p, size_t bytes, size_t alignment) override {
        // No-op by design: memory is reclaimed wholesale via reset().
        (void)p;
        (void)bytes;
        (void)alignment;
    }

    /// Arenas are equal only to themselves (identity comparison).
    bool do_is_equal(const std::pmr::memory_resource& other) const noexcept override {
        return this == &other;
    }

private:
    /// Append one fresh reusable chunk; the caller positions the cursor.
    void allocate_new_chunk() {
        chunks_.push_back(new uint8_t[chunk_size_]);
    }

    size_t chunk_size_;                   // capacity of each reusable chunk
    std::vector<uint8_t*> chunks_;        // reusable bump-allocation chunks
    std::vector<uint8_t*> large_chunks_;  // dedicated oversized buffers
    size_t current_chunk_idx_;            // chunk the bump cursor points into
    size_t current_offset_;               // next free byte within that chunk
};

} // namespace cloudsql::common

#endif // CLOUDSQL_COMMON_ARENA_ALLOCATOR_HPP
29 changes: 29 additions & 0 deletions include/executor/operator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define CLOUDSQL_EXECUTOR_OPERATOR_HPP

#include <memory>
#include <memory_resource>
#include <optional>
#include <string>
#include <unordered_map>
Expand Down Expand Up @@ -52,6 +53,8 @@ class Operator {
std::string error_message_;
Transaction* txn_;
LockManager* lock_manager_;
std::pmr::memory_resource* mr_ = nullptr;
const std::vector<common::Value>* params_ = nullptr;

public:
explicit Operator(OperatorType type, Transaction* txn = nullptr,
Expand All @@ -71,6 +74,14 @@ class Operator {
[[nodiscard]] Transaction* get_txn() const { return txn_; }
[[nodiscard]] LockManager* get_lock_manager() const { return lock_manager_; }

virtual void set_memory_resource(std::pmr::memory_resource* mr) { mr_ = mr; }
[[nodiscard]] std::pmr::memory_resource* get_memory_resource() const {
return mr_ ? mr_ : std::pmr::get_default_resource();
}

virtual void set_params(const std::vector<common::Value>* params) { params_ = params; }
[[nodiscard]] const std::vector<common::Value>* get_params() const { return params_; }

virtual bool init() { return true; }
virtual bool open() { return true; }
virtual bool next(Tuple& out_tuple) {
Expand Down Expand Up @@ -191,6 +202,9 @@ class FilterOperator : public Operator {
void close() override;
[[nodiscard]] Schema& output_schema() override;
void add_child(std::unique_ptr<Operator> child) override;

void set_memory_resource(std::pmr::memory_resource* mr) override;
void set_params(const std::vector<common::Value>* params) override;
};

/**
Expand All @@ -212,6 +226,9 @@ class ProjectOperator : public Operator {
void close() override;
[[nodiscard]] Schema& output_schema() override;
void add_child(std::unique_ptr<Operator> child) override;

void set_memory_resource(std::pmr::memory_resource* mr) override;
void set_params(const std::vector<common::Value>* params) override;
};

/**
Expand All @@ -236,6 +253,9 @@ class SortOperator : public Operator {
bool next(Tuple& out_tuple) override;
void close() override;
[[nodiscard]] Schema& output_schema() override;

void set_memory_resource(std::pmr::memory_resource* mr) override;
void set_params(const std::vector<common::Value>* params) override;
};

/**
Expand Down Expand Up @@ -270,6 +290,9 @@ class AggregateOperator : public Operator {
bool next(Tuple& out_tuple) override;
void close() override;
[[nodiscard]] Schema& output_schema() override;

void set_memory_resource(std::pmr::memory_resource* mr) override;
void set_params(const std::vector<common::Value>* params) override;
};

/**
Expand Down Expand Up @@ -319,6 +342,9 @@ class HashJoinOperator : public Operator {
void close() override;
[[nodiscard]] Schema& output_schema() override;
void add_child(std::unique_ptr<Operator> child) override;

void set_memory_resource(std::pmr::memory_resource* mr) override;
void set_params(const std::vector<common::Value>* params) override;
};

/**
Expand All @@ -341,6 +367,9 @@ class LimitOperator : public Operator {
void close() override;
[[nodiscard]] Schema& output_schema() override;
void add_child(std::unique_ptr<Operator> child) override;

void set_memory_resource(std::pmr::memory_resource* mr) override;
void set_params(const std::vector<common::Value>* params) override;
};

} // namespace cloudsql::executor
Expand Down
49 changes: 49 additions & 0 deletions include/executor/query_executor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
#ifndef CLOUDSQL_EXECUTOR_QUERY_EXECUTOR_HPP
#define CLOUDSQL_EXECUTOR_QUERY_EXECUTOR_HPP

#include <mutex>
#include <unordered_map>

#include "catalog/catalog.hpp"
#include "common/arena_allocator.hpp"
#include "common/cluster_manager.hpp"
#include "distributed/raft_types.hpp"
#include "executor/operator.hpp"
Expand All @@ -18,6 +22,20 @@

namespace cloudsql::executor {

/**
* @brief Represents a pre-parsed and pre-planned SQL statement
*/
/**
 * @brief Represents a pre-parsed and pre-planned SQL statement
 */
struct PreparedStatement {
    // Parsed AST, shared with the executor's statement cache.
    std::shared_ptr<parser::Statement> stmt;
    // Original SQL text (used as the cache key and for diagnostics).
    std::string sql;

    // Cached execution state for hot-path optimization
    // NOTE(review): these handles appear to be mutated during execution
    // (heap-page caching, B-tree writes). Sharing one PreparedStatement
    // across threads would race on them, and DDL after prepare() would
    // leave them stale — confirm single-threaded use, or move these into
    // a per-execution context.
    const TableInfo* table_meta = nullptr;
    std::unique_ptr<Schema> schema;
    std::unique_ptr<storage::HeapTable> table;
    std::vector<std::unique_ptr<storage::BTreeIndex>> indexes;
};

/**
* @brief State machine for a specific data shard
*/
Expand Down Expand Up @@ -62,11 +80,32 @@ class QueryExecutor {
*/
void set_local_only(bool local) { is_local_only_ = local; }

/**
* @brief Prepare a SQL string into a reusable PreparedStatement
*/
std::shared_ptr<PreparedStatement> prepare(const std::string& sql);

/**
* @brief Execute a SQL statement and return results
*/
QueryResult execute(const parser::Statement& stmt);

/**
* @brief Execute a SQL string (includes parsing and cache lookup)
*/
QueryResult execute(const std::string& sql);

/**
* @brief Execute a PreparedStatement with bound parameters
*/
QueryResult execute(const PreparedStatement& prepared,
const std::vector<common::Value>& params);

/**
* @brief Get access to the query-scoped arena
*/
common::ArenaAllocator& arena() { return arena_; }

private:
Catalog& catalog_;
storage::BufferPoolManager& bpm_;
Expand All @@ -78,6 +117,16 @@ class QueryExecutor {
transaction::Transaction* current_txn_ = nullptr;
bool is_local_only_ = false;

// Bound parameters for the current execution
const std::vector<common::Value>* current_params_ = nullptr;

// Performance structures
common::ArenaAllocator arena_;

// Global statement cache (thread-safe)
static std::unordered_map<std::string, std::shared_ptr<parser::Statement>> statement_cache_;
static std::mutex cache_mutex_;

QueryResult execute_select(const parser::SelectStatement& stmt, transaction::Transaction* txn);
QueryResult execute_create_table(const parser::CreateTableStatement& stmt);
QueryResult execute_create_index(const parser::CreateIndexStatement& stmt);
Expand Down
34 changes: 30 additions & 4 deletions include/executor/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@
#define CLOUDSQL_EXECUTOR_TYPES_HPP

#include <cstdint>
#include <initializer_list>
#include <memory>
#include <memory_resource>
#include <stdexcept>
#include <string>
#include <vector>
Expand Down Expand Up @@ -120,14 +122,38 @@ class Schema {

/**
* @brief A single data row used in the row-oriented (Volcano) execution model.
*
* Uses std::pmr::vector to support custom allocators (e.g. ArenaAllocator).
*/
class Tuple {
private:
std::vector<common::Value> values_;
std::pmr::vector<common::Value> values_;

public:
Tuple() = default;
explicit Tuple(std::vector<common::Value> values) : values_(std::move(values)) {}

// Explicit PMR vector constructor
explicit Tuple(std::pmr::vector<common::Value> values) : values_(std::move(values)) {}

// Initializer list constructor
Tuple(std::initializer_list<common::Value> list) : values_(list) {}

// Support allocation from a custom memory resource
explicit Tuple(std::pmr::memory_resource* mr)
: values_(mr ? mr : std::pmr::get_default_resource()) {}

// Support construction from standard vector or PMR vector with specific resource
template <typename VectorType,
typename = std::enable_if_t<!std::is_same_v<std::decay_t<VectorType>, Tuple>>,
typename std::enable_if_t<
!std::is_same_v<std::decay_t<VectorType>, std::pmr::memory_resource*>>* = nullptr>
Tuple(const VectorType& values, std::pmr::memory_resource* mr = nullptr)
: values_(values.begin(), values.end(), mr ? mr : std::pmr::get_default_resource()) {}

template <typename VectorType,
typename = std::enable_if_t<!std::is_same_v<std::decay_t<VectorType>, Tuple>>>
explicit Tuple(VectorType&& values)
: values_(std::make_move_iterator(values.begin()), std::make_move_iterator(values.end())) {}

Tuple(const Tuple& other) = default;
Tuple(Tuple&& other) noexcept = default;
Expand Down Expand Up @@ -159,8 +185,8 @@ class Tuple {
[[nodiscard]] size_t size() const { return values_.size(); }
[[nodiscard]] bool empty() const { return values_.empty(); }

[[nodiscard]] const std::vector<common::Value>& values() const { return values_; }
[[nodiscard]] std::vector<common::Value>& values() { return values_; }
[[nodiscard]] const std::pmr::vector<common::Value>& values() const { return values_; }
[[nodiscard]] std::pmr::vector<common::Value>& values() { return values_; }

[[nodiscard]] std::string to_string() const;
};
Expand Down
Loading
Loading