parseablehq · nikhilsinhaparseable · Oct 25, 2025 · Oct 25, 2025 · coderabbitai · Oct 25, 2025
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -28,6 +28,9 @@ parquet = "54.0.0"
 # Web server and HTTP-related
 actix-cors = "0.7.0"
 actix-web = { version = "4.9.0", features = ["rustls-0_22"] }
+tikv-jemalloc-ctl = "0.6.0"
+tikv-jemallocator = "0.6.0"
+tikv-jemalloc-sys = "0.6.1"
 actix-web-httpauth = "0.8"
 actix-web-prometheus = { version = "0.1" }
 actix-web-static-files = "4.0"

diff --git a/src/handlers/http/modal/ingest_server.rs b/src/handlers/http/modal/ingest_server.rs
@@ -129,6 +129,9 @@ impl ParseableServer for IngestServer {
         let (cancel_tx, cancel_rx) = oneshot::channel();
         thread::spawn(|| sync::handler(cancel_rx));
 
+        // Initialize memory release scheduler
+        crate::memory::init_memory_release_scheduler()?;
+
         tokio::spawn(airplane::server());
 
         // Ingestors shouldn't have to deal with OpenId auth flow

diff --git a/src/handlers/http/modal/query_server.rs b/src/handlers/http/modal/query_server.rs
@@ -129,6 +129,9 @@ impl ParseableServer for QueryServer {
             analytics::init_analytics_scheduler()?;
         }
 
+        // Initialize memory release scheduler
+        crate::memory::init_memory_release_scheduler()?;
+
         if init_cluster_metrics_schedular().is_ok() {
             info!("Cluster metrics scheduler started successfully");
         }

diff --git a/src/handlers/http/modal/server.rs b/src/handlers/http/modal/server.rs
@@ -154,6 +154,9 @@ impl ParseableServer for Server {
             analytics::init_analytics_scheduler()?;
         }
 
+        // Initialize memory release scheduler
+        crate::memory::init_memory_release_scheduler()?;
+
         tokio::spawn(handlers::livetail::server());
         tokio::spawn(handlers::airplane::server());
 

diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs
@@ -35,7 +35,6 @@ use futures::stream::once;
 use futures::{Stream, StreamExt, future};
 use futures_util::Future;
 use http::StatusCode;
-use itertools::Itertools;
 use serde::{Deserialize, Serialize};
 use serde_json::{Value, json};
 use std::collections::HashMap;
@@ -241,9 +240,15 @@ async fn handle_non_streaming_query(
         with_fields: query_request.fields,
     }
     .to_json()?;
-    Ok(HttpResponse::Ok()
+
+    let http_response = HttpResponse::Ok()
         .insert_header((TIME_ELAPSED_HEADER, total_time.as_str()))
-        .json(response))
+        .json(response);
+
+    // NOTE: an explicit `force_memory_release()` call after building the response is
+    // disabled here; the scheduler started by `init_memory_release_scheduler` runs it instead.
+
+    Ok(http_response)
 }
 
 /// Handles streaming queries, returning results as newline-delimited JSON (NDJSON).
@@ -324,18 +329,24 @@ fn create_batch_processor(
 ) -> impl FnMut(Result<RecordBatch, QueryError>) -> Result<Bytes, actix_web::Error> {
     move |batch_result| match batch_result {
         Ok(batch) => {
-            let response = QueryResponse {
+            // Create response and immediately process to reduce memory retention
+            let query_response = QueryResponse {
                 records: vec![batch],
                 fields: Vec::new(),
                 fill_null: send_null,
                 with_fields: false,
-            }
-            .to_json()
-            .map_err(|e| {
+            };
+
+            let response = query_response.to_json().map_err(|e| {
                 error!("Failed to parse record batch into JSON: {}", e);
                 actix_web::error::ErrorInternalServerError(e)
             })?;
-            Ok(Bytes::from(format!("{response}\n")))
+
+            // Convert to bytes and explicitly drop the response object
+            let bytes_result = Bytes::from(format!("{response}\n"));
+            drop(response); // Explicit cleanup
+
+            Ok(bytes_result)
         }
         Err(e) => Err(actix_web::error::ErrorInternalServerError(e)),
     }
@@ -380,12 +391,19 @@ pub async fn get_counts(
         let (records, _) = get_records_and_fields(&query_request, &creds).await?;
 
         if let Some(records) = records {
-            let json_records = record_batches_to_json(&records)?;
-            let records = json_records.into_iter().map(Value::Object).collect_vec();
+            // Convert to JSON rows, then drop the record batches before building the response
+            let json_records = {
+                let converted = record_batches_to_json(&records)?;
+                drop(records); // Explicitly drop the original records early
+                converted
+            };
+
+            let processed_records: Vec<Value> =
+                json_records.into_iter().map(Value::Object).collect();
 
             let res = json!({
                 "fields": vec!["start_time", "endTime", "count"],
-                "records": records,
+                "records": processed_records,
             });
 
             return Ok(web::Json(res));

diff --git a/src/lib.rs b/src/lib.rs
@@ -30,6 +30,7 @@ pub mod event;
 pub mod handlers;
 pub mod hottier;
 mod livetail;
+pub mod memory;
 mod metadata;
 pub mod metastore;
 pub mod metrics;

diff --git a/src/main.rs b/src/main.rs
@@ -31,6 +31,11 @@ use tracing_subscriber::layer::SubscriberExt;
 use tracing_subscriber::util::SubscriberInitExt;
 use tracing_subscriber::{EnvFilter, Registry, fmt};
 
+// Use jemalloc as the global allocator; MSVC targets are excluded by the cfg below.
+#[cfg(not(target_env = "msvc"))]
+#[global_allocator]
+static GLOBAL: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc;
+
 #[actix_web::main]
 async fn main() -> anyhow::Result<()> {
     init_logger();

diff --git a/src/memory.rs b/src/memory.rs
@@ -0,0 +1,75 @@
+/*
+ * Parseable Server (C) 2022 - 2024 Parseable, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as
+ * published by the Free Software Foundation, either version 3 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+use std::ffi::CString;
+use std::time::Duration;
+
+use clokwerk::AsyncScheduler;
+use tracing::{info, warn};
+
+/// Asks jemalloc to purge the unused dirty pages of every arena.
+/// Best effort: each failure is logged with `warn!` and otherwise ignored.
+pub fn force_memory_release() {
+    use std::ptr::null_mut;
+
+    // Advancing the epoch only refreshes jemalloc's cached statistics.
+    if let Err(e) = tikv_jemalloc_ctl::epoch::mib().and_then(|mib| mib.advance()) {
+        warn!("Failed to advance jemalloc epoch: {:?}", e);
+    }
+
+    let Ok(arena_count) = tikv_jemalloc_ctl::arenas::narenas::read() else {
+        warn!("Failed to read jemalloc arenas.narenas");
+        return;
+    };
+    for i in 0..arena_count {
+        // The name is ASCII text plus digits, so it never has an interior NUL.
+        let Ok(name) = CString::new(format!("arena.{i}.purge")) else {
+            continue;
+        };
+        // SAFETY: `name` is a NUL-terminated string that outlives the call; the null
+        // pointers and zero length tell mallctl that no value is read or written.
+        let ret = unsafe {
+            tikv_jemalloc_sys::mallctl(name.as_ptr(), null_mut(), null_mut(), null_mut(), 0)
+        };
+        if ret != 0 {
+            warn!("Arena purge failed for index {i} with code: {ret}");
+        }
+    }
+}
+
+/// Starts the periodic memory-release job.
+/// Registers `force_memory_release` to run hourly and spawns a tokio task that polls for it.
+pub fn init_memory_release_scheduler() -> anyhow::Result<()> {
+    info!("Setting up scheduler for memory release");
+
+    let poll_interval = Duration::from_secs(60); // how often due jobs are checked
+    let mut release_jobs = AsyncScheduler::new();
+    release_jobs.every(clokwerk::Interval::Hours(1)).run(|| async {
+        info!("Running scheduled memory release");
+        force_memory_release();
+    });
+
+    tokio::spawn(async move {
+        loop {
+            release_jobs.run_pending().await;
+            tokio::time::sleep(poll_interval).await;
+        }
+    });
+
+    Ok(())
+}
diff --git a/src/metastore/metastores/object_store_metastore.rs b/src/metastore/metastores/object_store_metastore.rs
@@ -110,10 +110,7 @@ impl Metastore for ObjectStoreMetastore {
     /// Delete an overview
     async fn delete_overview(&self, stream: &str) -> Result<(), MetastoreError> {
         let path = RelativePathBuf::from_iter([stream, "overview"]);
-        Ok(self
-            .storage
-            .delete_object(&path)
-            .await?)
+        Ok(self.storage.delete_object(&path).await?) // `?` forwards a failed delete as the error
     }
 
     /// This function fetches all the keystones from the underlying object store

diff --git a/src/response.rs b/src/response.rs
@@ -18,7 +18,6 @@
 
 use crate::{handlers::http::query::QueryError, utils::arrow::record_batches_to_json};
 use datafusion::arrow::record_batch::RecordBatch;
-use itertools::Itertools;
 use serde_json::{Value, json};
 use tracing::info;
 
@@ -32,26 +31,36 @@ pub struct QueryResponse {
 impl QueryResponse {
     pub fn to_json(&self) -> Result<Value, QueryError> {
         info!("{}", "Returning query results");
-        let mut json_records = record_batches_to_json(&self.records)?;
 
-        if self.fill_null {
-            for map in &mut json_records {
-                for field in &self.fields {
-                    if !map.contains_key(field) {
-                        map.insert(field.clone(), Value::Null);
+        // Convert in chunks so each intermediate result is smaller; `all_values` still holds all rows
+        const BATCH_SIZE: usize = 100; // Process 100 record batches at a time
+        let mut all_values = Vec::new();
+
+        for chunk in self.records.chunks(BATCH_SIZE) {
+            let mut json_records = record_batches_to_json(chunk)?;
+
+            if self.fill_null {
+                for map in &mut json_records {
+                    for field in &self.fields {
+                        if !map.contains_key(field) {
+                            map.insert(field.clone(), Value::Null);
+                        }
                     }
                 }
             }
+
+            // Convert this batch to values and add to collection
+            let batch_values: Vec<Value> = json_records.into_iter().map(Value::Object).collect();
+            all_values.extend(batch_values);
         }
-        let values = json_records.into_iter().map(Value::Object).collect_vec();
 
         let response = if self.with_fields {
             json!({
                 "fields": self.fields,
-                "records": values,
+                "records": all_values,
             })
         } else {
-            Value::Array(values)
+            Value::Array(all_values)
         };
 
         Ok(response)

diff --git a/src/utils/arrow/mod.rs b/src/utils/arrow/mod.rs
@@ -48,7 +48,12 @@ use crate::event::DEFAULT_TIMESTAMP_KEY;
 ///
 /// A vector of JSON objects representing the record batches.
 pub fn record_batches_to_json(records: &[RecordBatch]) -> Result<Vec<Map<String, Value>>> {
-    let buf = vec![];
+    // Early return for empty records to avoid unnecessary allocations
+    if records.is_empty() {
+        return Ok(Vec::new());
+    }
+
+    let buf = Vec::with_capacity(records.len() * 1024); // Pre-allocate with reasonable capacity
     let mut writer = arrow_json::ArrayWriter::new(buf);
     for record in records {
         writer.write(record)?;
@@ -57,8 +62,11 @@ pub fn record_batches_to_json(records: &[RecordBatch]) -> Result<Vec<Map<String,
 
     let buf = writer.into_inner();
 
-    let json_rows: Vec<Map<String, Value>> =
-        serde_json::from_reader(buf.as_slice()).unwrap_or_default();
+    // Parse the serialized rows back into JSON maps, propagating parse errors to the caller
+    let json_rows: Vec<Map<String, Value>> = {
+        let cursor = std::io::Cursor::new(buf);
+        serde_json::from_reader(cursor)?
+    };
 
     Ok(json_rows)
 }