Use sliced query to reduce memory footprint on export

reimandlab · Jun 7, 2017 · 16ddd9b · 16ddd9b
1 parent d05f75e
commit 16ddd9b
Showing 1 changed file with 12 additions and 2 deletions.
diff --git a/website/imports/mutations/__init__.py b/website/imports/mutations/__init__.py
@@ -306,10 +306,20 @@ def export(self, path=None, only_primary_isoforms=False):
         elif self.model_name == 'InheritedMutation':
             header += ['disease']
 
+        def yield_objects(base_query, step_size=1000):
+            done = False
+            step = 0
+            while not done:
+                obj = None
+                for obj in base_query.limit(step_size).offset(step * step_size):
+                    yield obj
+                step += 1
+                done = not obj
+
         with gzip.open(path, 'wt') as f:
             f.write('\t'.join(header))
 
-            for mutation in tqdm(self.model.query, total=fast_count(db.session.query(self.model))):
+            for mutation in tqdm(yield_objects(self.model.query), total=fast_count(db.session.query(self.model))):
                 tick += 1
 
                 m = mutation.mutation
@@ -354,7 +364,7 @@ def export(self, path=None, only_primary_isoforms=False):
                     del data
 
                 del mutation
-                if tick % 1000 == 0:
+                if tick % 10000 == 0:
                     import gc
                     gc.collect()