Better calculation of array size.
This gives a much more accurate estimate when we are dealing with
many small arrays that are merged into one.

For example, for a six-element array the overhead counted by
sys.getsizeof inflates the size estimate by roughly a factor of six:

In [2]: import numpy as np
In [3]: d = np.array([[1,2,3],[4,5,6]])
In [4]: import sys
In [5]: sys.getsizeof(d)
Out[5]: 152
In [6]: d.dtype.itemsize * d.size
Out[6]: 24
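The gap comes from sys.getsizeof counting a roughly constant per-object overhead on top of the raw data buffer, so it dominates for small arrays and washes out for large ones. A quick illustration (not part of the commit; the exact overhead in bytes varies by NumPy version and platform):

```python
import sys

import numpy as np

for n in (6, 1_000_000):
    d = np.arange(n)                     # 8-byte integers on most platforms
    payload = d.size * d.dtype.itemsize  # raw data bytes (same as d.nbytes)
    total = sys.getsizeof(d)             # payload plus per-object overhead
    print(f"n={n}: payload={payload}, getsizeof={total}, ratio={total / payload:.2f}")
```

For the large array the ratio approaches 1, which is why the commit's assumption that the total array is large makes the simpler `size * itemsize` estimate accurate.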
jenshnielsen committed Nov 16, 2023
1 parent bbf9651 commit 3123af7
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions src/qcodes/dataset/data_set.py
@@ -3,7 +3,6 @@
 import importlib
 import json
 import logging
-import sys
 import tempfile
 import time
 import uuid
@@ -1530,15 +1529,15 @@ def _estimate_ds_size(self) -> float:
         Give an estimated size of the dataset as the size of a single row
         times the length of the dataset. The result is returned in megabytes.
-        Note that this does not take overhead into account so it is more accurate
-        if the row size is "large"
+        Note that this does not take overhead from storing the array into account
+        so it is assumed that the total array is large compared to the overhead.
         """
         sample_data = self.get_parameter_data(start=1, end=1)
         row_size = 0.0

         for param_data in sample_data.values():
             for array in param_data.values():
-                row_size += sys.getsizeof(array)
+                row_size += array.size * array.dtype.itemsize
         return row_size * len(self) / 1024 / 1024


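After the change, the per-row estimate sums only the raw data bytes of each sampled array, which is exactly what NumPy exposes as `array.nbytes`. A standalone sketch of the same calculation (the helper name and the dict-of-dicts sample are hypothetical, mirroring the shape returned by `get_parameter_data`):

```python
import numpy as np

def estimate_row_bytes(sample_data):
    # Sum raw data bytes across all arrays in one sampled row,
    # ignoring per-object overhead (hypothetical helper).
    row_size = 0.0
    for param_data in sample_data.values():
        for array in param_data.values():
            row_size += array.size * array.dtype.itemsize  # == array.nbytes
    return row_size

sample = {"param": {"x": np.zeros((2, 3)), "y": np.zeros(6, dtype=np.int32)}}
print(estimate_row_bytes(sample))  # 6*8 + 6*4 = 72.0
```

Multiplying this per-row figure by `len(self)` and dividing by 1024 twice, as the patched method does, yields the dataset estimate in megabytes.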
