## 05_03: memory profiling

In [1]:
import sys
import collections

import numpy as np
import pandas as pd

import guppy # get guppy for Python 3 with "pip install guppy3" 

In [2]:
# Reference container #1: a plain Python list holding one million float
# objects (one separate object per element).
vector_list = [float(i) for i in range(1000000)]

In [3]:
# Reference container #2: the same values as a numpy array. dtype 'd' is
# double precision, i.e. 8 bytes per element stored in one contiguous buffer.
vector_np = np.arange(0, 1000000, dtype='d')

In [4]:
# Size in bytes of a single Python float object, as reported by the interpreter.
sys.getsizeof(1.0)

24

In [5]:
# Estimated footprint of the list: the list object itself (header plus its
# array of element pointers) plus one float object per element.
# sys.getsizeof is shallow, so the elements have to be added by hand; using
# len() keeps the element count tied to the actual list instead of repeating
# the literal 1000000.
sys.getsizeof(vector_list) + len(vector_list) * sys.getsizeof(1.0)

32697472

In [6]:
# Size of the numpy array as reported by the interpreter. In the recorded run
# this is 8 bytes per element plus 96 bytes of array overhead, so the data
# buffer is included in the figure.
sys.getsizeof(vector_np)

8000096

In [7]:
%%file arrays.py

# Standalone script: measure what building one million floats costs on the
# heap, once as a Python list and once as a numpy array, and print guppy's
# partition of the objects allocated in between.

import numpy as np
import guppy

# initialize the guppy environment
hp = guppy.hpy()

# set reference point: the heap() call below is reported relative to this,
# so objects that already exist (imports, interpreter state) are left out
hp.setrelheap()

# run memory-consuming operations
# (both results stay bound to module-level names, so they are still alive
# when the snapshot is taken)
vector_list = [float(i) for i in range(1000000)]
vector_np = np.arange(0, 1000000, dtype='d')

# get a snapshot of heap allocations
print(hp.heap())

Writing arrays.py


In [8]:
!python arrays.py

Partition of a set of 1000003 objects. Total size = 40697992 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0 1000000 100 24000000  59  24000000  59 float
     1      1   0  8697472  21  32697472  80 list
     2      1   0  8000096  20  40697568 100 numpy.ndarray
     3      1   0      424   0  40697992 100 types.FrameType


In [9]:
# Record layout #1: one dict per record, with keys 'x' and 'y' and float values.
dict_list = [{'x': 1.0*i, 'y': 2.5*i} for i in range(100000)]

In [10]:
# Shallow size of a single record dict; the float values it refers to are
# separate objects and are not included in this number.
sys.getsizeof(dict_list[0])

248

In [11]:
# Per-record total for the dict layout: the dict itself plus its two float values.
sys.getsizeof(dict_list[0]) + 2 * sys.getsizeof(1.0)

296

In [12]:
# Record layout #2: a namedtuple type with fields x and y (its type name is
# 'XY'), and one instance per record holding the same values as the dict layout.
XY_namedtuple = collections.namedtuple('XY', ['x', 'y'])
namedtuple_list = [XY_namedtuple(1.0*i, 2.5*i) for i in range(100000)]

In [13]:
# Shallow size of one namedtuple record. Unlike the dict total computed
# earlier, the two float values are NOT added here, so add
# 2 * sys.getsizeof(1.0) before comparing the two layouts.
sys.getsizeof(namedtuple_list[0])

72

In [15]:
from dataclasses import dataclass

In [16]:
# Record layout #3: a dataclass with two float fields, x and y.
# __slots__ is declared by hand so instances store x and y in fixed slots
# rather than in a per-instance __dict__. The field order below is also the
# positional-argument order of the generated __init__.
@dataclass
class XY_dataclass:
    __slots__ = ('x', 'y')
    x: float
    y: float

In [17]:
# One slotted dataclass instance per record, same values as the other layouts.
dataclass_list = [XY_dataclass(1.0*i, 2.5*i) for i in range(100000)]

In [18]:
# Shallow size of one slotted dataclass instance; as with the namedtuple
# measurement, the two float values are not included.
sys.getsizeof(dataclass_list[0])

64

In [19]:
# Record layout #4: a DataFrame built from a generator that yields one
# {'x': ..., 'y': ...} dict per row.
pandas_dataframe = pd.DataFrame({'x': 1.0*i, 'y': 2.5*i} for i in range(100000))

In [20]:
# Peek at the frame's internal block manager to see how the columns are
# stored (in the recorded run: a single 2 x 100000 float64 block).
# NOTE(review): `_data` is a private pandas attribute, not public API --
# confirm it is available in the pandas version in use.
pandas_dataframe._data

BlockManager
Items: Index(['x', 'y'], dtype='object')
Axis 1: RangeIndex(start=0, stop=100000, step=1)
FloatBlock: slice(0, 2, 1), 2 x 100000, dtype: float64

In [21]:
# Size of the whole DataFrame as reported by the interpreter. In the recorded
# run this comes to about 16 bytes per row, consistent with two float64
# columns plus a small fixed overhead.
sys.getsizeof(pandas_dataframe)

1600160

In [22]:
%%file records.py

# Standalone script: for each record layout in turn, reset guppy's reference
# point, build 100000 (x, y) records, and print the heap partition reported
# relative to that reference point. Earlier layouts stay alive, but each
# snapshot is relative to the most recent setrelheap() call.

import collections
from dataclasses import dataclass

import pandas as pd
import guppy

hp = guppy.hpy()

# layout 1: list of dicts
hp.setrelheap()
dict_list = [{'x': 1.0*i, 'y': 2.5*i} for i in range(100000)]
print(hp.heap())

# layout 2: list of namedtuples
# The namedtuple type is created after the reference point, so the objects
# that make up the type itself (type, functions, strings, ...) also show up
# in this snapshot alongside the records.
hp.setrelheap()
XY_namedtuple = collections.namedtuple('XY', ['x', 'y'])
namedtuple_list = [XY_namedtuple(1.0*i, 2.5*i) for i in range(100000)]
print(); print(hp.heap())

# layout 3: list of slotted dataclass instances
# As above, the class is defined after the reference point.
hp.setrelheap()
@dataclass
class XY_dataclass:
    __slots__= ('x', 'y')
    x: float
    y: float
dataclass_list = [XY_dataclass(1.0*i, 2.5*i) for i in range(100000)]
print(); print(hp.heap())

# layout 4: pandas DataFrame built from a generator of per-row dicts
hp.setrelheap()
pandas_dataframe = pd.DataFrame({'x': 1.0*i, 'y': 2.5*i} for i in range(100000))
print(); print(hp.heap())

Writing records.py


In [23]:
!python records.py

Partition of a set of 300002 objects. Total size = 30424896 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0 100000  33 24800000  82  24800000  82 dict (no owner)
     1 200000  67  4800000  16  29600000  97 float
     2      1   0   824472   3  30424472 100 list
     3      1   0      424   0  30424896 100 types.FrameType

Partition of a set of 300046 objects. Total size = 12830167 bytes.
 Index  Count   %     Size   % Cumulative  % Kind (class / dict of class)
     0 100000  33  7200000  56   7200000  56 __main__.XY
     1 200000  67  4800000  37  12000000  94 float
     2      1   0   824472   6  12824472 100 list
     3      1   0      896   0  12825368 100 type
     4      6   0      864   0  12826232 100 function
     5     12   0      832   0  12827064 100 str
     6      1   0      656   0  12827720 100 dict of type
     7      9   0      632   0  12828352 100 tuple
     8      2   0      496   0  12828848 100 dict (no owner)
     9      1