## 05_03: memory profiling

In [None]:
import sys
import collections

import numpy as np
import pandas as pd

import guppy # get guppy for Python 3 with "pip install guppy3" 

In [None]:
# Reference case: a plain Python list holding one million float objects
# (0.0, 1.0, ..., 999999.0).
vector_list = [float(i) for i in range(1000000)]

In [None]:
# The same one million values stored as a NumPy array with dtype 'd'
# (double-precision float).
vector_np = np.arange(0, 1000000, dtype='d')

In [None]:
# Size in bytes of one Python float object, as reported by sys.getsizeof.
sys.getsizeof(1.0)

In [None]:
# sys.getsizeof is shallow: for a list it does not follow references into the
# contained objects. Estimate the full footprint by adding the size of one
# float object per element to the size of the list object itself.
sys.getsizeof(vector_list) + 1000000 * sys.getsizeof(1.0)

In [None]:
# Size in bytes that sys.getsizeof reports for the NumPy array, for comparison
# with the list estimate.
sys.getsizeof(vector_np)

In [None]:
%%file arrays.py

# Standalone script: build one million floats both as a Python list and as a
# NumPy array, then print a guppy heap report of what was allocated.
import numpy as np
import guppy

# initialize the guppy environment
hp = guppy.hpy()

# set reference point, so the heap report below is taken relative to it
hp.setrelheap()

# run memory-consuming operations
vector_list = [float(i) for i in range(1000000)]  # list of Python float objects
vector_np = np.arange(0, 1000000, dtype='d')      # NumPy array of doubles

# get a snapshot of heap allocations
print(hp.heap())

In [None]:
!python arrays.py

In [None]:
# Record-style data, stored as a list of 100,000 dicts, each with float
# values under the keys 'x' and 'y'.
dict_list = [{'x': 1.0*i, 'y': 2.5*i} for i in range(100000)]

In [None]:
# Shallow size in bytes of a single record dict; sys.getsizeof does not
# follow references, so the two float values are not included.
sys.getsizeof(dict_list[0])

In [None]:
# Per-record estimate: the dict object itself plus its two float values.
sys.getsizeof(dict_list[0]) + 2 * sys.getsizeof(1.0)

In [None]:
# The same 100,000 records, stored as namedtuples with fields 'x' and 'y'.
XY_namedtuple = collections.namedtuple('XY', ['x', 'y'])
namedtuple_list = [XY_namedtuple(1.0*i, 2.5*i) for i in range(100000)]

In [None]:
# Shallow size in bytes of a single namedtuple record (float values excluded).
sys.getsizeof(namedtuple_list[0])

In [None]:
# `dataclass` is not among the names imported at the top of this notebook, so
# bring it into scope here; without it the decorator below raises NameError.
from dataclasses import dataclass


@dataclass
class XY_dataclass:
    """Record with two float fields, ``x`` and ``y``.

    ``__slots__`` is declared explicitly so that instances have no
    per-instance ``__dict__``; attributes are limited to the two slots.
    """

    __slots__ = ('x', 'y')
    x: float  # first field of the record
    y: float  # second field of the record

In [None]:
# The same 100,000 records, stored as XY_dataclass instances.
dataclass_list = [XY_dataclass(1.0*i, 2.5*i) for i in range(100000)]

In [None]:
# Shallow size in bytes of a single XY_dataclass record (float values excluded).
sys.getsizeof(dataclass_list[0])

In [None]:
# The same 100,000 records, loaded into a pandas DataFrame from a generator
# of dicts with keys 'x' and 'y'.
pandas_dataframe = pd.DataFrame({'x': 1.0*i, 'y': 2.5*i} for i in range(100000))

In [None]:
# Display the DataFrame's internal data manager to see how the columns are
# stored. The `_data` alias is deprecated in pandas 2.0; `_mgr` is the current
# name for the same internal object. It is still a private attribute, shown
# here for illustration only.
pandas_dataframe._mgr

In [None]:
# Size in bytes that sys.getsizeof reports for the DataFrame as a whole.
sys.getsizeof(pandas_dataframe)

In [None]:
%%file records.py

# Standalone script: store the same 100,000 (x, y) float records in four
# different containers and print a guppy heap report after each one.
import collections
from dataclasses import dataclass

import pandas as pd
import guppy

# initialize the guppy environment
hp = guppy.hpy()

# 1. list of dicts; the reference point is reset before each step so that
#    each report is taken relative to the start of that step
hp.setrelheap()
dict_list = [{'x': 1.0*i, 'y': 2.5*i} for i in range(100000)]
print(hp.heap())

# 2. list of namedtuples
hp.setrelheap()
XY_namedtuple = collections.namedtuple('XY', ['x', 'y'])
namedtuple_list = [XY_namedtuple(1.0*i, 2.5*i) for i in range(100000)]
print(); print(hp.heap())  # empty print() puts a blank line between reports

# 3. list of dataclass instances that declare __slots__
hp.setrelheap()
@dataclass
class XY_dataclass:
    __slots__= ('x', 'y')
    x: float
    y: float
dataclass_list = [XY_dataclass(1.0*i, 2.5*i) for i in range(100000)]
print(); print(hp.heap())

# 4. pandas DataFrame built from a generator of dicts
hp.setrelheap()
pandas_dataframe = pd.DataFrame({'x': 1.0*i, 'y': 2.5*i} for i in range(100000))
print(); print(hp.heap())

In [None]:
!python records.py