# Memory tests

In this notebook, we'll do a simple comparison of the amount of memory taken up by different Python objects.

We'll start by getting things set up and defining a custom object.

In [1]:
from sys import getsizeof
import json
from pandas import DataFrame

In [2]:
def get_size(obj, seen=None):
    # From https://goshippo.com/blog/measure-real-size-any-python-object/
    # Recursively finds size of objects
    size = getsizeof(obj)
    if seen is None:
        seen = set()
    obj_id = id(obj)
    if obj_id in seen:
        return 0
    # Important mark as seen *before* entering recursion to gracefully handle
    # self-referential objects
    seen.add(obj_id)
    if isinstance(obj, dict):
        size += sum([get_size(v, seen) for v in obj.values()])
        size += sum([get_size(k, seen) for k in obj.keys()])
    elif hasattr(obj, '__dict__'):
        size += get_size(obj.__dict__, seen)
    elif hasattr(obj, '__iter__') and not isinstance(obj, (str, bytes, bytearray)):
        size += sum([get_size(i, seen) for i in obj])
    return size

In [5]:
a = 3
get_size(a), get_size(12), get_size([]), get_size([1])

(28, 28, 64, 100)

In [27]:
class CustomItem(object):
    __slots__ = ['a', 'b', 'c', 'd']
    def __init__(self, a, b, c, d):
        self.a = a
        self.b = b
        self.c = c
        self.d = d
        
    def to_dict(self):
        data = dict()
        for var in self.__slots__:
            data[var] = getattr(self, var)
        return data
    
    def to_json_str(self):
        return json.dumps(self.to_dict())
    
print(type(CustomItem(1, 1, 1, ['a']).to_dict())), print(CustomItem(1, 1, 1, ['a']).to_dict())
print(type(CustomItem(1, 1, 1, ['a']).to_json_str())), print(CustomItem(1, 1, 1, ['a']).to_json_str())

<class 'dict'>
{'a': 1, 'b': 1, 'c': 1, 'd': ['a']}
<class 'str'>
{"a": 1, "b": 1, "c": 1, "d": ["a"]}


(None, None)

In [31]:
print(set([CustomItem(1, 1, 1, ['a']).to_json_str(), CustomItem(1, 1, 1, ['a']).to_json_str()]))
print(set([CustomItem(1, 1, 1, ['a']).to_json_str(), CustomItem(1, 1, 1, ['b']).to_json_str()]))
print(set([CustomItem(1, 1, 1, ['a']).to_json_str(), CustomItem(1, 1, 2, ['a']).to_json_str()]))

{'{"a": 1, "b": 1, "c": 1, "d": ["a"]}'}
{'{"a": 1, "b": 1, "c": 1, "d": ["b"]}', '{"a": 1, "b": 1, "c": 1, "d": ["a"]}'}
{'{"a": 1, "b": 1, "c": 2, "d": ["a"]}', '{"a": 1, "b": 1, "c": 1, "d": ["a"]}'}


## Commence the tests

In [38]:
print('empty DataFrame size: {:,}'.format(get_size(DataFrame())))
print('DataFrame with two columns and one row: {:,}'.format(get_size(DataFrame({'a': [1], 'b': ['a']}))))
print('DataFrame with two rows and one column: {:,}'.format(get_size(DataFrame({'a': [1, 'r']}))))
print('DataFrame with 10,000 rows and one column: {:,}'.format(get_size(DataFrame({'a': list(range(10000))}))))

empty DataFrame size: 795
DataFrame with two columns and one row: 4,097
DataFrame with two rows and one column: 3,287
DataFrame with 10,000 rows and one column: 443,007


In [6]:
list_sizes = [100, 10000, 50000, 100000]
print('Sizes of lists of integers')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append(i)
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))

Sizes of lists of integers
Num elements: 100	Size: 3,708
Num elements: 10,000	Size: 367,620
Num elements: 50,000	Size: 1,806,492
Num elements: 100,000	Size: 3,624,460


In [8]:
print('Sizes of lists of empty lists')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append([])
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))
    
print('\nSizes of lists of lists with a single element')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append([i])
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))

Sizes of lists of empty lists
Num elements: 100	Size: 7,312
Num elements: 10,000	Size: 727,624
Num elements: 50,000	Size: 3,606,496
Num elements: 100,000	Size: 7,224,464

Sizes of lists of lists with a single element
Num elements: 100	Size: 10,908
Num elements: 10,000	Size: 1,087,620
Num elements: 50,000	Size: 5,406,492
Num elements: 100,000	Size: 10,824,460


In [33]:
print('Sizes of lists of empty sets')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append(set())
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))
    
print('\nSizes of lists of sets with a single element')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append(set([i]))
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))

Sizes of lists of empty sets
Num elements: 100	Size: 23,312
Num elements: 10,000	Size: 2,327,624
Num elements: 50,000	Size: 11,606,496
Num elements: 100,000	Size: 23,224,464

Sizes of lists of sets with a single element
Num elements: 100	Size: 26,108
Num elements: 10,000	Size: 2,607,620
Num elements: 50,000	Size: 13,006,492
Num elements: 100,000	Size: 26,024,460


In [10]:
print('Sizes of lists of empty dictionaries')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append({})
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))
    
print('\nSizes of lists of dictionaries with one element')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append({'a': i})
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))

Sizes of lists of empty dictionaries
Num elements: 100	Size: 24,912
Num elements: 10,000	Size: 2,487,624
Num elements: 50,000	Size: 12,406,496
Num elements: 100,000	Size: 24,824,464

Sizes of lists of dictionaries with one element
Num elements: 100	Size: 27,758
Num elements: 10,000	Size: 2,767,670
Num elements: 50,000	Size: 13,806,542
Num elements: 100,000	Size: 27,624,510


In [12]:
print('Sizes of lists of dictionaries with four elements')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append({'a': i, 'b': i, 'c': i, 'd': [i]})
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))
    
print('\nSizes of lists of CustomItems')
for size in list_sizes:
    holder = []
    for i in range(size):
        holder.append(CustomItem(i, i, i, [i]))
    print('Num elements: {:,}\tSize: {:,}'.format(size, get_size(holder)))

Sizes of lists of dictionaries with four elements
Num elements: 100	Size: 35,108
Num elements: 10,000	Size: 3,487,820
Num elements: 50,000	Size: 17,406,692
Num elements: 100,000	Size: 34,824,660

Sizes of lists of CustomItems
Num elements: 100	Size: 8,112
Num elements: 10,000	Size: 807,624
Num elements: 50,000	Size: 4,006,496
Num elements: 100,000	Size: 8,024,464


That's a remarkable difference. The lists of CustomItems are just slightly bigger than the lists of empty lists!