Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Octocat-spinner-32-eaf2f5

Cannot retrieve contributors at this time

file 110 lines (90 sloc) 3.432 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109
"""
Conversion, validation, and size measurement utilities for Python's
built-in types.
"""

import re
import sys
from math import log, log10, ceil

# Names present in the module namespace right after the imports above and
# before this module defines its own constants and functions. This value is
# passed to refbinder's bindRecursive at the bottom of the file when
# refbinder is importable.
# NOTE(review): presumably this lets bindRecursive tell imported names apart
# from names defined here -- confirm against refbinder's documentation.
_postImportVars = vars().keys()


# Interpreter characteristics used by basicGetSizeOf's size estimates.
#
# sys.maxint reflects the width of a C long, which stays at 32 bits on
# 64-bit Windows builds, so it under-reports the pointer width there.
# sys.maxsize (Python 2.6+) follows the pointer width instead; fall back to
# sys.maxint only on interpreters that predate sys.maxsize.
_64bitPy = (getattr(sys, 'maxsize', None) or sys.maxint) > 2**31 - 1
_bytesPerWord = 8 if _64bitPy else 4
_gcHeaderSize = 24 if _64bitPy else 12 # Just a guess
# Wide (UCS-4) builds report a sys.maxunicode above 0xFFFF.
_UCS4 = sys.maxunicode > 2**16 - 1
_bytesPerCodePoint = 4 if _UCS4 else 2

def basicGetSizeOf(obj):
    """
    Works like L{sys.getsizeof}, but only returns reasonable numbers for
    a limited set of types: str, unicode, list, tuple, dict, set, frozenset,
    bool, NoneType, int, float, long, complex.

    Many of these numbers are guesses. Don't use this if L{sys.getsizeof}
    is available.

    The result is shallow: for containers it covers the container itself,
    not the objects it refers to.

    @param obj: the object whose size should be estimated.
    @return: the estimated size of C{obj} in bytes, as an C{int}.
    """
    if isinstance(obj, str):
        # One byte per character of a byte string.
        return _gcHeaderSize + len(obj)
    elif isinstance(obj, unicode):
        return _gcHeaderSize + _bytesPerCodePoint * len(obj)
    elif isinstance(obj, int):
        # Also reached for bools, because bool is a subclass of int.
        return _gcHeaderSize + _bytesPerWord
    elif isinstance(obj, float):
        # One C double.
        return _gcHeaderSize + 8
    elif isinstance(obj, complex):
        # Two C doubles: real and imaginary parts.
        return _gcHeaderSize + 16
    elif isinstance(obj, long):
        # Estimate the payload from the magnitude. Each hex digit carries
        # 4 bits, so two digits make one byte (rounded up). Formatting the
        # absolute value works for zero and for negative numbers, where a
        # logarithm would raise ValueError.
        hexDigits = len('%x' % abs(obj))
        return _gcHeaderSize + (hexDigits + 1) // 2
    elif isinstance(obj, (list, tuple)):
        # A length field plus one pointer per element.
        return _gcHeaderSize + _bytesPerWord + _bytesPerWord * len(obj)
    elif isinstance(obj, dict):
        return _gcHeaderSize + _bytesPerWord * (750 + 8 * len(obj))
    elif isinstance(obj, (set, frozenset)):
        return _gcHeaderSize + _bytesPerWord * (750 + 5 * len(obj))
    else:
        # NoneType and anything unrecognized: header plus one word.
        return _gcHeaderSize + _bytesPerWord

# Use the interpreter's own sys.getsizeof when it exists; otherwise fall back
# to the rough estimates produced by basicGetSizeOf.
if hasattr(sys, 'getsizeof'):
    getsizeof = sys.getsizeof
else:
    getsizeof = basicGetSizeOf


def totalSizeOf(obj, _alreadySeen=None):
    """
    Get the size of object C{obj} using L{sys.getsizeof} or L{basicGetSizeOf}
    on the object itself and on everything reachable from it. If the same
    object appears more than once inside C{obj}, it is counted only once.

    This only works properly if C{obj} is a str, unicode, list, tuple, dict,
    set, frozenset, bool, NoneType, int, complex, float, long, or any nested
    combination of the above. C{obj} is allowed to have circular references.

    This is particularly useful for getting a good estimate of how much
    memory a JSON-decoded object is using after receiving it.

    Design notes: L{sys.getsizeof} or L{basicGetSizeOf} return reasonable
    numbers, but do not look into an object's children, so the children are
    walked here. The walk uses an explicit stack rather than recursion, so
    deeply nested input cannot exhaust the interpreter's recursion limit.
    The ids of the objects already counted are remembered for two reasons:
      - If we've already counted the object's memory usage, we don't
        want to count it again.
      - As a bonus, we handle circular references gracefully.

    This function assumes that containers do not modify their children as
    they are traversed.

    If your Python is < 2.6, the returned size will be less accurate, because
    L{basicGetSizeOf} is used instead of L{sys.getsizeof}.

    @param obj: the root object to measure. It is always counted, even if
        its id is already present in C{_alreadySeen}.
    @param _alreadySeen: private; a C{set} holding the C{id()} of every
        object that has already been counted. It is updated in place.
    @return: the summed size, in bytes, of C{obj} and every not-yet-counted
        object reachable from it.
    """
    if _alreadySeen is None:
        _alreadySeen = set()

    _alreadySeen.add(id(obj))
    total = 0
    # Objects whose ids are recorded but whose sizes are not yet added.
    # Holding them here also keeps them alive, so their ids stay unique.
    pending = [obj]

    while pending:
        current = pending.pop()
        total += getsizeof(current)

        if isinstance(current, dict):
            # Count the memory usage of both the keys and values.
            children = [part
                        for pair in current.iteritems()
                        for part in pair]
        else:
            try:
                children = current.__iter__()
            except (TypeError, AttributeError):
                # Not a container we know how to walk (Python 2 strings
                # land here too, since they have no __iter__ method).
                continue

        for child in children:
            childId = id(child)
            if childId not in _alreadySeen:
                # Mark at push time so an object reachable by several
                # paths is queued, and therefore counted, only once.
                _alreadySeen.add(childId)
                pending.append(child)

    return total


# Optional hook: when refbinder can be imported, hand it this module object
# together with the name list captured right after the imports. Without
# refbinder the module works unchanged.
try:
    from refbinder.api import bindRecursive
except ImportError:
    pass
else:
    bindRecursive(sys.modules[__name__], _postImportVars)
Something went wrong with that request. Please try again.