In [13]:
a = (1,2)
b = (1,2)
id(a), id(b)

(2236122503360, 2236122518336)

In [14]:
a = 10
b = 10
id(a), id(b), id(10)  # all share the memory address of 10

(140714377549872, 140714377549872, 140714377549872)

In [17]:
a = [1,2]
b = [3,4]
t = (a,b)
id(a), id(b), id(t)

(2236122793344, 2236122807936, 2236121379840)

In [18]:
a.append(3)
b.append(5)
id(a), id(b), id(t)
# tuple is immutable, but it contains mutable objects. The id of the tuple remains unchanged

(2236122793344, 2236122807936, 2236121379840)

In [19]:
a = 10

In [20]:
b = 10

In [21]:
a is b

True

In [23]:
a = 500
b = 500
a is b

False

In [24]:
a = 20
b = 20
a is b

True

In [40]:
a = 256
b = 256
a is b

True

In [39]:
a = 257
b = 257
a is b

False

### String Interning

In [42]:
import sys
a = 'hello'
b = 'hello'
a is b  # short strings without spaces (generally) share the same memory address

True

In [44]:
a = 'hello world'
b = 'hello world'
a is b  # the space in the middle generally means different memory addresses

False

In [47]:
a = '_this_is_a_long_string_that_ould_be_used_as_an_identifier'
b = '_this_is_a_long_string_that_ould_be_used_as_an_identifier'
a is b  # a and b are "interned" because the string could be used to identify something (a function, a variable, ...)

True

In [50]:
a = sys.intern('hello world')
b = sys.intern('hello world')
c = 'hello world'
id(a), id(b), id(c)  # manually intern the string vals of a and b

(2236123738928, 2236123738928, 2236141231536)

In [51]:
a == b

True

In [53]:
a is b  # "is" compares the memory addresses of a and b which is much faster than "==" comparison

True

In [57]:
# Benchmark speed improvement
def compare_using_equals(n):
    """Run ``n`` value comparisons (``==``) between two equal strings.

    Both operands are built separately at call time by repeating the same
    fragment 200 times and are not passed through ``sys.intern``. The result
    of each comparison is discarded; the function returns ``None``.
    """
    fragment = 'a long string that is not interned'
    left = fragment * 200
    right = fragment * 200
    for _ in range(n):
        if left == right:
            pass

def compare_using_interning(n):
    """Run ``n`` identity comparisons (``is``) between two interned strings.

    Both operands are built at call time by repeating the same fragment 200
    times and are then passed through ``sys.intern``. The result of each
    comparison is discarded; the function returns ``None``.
    """
    fragment = 'a long string that is not interned'
    left = sys.intern(fragment * 200)
    right = sys.intern(fragment * 200)
    for _ in range(n):
        if left is right:
            pass

In [59]:
import time
start = time.perf_counter()
compare_using_equals(10_000_000)
end = time.perf_counter()
print('equality', end - start)

equality 3.058651500000451


In [60]:
import time
start = time.perf_counter()
compare_using_interning(10_000_000)
end = time.perf_counter()
print('is', end - start)  # much faster!

is 0.4384718999999677


### Peephole Optimizations

In [67]:
def my_func():
    # Demo only: the locals are never used. Each right-hand side is an
    # expression over literals, written so that its compiled form can be
    # inspected through my_func.__code__.co_consts.
    # Plain comments are used instead of a docstring on purpose: in CPython a
    # docstring is itself stored in co_consts and would change that output.
    a = 24 * 60  # integer arithmetic on two literals
    b = (1,2) * 5  # tuple literal repeated
    c = 'abc' * 3  # short string literal repeated
    d = 'ab' * 15  # longer result (30 characters)
    e = 'the quick brown fox' * 5  # longer still (95 characters)
    f = ['a', 'b'] * 3  # not pre-calculated since it's a mutable (i.e. non-constant) value

In [68]:
my_func.__code__.co_consts

(None,
 1440,
 (1, 2, 1, 2, 1, 2, 1, 2, 1, 2),
 'abcabcabc',
 'ababababababababababababababab',
 'the quick brown foxthe quick brown foxthe quick brown foxthe quick brown foxthe quick brown fox',
 'a',
 'b',
 3)

In [69]:
def my_func(e):
    # Demo only: membership test against a list literal, written so that the
    # stored form of the literal can be inspected via my_func.__code__.co_consts.
    # No docstring on purpose: in CPython it would be stored in co_consts too.
    if e in [1,2,3]:
        pass

In [70]:
my_func.__code__.co_consts  # mutable element translated into immutable (list -> tuple)

(None, (1, 2, 3))

In [72]:
def my_func(e):
    # Demo only: membership test against a set literal, written so that the
    # stored form of the literal can be inspected via my_func.__code__.co_consts.
    # No docstring on purpose: in CPython it would be stored in co_consts too.
    if e in {1,2,3}:
        pass

my_func.__code__.co_consts  # set -> frozenset

(None, frozenset({1, 2, 3}))

In [73]:
# (quick and dirty) benchmark
# Set membership is more efficient than searching in a tuple or string or ...
import string
import time

string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [80]:
char_list = list(string.ascii_letters)
char_tuple = tuple(string.ascii_letters)
char_set = set(string.ascii_letters)

In [82]:
from timeit import timeit
# Time 5,000,000 membership tests against the list, tuple and set built from
# string.ascii_letters; each statement string is evaluated with this
# notebook's globals().
# NOTE(review): inside the statement strings `a` is a bare name looked up in
# globals(), not the one-character string 'a'. The timings therefore depend on
# whatever an earlier cell last bound to `a` (in the visible cells that is
# sys.intern('hello world'), which would make every lookup a miss) -- TODO
# confirm which search key was intended.
list_time = timeit('a in char_list', globals=globals(), number=5_000_000)
tuple_time = timeit('a in char_tuple', globals=globals(), number=5_000_000)
set_time = timeit('a in char_set', globals=globals(), number=5_000_000)

In [83]:
list_time, tuple_time, set_time  # searching in a set is much faster => a set is strongly preferred for membership tests

(3.114018700000088, 3.1686171000001195, 0.20964099999946484)