In [3]:
# Bind two names to the same small integer literal.
a = 10
b = 10
# Identity (not equality) check: do both names refer to one object?
a is b

True

In [2]:
# Same experiment with a larger integer literal.
a = 500
b = 500
# Identity check again; the recorded result for this cell is False.
a is b

False

As you can see, this property is not exhibited by every integer. This is a Python optimization strategy called interning: Python caches the integer objects in the range [-5, 256] and, whenever one of those values is needed, returns the cached object instead of creating a new one.

## String Interning

In [3]:
# Two names bound to the same identifier-like string literal.
a = 'hello'
b = 'hello'

# Identity check: True means both names share a single string object.
a is b

True

In [4]:
# Two names bound to the same literal, this time containing a space.
c = 'hello world'
d = 'hello world'

# Identity check; the recorded result for this cell is False.
c is d

False

The value of c and d ('hello world') doesn't look like a variable name (it contains a space), so it was not interned automatically; hence c and d didn't have the same address.

In [5]:
#explicit interning
import sys

In [9]:
# Intern the string explicitly; both calls receive equal text.
a = sys.intern('hello world')
b = sys.intern('hello world')
# Plain literal that is not passed through sys.intern.
c = 'hello world'

# Compare the two results of sys.intern by identity.
print(a is b)
# Chained comparison: equivalent to (a is b) and (b is c).
print(a is b is c)

True
False


Benchmarking interned string comparison vs normal string comparison

In [10]:
def compare_using_equals(n):
    """Run ``n`` value comparisons (``==``) between two equal long strings.

    Both operands are built separately at run time and are never passed
    through ``sys.intern``.  Nothing is returned; the function is only a
    fixed workload to be timed.
    """
    phrase = 'a long string that is not interned'
    left = phrase * 200
    right = phrase * 200

    for _ in range(n):
        if left == right:
            pass

In [11]:
def compare_using_interning(n):
    """Run ``n`` identity comparisons (``is``) between two interned strings.

    Both operands are the result of ``sys.intern`` applied to equal text.
    Nothing is returned; the function is only a fixed workload to be timed.
    """
    phrase = 'a long string that is not interned'
    left = sys.intern(phrase * 200)
    right = sys.intern(phrase * 200)

    for _ in range(n):
        if left is right:
            pass

In [12]:
import time

In [15]:
# Wall-clock timing of 100 million `==` comparisons.
start = time.perf_counter()
compare_using_equals(100_000_000)
end = time.perf_counter()
elapsed = end - start
print('equality time: ', elapsed)

equality time:  14.901438808999956


In [16]:
# Wall-clock timing of 100 million `is` comparisons on interned strings.
start = time.perf_counter()
compare_using_interning(100000000)
end = time.perf_counter()
# Use a label distinct from the `==` benchmark so the two printed results
# cannot be mistaken for one another.
print('interning time: ', end - start)

equality time:  2.77228887199999


# <b>Just look at the time difference! Comparing interned strings is drastically faster than normal string comparison.</b>

### Compile time expression caching

In [17]:
# Demo function: every assignment multiplies literals, so the function's
# code object can be inspected to see which results were precomputed.
# Documented with comments rather than a docstring on purpose: a docstring
# would add its own entry to co_consts and alter the inspection.
def my_func():
    a = 24 * 60                      # int * int
    b = (1, 2) * 5                   # tuple of constants * int
    c = 'abc' * 3                    # short string * int
    d = 'ab' * 11                    # 22-character result
    e = 'the quick brown fox' * 5    # 95-character result
    f = ['a', 'b'] * 3               # list (mutable) * int

In [18]:
# Show the constants stored on my_func's code object.
my_func.__code__.co_consts

(None,
 1440,
 (1, 2, 1, 2, 1, 2, 1, 2, 1, 2),
 'abcabcabc',
 'ababababababababababab',
 'the quick brown foxthe quick brown foxthe quick brown foxthe quick brown foxthe quick brown fox',
 'a',
 'b',
 3)

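Python only caches (folds) a constant expression at compile time when the result is small enough. Older CPython versions set the limit at a length of 20, but the output above, where the 95-character string was cached, shows that this interpreter uses a larger limit. If the length of the object is greater than the limit, Python won't cache it. In this case, ['a', 'b'] * 3 wasn't cached for a different reason: a list is mutable, so it cannot be stored as a constant.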

In [19]:
# Demo function: membership test against a list literal of constants.
# (Comments instead of a docstring so co_consts is left unchanged.)
def my_func(e):
    if e in [1,2,3]:
        pass

In [20]:
# Show how the list literal from the membership test was stored.
my_func.__code__.co_consts

(None, (1, 2, 3))

For a membership test against a literal made of constants, Python will automatically convert the sequence into an immutable one. So constant lists are converted to tuples at compile time.

In [21]:
# Demo function: membership test against a set literal of constants.
# (Comments instead of a docstring so co_consts is left unchanged.)
def my_func(e):
    if e in {1,2,3}:
        pass

In [22]:
# Show how the set literal from the membership test was stored.
my_func.__code__.co_consts

(None, frozenset({1, 2, 3}))

Similarly, a constant set used in a membership test becomes a frozenset at compile time.

# Searching in Sets vs Lists/Tuples

In [23]:
import string
import time

In [24]:
# Display the letters that the benchmark containers are built from.
string.ascii_letters

'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'

In [25]:
# Build one container of each kind from the same letters so the membership
# benchmarks differ only in container type.
char_list = list(string.ascii_letters)
# Must be a real tuple: building it with list() would make the "tuple"
# benchmark time a second list instead.
char_tuple = tuple(string.ascii_letters)
char_set = set(string.ascii_letters)

In [29]:
def membership_test(n, container):
    """Perform the check ``'z' in container`` ``n`` times.

    The outcome of each check is discarded and nothing is returned; the
    function is a fixed workload for timing different container types.
    """
    needle = 'z'
    for _ in range(n):
        if needle in container:
            pass

In [33]:
# Wall-clock timing of 100 million membership checks against the list.
start = time.perf_counter()
membership_test(100_000_000, char_list)
end = time.perf_counter()
elapsed = end - start
print('list time: ', elapsed)

list time:  40.72099226499995


In [34]:
# Wall-clock timing of 100 million membership checks against char_tuple.
start = time.perf_counter()
membership_test(100_000_000, char_tuple)
end = time.perf_counter()
elapsed = end - start
print('tuple time: ', elapsed)

tuple time:  52.768762958000025


In [35]:
# Wall-clock timing of 100 million membership checks against the set.
start = time.perf_counter()
membership_test(100000000, char_set)
end = time.perf_counter()
# This run times char_set, so label it as the set result rather than
# repeating the list label.
print('set time: ', end - start)

list time:  3.295972010999776
