In [1]:
class C:
    """A class to demonstrate overhead of invoking methods."""
    def m1(self, a: str, b: str) -> str:
        """The simplest case, just join the strings."""
        return a + ' ' + b

    def m2(self, a: str, b: str) -> str:
        """The overhead of invoking such a simple method."""
        return c.m1(a, b)

    def m3(self) -> str:
        """Eliminate the argument forwarding."""
        return c.m1('hello', 'world')

    def m4(self, a: str, b: str) -> str:
        """Add a second function call to emphasize the function invocation overhead."""
        return c.m2(b, a)

"""Non-method versions of the same."""
def f1(a: str, b: str) -> str:
    return a + ' ' + b

def f2(a: str, b: str) -> str:
    return f1(a, b)

def f3() -> str:
    return f1('hello', 'world')
    
def f4(a: str, b: str) -> str:
    return f2(b, a)

In [2]:
# Instantiate an instance of C and benchmark our methods.
c = C()

# This may take upto a minute to start producing output.
print("Benchmarking, Please Wait...")
%timeit -r 21 -n 10000000 c.m1('hello', 'world')
%timeit -r 21 -n 10000000 c.m2('hello', 'world')
%timeit -r 21 -n 10000000 c.m3()
%timeit -r 21 -n 10000000 c.m4('world', 'hello')

Benchmarking, Please Wait...
155 ns ± 12.3 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)
219 ns ± 7.55 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)
211 ns ± 10.9 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)
324 ns ± 6.06 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)


In [3]:
# Non-method, direct function all, versions of the same.
print("Benchmarking, Please Wait...")
%timeit -r 21 -n 10000000 f1('hello', 'world')
%timeit -r 21 -n 10000000 f2('hello', 'world')
%timeit -r 21 -n 10000000 f3()
%timeit -r 21 -n 10000000 f4('world', 'hello')

Benchmarking, Please Wait...
142 ns ± 12.4 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)
195 ns ± 9.3 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)
184 ns ± 7.02 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)
265 ns ± 9.59 ns per loop (mean ± std. dev. of 21 runs, 10000000 loops each)


In [18]:
### Punch in the timings from above.
# Measure the overhead of the m4/f4 which uses 2 function calls to add the strings.
print(f"2 fn calls -> direct implementation {float(312) * 100 / 143:6.2f}%")

# Measure the overhead of calling a class-method vs a simple function.
print(f"m1->f1 {float(155) * 100 / 142:6.2f}%")
print(f"m2->f2 {float(219) * 100 / 195:6.2f}%")
print(f"m3->f3 {float(211) * 100 / 184:6.2f}%")
print(f"m4->f4 {float(324) * 100 / 265:6.2f}%")

2 fn calls -> direct implementation 218.18%
m1->f1 109.15%
m2->f2 112.31%
m3->f3 114.67%
m4->f4 122.26%


In [5]:
class Resource:
  def __init__(self, protocol, host, path):
    self._protocol = protocol
    self._host = host
    self._path = path
    self._args = None

  @property
  def protocol(self): return self._protocol
  @property
  def host(self): return self._host
  @property
  def path(self): return self._path
  @property
  def args(self): return self._args or ""
  @property
  def prefix(self): return self.protocol + "://" + self.host

  @property
  def uri(self):
    # Cache the uri when we do produce it.
    return self.prefix + self.path + self.args

  @property
  def uri_test(self):
    for i in range(24):
      self.uri

print(Resource("https", "wiki.python.org", "moin/PythonSpeed/PerformanceTips").uri)

https://wiki.python.orgmoin/PythonSpeed/PerformanceTips


In [6]:
class Versus:
  def __init__(self, protocol, host, path):
    self.protocol = protocol
    self.host = host
    self.path = path
    self.args = None

  def prefix(self):
    return self.protocol + "://" + self.host

  def uri(self):
    return self.protocol + "://" + self.host + "/" + self.path + (self.args or "")

  def uri_test(self):
    for i in range(24):
      self.uri()

print(Versus("https", "wiki.python.org", "moin/PythonSpeed/PerformanceTips").uri())

https://wiki.python.org/moin/PythonSpeed/PerformanceTips


In [7]:
%timeit -r 21 Resource("https", "wiki.python.org", "moin/PythonSpeed/PerformanceTips").uri_test
%timeit -r 21 Versus("https", "wiki.python.org", "moin/PythonSpeed/PerformanceTips").uri_test()

15.9 µs ± 985 ns per loop (mean ± std. dev. of 21 runs, 100000 loops each)
7.82 µs ± 371 ns per loop (mean ± std. dev. of 21 runs, 100000 loops each)


In [8]:
def simple_vars(protocol, host, path, args):
    for i in range(24):
        s = protocol + "://" + host + "/" + path + (args or '')

%timeit -r 25 simple_vars("https", "wiki.python.org", "moin/PythonSpeed/PerformanceTips", None)

4.52 µs ± 259 ns per loop (mean ± std. dev. of 25 runs, 100000 loops each)


In [9]:
def f_in_simple_vars(protocol, host, path, args):
    for i in range(24):
        s = f"{protocol}://{host}/{path}{args or None}"
   
%timeit -r 25 f_in_simple_vars('https', 'wiki.python.org', 'moin/PythonSpeed/PerformanceTips', None)

5.29 µs ± 319 ns per loop (mean ± std. dev. of 25 runs, 100000 loops each)


In [10]:
numbers = [i for i in range(100000)]

def increment(a: int, b: int) -> int:
    return a + b

def i1(numbers) -> int:
    result = 0
    for i in numbers:
        result = increment(result, i)
    return result

def i2(numbers) -> int:
    result = 0
    for i in numbers:
        result = result + i
    return result

%timeit i1(numbers)
%timeit i2(numbers)

9.53 ms ± 270 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
3.95 ms ± 482 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
class Joiner:
    join_char = ','
    def put_together(self, a, b, c):
        self.result = a + self.join_char
        self.result += b + self.join_char
        self.result += c + self.join_char

import dis ; dis.dis(Joiner.put_together)

4           0 LOAD_FAST                1 (a)
              2 LOAD_FAST                0 (self)
              4 LOAD_ATTR                0 (join_char)
              6 BINARY_ADD
              8 LOAD_FAST                0 (self)
             10 STORE_ATTR               1 (result)

  5          12 LOAD_FAST                0 (self)
             14 DUP_TOP
             16 LOAD_ATTR                1 (result)
             18 LOAD_FAST                2 (b)
             20 LOAD_FAST                0 (self)
             22 LOAD_ATTR                0 (join_char)
             24 BINARY_ADD
             26 INPLACE_ADD
             28 ROT_TWO
             30 STORE_ATTR               1 (result)

  6          32 LOAD_FAST                0 (self)
             34 DUP_TOP
             36 LOAD_ATTR                1 (result)
             38 LOAD_FAST                3 (c)
             40 LOAD_FAST                0 (self)
             42 LOAD_ATTR                0 (join_char)
             44 BINARY_ADD
     

In [12]:
def join_these_things_v1(things, into):
    for thing in things:
      into.things += thing + into.separator

import dis
dis.dis(join_these_things_v1)

2           0 SETUP_LOOP              32 (to 34)
              2 LOAD_FAST                0 (things)
              4 GET_ITER
        >>    6 FOR_ITER                24 (to 32)
              8 STORE_FAST               2 (thing)

  3          10 LOAD_FAST                1 (into)
             12 DUP_TOP
             14 LOAD_ATTR                0 (things)
             16 LOAD_FAST                2 (thing)
             18 LOAD_FAST                1 (into)
             20 LOAD_ATTR                1 (separator)
             22 BINARY_ADD
             24 INPLACE_ADD
             26 ROT_TWO
             28 STORE_ATTR               0 (things)
             30 JUMP_ABSOLUTE            6
        >>   32 POP_BLOCK
        >>   34 LOAD_CONST               0 (None)
             36 RETURN_VALUE


In [13]:
def join_these_things_v2(things, into):
    into.things.extend(thing + into.separator for thing in things)

import dis
dis.dis(join_these_things_v2)

2           0 LOAD_DEREF               0 (into)
              2 LOAD_ATTR                0 (things)
              4 LOAD_METHOD              1 (extend)
              6 LOAD_CLOSURE             0 (into)
              8 BUILD_TUPLE              1
             10 LOAD_CONST               1 (<code object <genexpr> at 0x00000259DD98BE40, file "<ipython-input-13-eed2b012bb4d>", line 2>)
             12 LOAD_CONST               2 ('join_these_things_v2.<locals>.<genexpr>')
             14 MAKE_FUNCTION            8
             16 LOAD_FAST                0 (things)
             18 GET_ITER
             20 CALL_FUNCTION            1
             22 CALL_METHOD              1
             24 POP_TOP
             26 LOAD_CONST               0 (None)
             28 RETURN_VALUE

Disassembly of <code object <genexpr> at 0x00000259DD98BE40, file "<ipython-input-13-eed2b012bb4d>", line 2>:
  2           0 LOAD_FAST                0 (.0)
        >>    2 FOR_ITER                16 (to 20)
          

In [14]:
class Test:
    things = []
    separator = ","

things = [str(i) for i in range(2000)]

%timeit join_these_things_v1(things, Test())
%timeit join_these_things_v2(things, Test())

496 µs ± 47.4 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
203 µs ± 8.17 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [22]:
def non_agg_total_percentage(a, b):
    return a * 100 / b

def non_agg_invoker(values):
    total = 0
    for a, b in values:
        total += non_agg_total_percentage(a, b)
    return total / len(values)

def agg_total_percentage(values):
    return sum((a * 100 / b) for a, b in values)

def agg_invoker(values):
    return agg_total_percentage(values) / len(values)

values = [(a, b) for a, b in zip(range(30000, 90000, 3), range(20000, 140000, 6))]
%timeit non_agg_invoker(values)
%timeit agg_invoker(values)

2.72 ms ± 34 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
1.86 ms ± 91 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [16]:
def f1(values):
    for i in range(10000):
        values.append(i)

def f2(values):
    append = values.append
    for i in range(10000):
        append(i)

%timeit f1([])
%timeit f2([])

552 µs ± 15.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
422 µs ± 7.77 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [17]:
import re
creditsPattern = r"^.*? ([+-]?\d+) [Cc][Rr]"
creditsRe = re.compile(creditsPattern)
str1 = "Lorem ipsum, unfortunately I don't match. +123 Coins"
str2 = "Well +123 Charlie, I do match because I have -321 cRs."

def s1():
    for i in range(300000):
        m = re.match(creditsPattern, str1)
        m = re.match(creditsPattern, str2)

def s2():
    for i in range(300000):
        m = creditsRe.match(str1)
        m = creditsRe.match(str2)

def s3():
    creditsMatch = creditsRe.match
    for i in range(300000):
        m = creditsMatch(str1)
        m = creditsMatch(str2)

creditsReMatch = creditsRe.match
def s4():
    for i in range(300000):
        m = creditsReMatch(str1)
        m = creditsReMatch(str2)

%timeit s1()
%timeit s2()
%timeit s3()
%timeit s4()

1.05 s ± 14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
789 ms ± 6.96 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
773 ms ± 11.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
783 ms ± 5.58 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
