# Performance optimizations

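This notebook demonstrates two performance optimizations that are available for the `Potential` class: autobatching of concurrent requests and CPU parallelization across multiple worker processes.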

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
import asyncio
from arsenal.timer import timeit

## Autobatching concurrent requests

In [3]:
from genlm_control.potential import Potential


class TimedPotential(Potential):
    """Toy potential whose methods take a fixed amount of wall-clock time.

    Every method reports the length of the context(s) it is given. The delays
    use the blocking ``time.sleep``, so calls running on the same event loop
    do not overlap even when they are awaited concurrently.
    """

    async def complete(self, context):
        """Block for 0.5 seconds, then return ``len(context)``."""
        time.sleep(0.5)
        return len(context)

    async def prefix(self, context):
        """Block for 0.5 seconds, then return ``len(context)``."""
        time.sleep(0.5)
        return len(context)

    # Batched methods are much quicker than sequentially calling the
    # instance methods: a whole batch costs a single 0.55 second delay,
    # independent of how many contexts it contains.

    async def batch_complete(self, contexts):
        """Block for 0.55 seconds once, then return the length of each context."""
        time.sleep(0.55)
        return list(map(len, contexts))

    async def batch_prefix(self, contexts):
        """Block for 0.55 seconds once, then return the length of each context."""
        time.sleep(0.55)
        return list(map(len, contexts))

    def __repr__(self):
        """Return a fixed, argument-free representation."""
        return "TimedPotential()"


# The vocabulary is the integers 0..255.
# NOTE(review): presumably these stand for byte values, matching the bytes
# contexts passed to the potential later in the notebook — confirm.
potential = TimedPotential(list(range(256)))

INFO 02-11 12:17:51 __init__.py:183] Automatically detected platform cuda.


In [4]:
# Wrap the potential so that concurrent requests can be grouped into batches;
# the bare expression on the last line displays the wrapper's repr.
autobatched = potential.to_autobatched()
autobatched

AutoBatchedPotential(TimedPotential())

In [5]:
sequences = [b"hello", b"cats", b"foo", b"fy"]

# Concurrent requests to complete will be automatically batched
# and processed by the batch_complete method.

# Baseline: TimedPotential.complete blocks with time.sleep(0.5) and never yields
# to the event loop, so asyncio.gather runs the four calls one after another.
with timeit("without autobatching"):
    results = await asyncio.gather(*(potential.complete(seq) for seq in sequences))

# Autobatched: the same four concurrent requests are served through
# batch_complete, which costs a single 0.55 second delay for the whole batch.
with timeit("with autobatching"):
    results_autobatched = await asyncio.gather(
        *(autobatched.complete(seq) for seq in sequences)
    )

without autobatching (2.0009 sec)
with autobatching (0.5504 sec)


In [6]:
# Results are the same whether we use autobatching or not;
# only the measured wall-clock time differs.
results, results_autobatched

([5, 4, 3, 2], [5, 4, 3, 2])

## CPU Parallelization

In [7]:
class TimedPotential(Potential):
    """Toy potential whose per-context methods each block for one second.

    NOTE: this reuses the name ``TimedPotential`` from the autobatching
    section, so running this cell shadows that earlier definition.
    """

    async def complete(self, context):
        """Block for 1 second, then return ``len(context)``."""
        time.sleep(1)
        return len(context)

    async def prefix(self, context):
        """Block for 1 second, then return ``len(context)``."""
        time.sleep(1)
        return len(context)

    # These are the default implementations of batch_complete and batch_prefix
    # which subclasses inherit. We repeat them here for clarity.
    async def batch_complete(self, contexts):
        """Gather ``complete`` over every context and return the results in order."""
        pending = map(self.complete, contexts)
        return await asyncio.gather(*pending)

    async def batch_prefix(self, contexts):
        """Gather ``prefix`` over every context and return the results in order."""
        pending = map(self.prefix, contexts)
        return await asyncio.gather(*pending)

    def spawn(self):
        """Return a new ``TimedPotential`` built from this instance's ``decode``.

        NOTE(review): presumably called by the multiprocess wrapper to create
        the instance used in each worker — confirm against ``to_multiprocess``.
        """
        return TimedPotential(self.decode)


# Rebind `potential` to an instance of the TimedPotential class defined in this
# cell (1 second per call), replacing the instance from the autobatching section.
potential = TimedPotential(list(range(256)))

In [9]:
# Wrap the potential in a multiprocess wrapper configured with two workers;
# the bare expression on the last line displays the wrapper's repr.
mp_potential = potential.to_multiprocess(num_workers=2)
mp_potential

MPPotential(self.num_workers=2)

`MPPotential` is a parallel-processing wrapper around another potential. It runs multiple instances of the wrapped potential in parallel across different CPU cores, which can significantly speed up processing. In the cells below, four 1-second requests handled by two workers finish in about 2 seconds instead of 4.

In [10]:
# Baseline: batch_complete gathers four complete calls, but each one blocks with
# time.sleep(1), so they run one after another on the event loop.
with timeit("without multiprocessing"):
    results = await potential.batch_complete(sequences)

# Same batch sent through the multiprocess wrapper (two workers).
with timeit("with multiprocessing"):
    results_mp = await mp_potential.batch_complete(sequences)

without multiprocessing (4.0010 sec)
with multiprocessing (2.0022 sec)


In [11]:
# The values match; in the recorded output the multiprocess batch result is
# displayed as an array rather than a list.
results, results_mp

([5, 4, 3, 2], array([5, 4, 3, 2]))

In [12]:
# Same comparison, but issuing individual complete requests concurrently
# instead of making a single batch_complete call.
with timeit("without multiprocessing"):
    results = await asyncio.gather(*(potential.complete(seq) for seq in sequences))

with timeit("with multiprocessing"):
    results_mp = await asyncio.gather(
        *(mp_potential.complete(seq) for seq in sequences)
    )

without multiprocessing (4.0010 sec)
with multiprocessing (2.0017 sec)


In [13]:
# asyncio.gather returns a list in both cases, and the values match.
results, results_mp

([5, 4, 3, 2], [5, 4, 3, 2])