In [80]:
from typing import Dict, Generator

# Fibonacci sequence

$$ 
fib(n) = fib(n - 1) + fib(n - 2)
$$

Recursion

In [30]:
def fib1(n: int) -> int:
    """Return the n-th Fibonacci number using plain, un-memoized recursion.

    Any ``n`` below 2 is returned unchanged, which covers the base cases
    fib(0) = 0 and fib(1) = 1. Every other value is the sum of the two
    preceding terms, each computed by a fresh recursive call.
    """
    # Base case or recursive step, expressed as a single conditional expression.
    return n if n < 2 else fib1(n - 1) + fib1(n - 2)

In [32]:
%timeit fib1(5)

1.32 µs ± 14.7 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


Memoization

In [38]:
# Fibonacci index passed to wrapper(n) and fib5(n) in the %timeit cells below.
n = 50

In [76]:
# Wrap the memoized function so that every call starts with a fresh cache;
# otherwise the repeated calls made by %timeit would only measure cache hits.
def wrapper(n):
    """Return the n-th Fibonacci number using a hand-rolled memo dictionary.

    The memo is created inside this function, so each call to ``wrapper``
    starts from a cache holding only the two base cases.
    """
    # Pre-seeded with fib(0) and fib(1); filled in as larger indices are computed.
    cache: Dict[int, int] = {0: 0, 1: 1}

    def fib3(k: int) -> int:
        """Return fib(k), consulting the shared cache before recursing."""
        if k in cache:
            return cache[k]
        value = fib3(k - 1) + fib3(k - 2)
        cache[k] = value
        return value

    return fib3(n)

In [77]:
%timeit wrapper(n)

15.5 µs ± 129 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


Automatic memoization

In [78]:
from functools import lru_cache

In [79]:
def wrapper(n):
    """Return the n-th Fibonacci number using ``functools.lru_cache``.

    The cached function is defined inside this function, so each call to
    ``wrapper`` creates a new, empty cache.
    """
    def fib4(k: int) -> int:
        """Return fib(k); values below 2 are returned unchanged."""
        return k if k < 2 else fib4(k - 1) + fib4(k - 2)

    # Rebind the name to the cached version. The recursive calls inside the
    # body look up ``fib4`` at call time, so they go through the cache too.
    fib4 = lru_cache(maxsize=None)(fib4)
    return fib4(n)

In [73]:
%timeit wrapper(n)

18.1 µs ± 194 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


Iterative approach

In [74]:
def fib5(n: int) -> int:
    """Return the n-th Fibonacci number by iterating over consecutive pairs.

    ``n == 0`` returns ``n`` itself. For ``n >= 1`` the pair (fib(i-1), fib(i))
    is advanced ``n - 1`` times starting from (0, 1). For negative ``n`` the
    loop body never runs, so the initial value 1 is returned.
    """
    if n == 0:
        return n
    previous, current = 0, 1
    for _ in range(n - 1):
        previous, current = current, previous + current
    return current

In [75]:
%timeit fib5(n)

2.08 µs ± 72.5 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


Using a generator

In [82]:
def fib6(n: int) -> Generator[int, None, None]:
    """Yield the Fibonacci numbers fib(0), fib(1), ..., fib(n) in order.

    0 is always yielded. When ``n`` is not positive, nothing else is produced.
    """
    yield 0
    if not n > 0:
        return
    yield 1
    previous, current = 0, 1
    # n - 1 further terms are needed after the two seed values.
    for _ in range(n - 1):
        previous, current = current, previous + current
        yield current

In [83]:
# Print each value produced by fib6(5), one per line.
for i in fib6(5):
    print(i)

0
1
1
2
3
5


# Trivial compression

In [87]:
a: int = 5

In [85]:
import sys

In [89]:
f'{sys.getsizeof(a)} bytes'

'28 bytes'

In [113]:
class CompressedGene:
    """A nucleotide sequence (A, C, G, T) packed at two bits per nucleotide.

    The packed data is stored in ``bit_string``, a Python ``int``. Its most
    significant set bit is a sentinel ``1``; the bits below it hold the
    nucleotides in order, first nucleotide in the highest position. The
    sentinel keeps leading ``A`` nucleotides (code ``00``) from being lost,
    since an int does not retain leading zero bits.
    """

    # Two-bit code assigned to each nucleotide.
    _NUCLEOTIDE_CODES = {"A": 0b00, "C": 0b01, "G": 0b10, "T": 0b11}
    # Inverse mapping: the character at index ``code`` is the nucleotide.
    _CODE_NUCLEOTIDES = "ACGT"

    def __init__(self, gene: str) -> None:
        """Compress ``gene`` immediately.

        Raises:
            ValueError: if ``gene`` contains a character other than
                A, C, G or T (case-insensitive).
        """
        self._compress(gene)

    def _compress(self, gene: str) -> None:
        """Pack ``gene`` into ``self.bit_string``.

        The input is upper-cased before encoding. The result is built in a
        local variable and assigned only on success, so a failed call leaves
        any previously stored ``bit_string`` untouched.

        Raises:
            ValueError: on the first character that is not A, C, G or T.
        """
        packed: int = 1  # sentinel bit
        for nucleotide in gene.upper():
            code = self._NUCLEOTIDE_CODES.get(nucleotide)
            if code is None:
                raise ValueError(f'Invalid Nucleotide: {nucleotide}')
            # Make room for two more bits, then drop the code into them.
            packed = (packed << 2) | code
        self.bit_string: int = packed

    def decompress(self) -> str:
        """Return the nucleotide string encoded in ``self.bit_string``."""
        # - 1 to exclude the sentinel bit from the payload length.
        payload_bits = self.bit_string.bit_length() - 1
        # The first nucleotide sits in the highest two payload bits, so walk
        # the two-bit groups from the highest offset down to offset 0.
        return ''.join(
            self._CODE_NUCLEOTIDES[(self.bit_string >> shift) & 0b11]
            for shift in reversed(range(0, payload_bits, 2))
        )

    def __str__(self) -> str:
        """Return the decompressed sequence."""
        return self.decompress()

In [125]:
# Build a long test gene by repeating a short nucleotide sequence 100 times.
original = "TAGGGATTAACCGTTATATATATATAGCCATGGATCGATTATATAGGGATTAACCGTTATATATATATAGC" * 100
compressed: CompressedGene = CompressedGene(gene=original)

# Compare the size reported by sys.getsizeof for the str and for the packed int.
# The second figure measures compressed.bit_string only, not the CompressedGene object itself.
print(f'Original is {sys.getsizeof(original)} bytes')
print(f'Compress is {sys.getsizeof(compressed.bit_string)} bytes')
# Round-trip check: decompressing should reproduce the original string exactly.
print(f'Original and decompressed are the same: {original == compressed.decompress()}')

Original is 7149 bytes
Compress is 1920 bytes
Original and decompressed are the same: True
