In [52]:
import sys

In [35]:
class InvalidNucleotideError(ValueError):
    pass

class InvalidBitsError(ValueError):
    pass

In [55]:
class CompressedGenes:
    """Compressed a gene from string to binary bits.
    """
    def __init__(self, gene: str) -> None:
        self._compress(gene)
    
    def _compress(self, gene: str) -> None:
        self.bit_string = 1
        for nucleotide in gene.upper():
            self.bit_string <<= 2
            if nucleotide == "A":
                self.bit_string |= 0b00
            elif nucleotide == "C":
                self.bit_string |= 0b01
            elif nucleotide == "G":
                self.bit_string |= 0b10
            elif nucleotide == "T":
                self.bit_string |= 0b11
            else:
                raise InvalidNucleotideError(nucleotide)
    
    def decompress(self) -> str:
        gene: str = ""
        for i in range(0, self.bit_string.bit_length()-1, 2):
            bits: int = self.bit_string >> i & 0b11
            if bits == 0b00:
                gene += "A"
            elif bits == 0b01:
                gene += "C"
            elif bits == 0b10:
                gene += "G"
            elif bits == 0b11:
                gene += "T"
            else:
                raise InvalidBitsError(bits)
        return gene[::-1]
    
    def __str__(self) -> str:
        return self.decompress()
        

In [60]:
original: str = "TAGGGATTAACCGTTATATATATATAGCCATGGATCGATTATATAGGGATTAACCGTTATATATATATAGCCATGGATCGATTATA" * 100

In [61]:
compressed = CompressedGenes(original)

In [63]:
decompressed = compressed.decompress()

In [66]:
print(f"Size of original: {sys.getsizeof(original)}")
print(f"Size of compressed: {sys.getsizeof(compressed)}")
print(f"Size of decompressed: {sys.getsizeof(decompressed)}")
print(f"Original == Decompressed: {original==decompressed})")

Size of original: 8649
Size of compressed: 48
Size of decompressed: 8649
Original == Decompressed: True)
