# Debug the Jyputer Notebook

from IPython.core.debugger import set_trace

(https://medium.com/@chrieke/jupyter-tips-and-tricks-994fdddb2057)
# 

In [11]:
#-------------------------------------------------------------------------------
# Trivial Compression and DeCompress class
#-------------------------------------------------------------------------------
class CompressedGene:
  #-----------------------------------------------------------------------------
  # c-tor, note it compresses the string passed in
  #-----------------------------------------------------------------------------
  def __init__(self, gene: str) -> None:
    self._compress(gene)

  #-----------------------------------------------------------------------------
  # member method to decompress the compressed string passed in to this class
  #-----------------------------------------------------------------------------
  def decompress(self):
    rslt: str = ""

    # excluding the sentinel (ie, the first 01 two bits)
    bytesExcludingSentinel: int = self.bit_str.bit_length() - 1

    # extract two-bytes every step
    for i in range(0, bytesExcludingSentinel, 2):
      # get 2 relevant bits
      bits: int = self.bit_str >> i & 0b11 

      if bits == 0b00:
        rslt += "A"
      elif bits == 0b01:
        rslt += "C"
      elif bits == 0b10:
        rslt += "G"
      elif bits == 0b11:
        rslt += "T"
      else:
        raise ValueError("Invalid bits: {}".format(bits))

    # note, the reversing logic    
    return rslt[::-1]            

  #----------------------------------------------------------------------------
  # similar to Java's toString() 
  #----------------------------------------------------------------------------
  def __str__(self) -> str:         # does "->" mean function return type?
    return self.decompress()


  #----------------------------------------------------------------------------
  # Note, this compressed self.bit_str has the bytes of the REVERSE string
  # passed in; 
  #----------------------------------------------------------------------------
  def _compress(self, gene: str):

    # sentinel  (so, the self.bit_Str will always have 01 at the beginning)
    self.bit_str: int = 1
    
    # check each letter, appends it to the end of the self.bit_str
    for l in gene.upper():
       
       self.bit_str <<= 2

       if l == "A":
         self.bit_str |= 0b00
       elif l == "C":
         self.bit_str |= 0b01
       elif l == "G":
         self.bit_str |= 0b10
       elif l == "T":
         self.bit_str |= 0b11
       else:
         raise ValueError("Invalid letter: {}".format(l)) 

  #----------------------------------------------------------------------------
  # get the compressed string, represented in byte string
  #   (tunred out unnecessary, because self.bt_str has public access)
  #----------------------------------------------------------------------------
  def getCompressed(self):
    return self.bit_str     


#-------------------------------------------------------------------------------
# Program, main function                                                      --
#-------------------------------------------------------------------------------
if __name__ == "__main__":
  from sys import getsizeof

  #
  from IPython.core.debugger import set_trace
  # set_trace()

  # (ATGC 0100111001)
  # note, when the origin string is small/short (ie, if just ATGC), 
  # and then the compressed bytes are the same as that of origin's
  origin: str = "ATGC" *10000  # "ATGTCAT" * 2

  compressed: CompressedGene = CompressedGene(origin)

  print("The original Gene Sequence bytes: {}, bytes after compressed: {}, compressed to {}"
    .format(getsizeof(origin), getsizeof(compressed.getCompressed()), compressed.bit_str))
  
  print("decompressing the origin string to {}".format(compressed))


The original Gene Sequence bytes: 40049, bytes after compressed: 10692, compressed to 30709131355642419680511830832667809106889033096713684936892881926381861532372759571047071660878286126307428098302281791205596792058229110999843463401051887180607140741878280748748585938748752898511061086781147633588445823126631664219053059243589777756882158514543359490862835841392073850472118100123136273341431946537025798839870750409029693773628115913778514900738885721785291595510288704849972042814349007357380775353505717579051875287330037575422705127626125138640862784422686044415521035950477077091960253929578380410711461133527912924946902946502276153476253176240019772748680692317910691137875349361129813402957146353107255238038915629329915989675557717249263248615422721164783403815059568774322626765074423299096705077264859000956259539996736632522333449171682022716163626758370488192984201194212830936270592933776276503236566061095524940150017321916294527398701537306131375737655604334485835277640522567107