# Ex 2 Audio Compression
This task focuses on encoding and decoding a series of uncompressed audio files, based on the lossless data compression method "Rice Coding".

In [1]:
import math
import filecmp

The Rice Encoding takes in the input sample (```value```) and the number of bits (```k```).

- The modulus (```m```) is calculated where $m = {2^k}$
- The sample (```S```) is then encoded by finding:
    1. The quotient where $quotient = int(S / m)$
    2. The remainder where $remainder =  SmoduloM $
- The Codeword is then generated where:
    1. The quotientCode is the quotient in unary form
    2. The remainderCode is the remainder in binary using ```k``` bits
    3. The codeword will have the format < quotientCode >< remainderCode >

In [2]:
def RiceEncoder(value, k):
    m = 2**k
    quotient = int(math.floor(value / m))
    remainder = int(value % m)
    # unary encoding for quotient
    quotientCode = ("1" * quotient) + "0"
    # binary coding for remainder (k bits long)
    remainderCode = format(remainder, f"0{k}b")
#     print("value:", value, "k:", k, "m:", m)
#     print("quotient:", quotient, "   remainder:", remainder)
#     print("quotientCode:", quotientCode, "   remainderCode:", remainderCode)
    return quotientCode + remainderCode

RiceEncoder(40, 4)

'1101000'

The Rice Decoding takes in the input sample (```value```) and the number of bits (```k```).

- The modulus (```m```) is calculated where $m = 2^k$
- The quotient is determined by counting the number of 1s before the first 0
- The remainder is determined by reading the next ```k``` bits as a binary value
- The sample (```value```) is written as ```quotient * m + remainder```

In [3]:
# gets the number of significant bits in a byte
def GetBinLen(val):
    length = 0
    while(val > 0):
        length += 1
        val >>= 1
    return length

def RightPassMask(numBits):
    mask = 0
    for i in range(numBits):
        mask |= (1 << i)
    return mask

def RiceDecoder(value, k):
    m = 2**k
    binQuotient = value >> k + 1
    quotient = GetBinLen(binQuotient)
    mask = RightPassMask(k)
    remainder = value & mask
    decodedValue = int(quotient * m + remainder)
    return decodedValue
    
RiceDecoder(1101000, 4)

264

The next step is to use the Rice coding on a file.

In [4]:
def FileEncoder(source, output, k):
    bufferSize = 262144 # default buffer size
    outBuffer = bytearray()
    dByte = 0 # temp byte to store bits
    bitsLeft = 8 # bits left empty in temp byte
    
    # reading source file, writing output file
    with open(source, "rb") as sStream, open(output, "wb") as dStream:
        inBuffer = sStream.read(bufferSize)
        while inBuffer:
            for sByte in inBuffer:
                # encoding the byte
                sEncodedByte = RiceEncoder(sByte, k)
                lenEncodedByte = len(sEncodedByte)
                idx = 0 # index storing current position in encoded val
                
                # packing encoded value in chunks into dByte
                while idx < lenEncodedByte:
                    # reading bytes from idx - nextIdx, not going beyond end of encoded value
                    nextIdx = min(idx + bitsLeft, lenEncodedByte)
                    # modifying number of bits left
                    bitsLeft -= (nextIdx - idx)
                    # shifting bits to the left
                    dByte |= (int(sEncodedByte[idx:nextIdx], 2) << bitsLeft)
                    
                    if bitsLeft == 0:
                        # appending result into outBuffer
                        outBuffer.append(dByte)
                        # resetting dByte and bitsleft
                        dByte = 0
                        bitsLeft = 8
                        
                        # checking if buffer is full
                        if len(outBuffer) >= bufferSize:
                            # writing to dStream
                            dStream.write(outBuffer)
                            # resetting outBuffer
                            outBuffer = bytearray()
                            
                    idx = nextIdx
            inBuffer = sStream.read(bufferSize)
            
        # writing remaining bits in the buffer
        if bitsLeft != 8: outBuffer.append(dByte)
        # flushing buffer onto disk
        dStream.write(outBuffer)

In [5]:
def InvertByte(val):
    newVal = 0
    for i in range(8):
        mask = (1 << i)
        if (mask & val) == 0:
            newVal |= (1 << i)
    return newVal

# returning idx of first zero
def GetFirstZero(byte, startIdx):
    invertedByte = InvertByte(byte) # inverting bits to detect zeros
    for i in range(8 - startIdx):
        # creates bit mask with a 1 in each byte position (left to right)
        mask = 1 << (7 - i - startIdx)
        if (invertedByte) & mask != 0:
            return i + startIdx
    return -1

# returning bit mask having num of Bits of zeros on left
def ZerorizeBits(numBits):
    mask = 0
    # injects 1s
    for i in range(0, 8 - numBits):
        mask |= (1 << i)
    return mask

def FileDecoder(source, output, k):
    bufferSize = 262144 # default buffer size
    
    # creating buffers
    outBuffer = bytearray()
    byteBuffer = bytearray()
    
    startIdx = 0 # keeping current position in the byte
    shift = 0 # keeping end position of remainder in current / next byte
    sByte = None # temp byte to store bits
    
    # reading source file, writing output file    
    with open(source, "rb") as sStream, open(output, "wb") as dStream:
        # reading first and next bytes from stream
        sByte = sStream.read(1)
        nextByte = sStream.read(1)
        
        # continuing while there's a byte in sByte
        while len(sByte) > 0:
            # finding first zero position in byte
            idxOfZero = GetFirstZero(sByte[0], startIdx)
            
            # if not found, it's a all-1 byte, add to buffer as it is
            if idxOfZero == -1:
                byteBuffer.append(sByte[0]) # adding to buffer
                # reading next byte
                sByte = nextByte # storing nextByte contents
                nextByte = sStream.read(1) # reading next byte
                startIdx = 0 # resetting index
                continue
                
            # if zero found and the remainder falls off to next byte
            if idxOfZero + k >= 8:
                byteBuffer.append(sByte[0]) # adding byte to buffer
                # reading next byte
                sByte = nextByte # storing nextByte contents
                nextByte = sStream.read(1) # reading next byte
                # end position of remainder in next byte
                shift = (8 + 7 - (idxOfZero + k))
                
            else:
                # end position of remainder is in current byte
                shift = 7 - (idxOfZero + k)
                
            # reading byte as an integer
            sByteToInt = int.from_bytes(sByte, "big")
            # cleaning leftmost bits
            zerorize = ZerorizeBits(8-shift)
            # saving bits after end of remainding parts
            remByte = sByteToInt & zerorize
            # adding current byte to bytearray
            byteBuffer.append(sByteToInt)
            
            # looping from last to first byte in buffer
            for i in range(len(byteBuffer)-1, -1, -1):
                if (i < len(byteBuffer)-1):
                    # move rightmost bit all the way to left
                    extraBit = byteBuffer[i] & zerorize
                    extraBit = extraBit << (8 - shift)
                    # moving to the prev byte
                    byteBuffer[i+1] |= extraBit
                    
                # shifting last byte in buffer right
                byteBuffer[i] >>= shift
                
            # combining bytearray into 1 number
            val = int.from_bytes(byteBuffer, "big")
            # decypher value
            decypher = RiceDecoder(val, k)
            # writing value to output buffer
            outBuffer.append(decypher)
            # cleaning temp bytearray
            byteBuffer = bytearray()
            
            # exiting condition
            if (remByte == 0 and len(nextByte) == 0): break
                
            # adding leftover byte to buffer
            byteBuffer.append(remByte)
            startIdx = 8 - shift
            
            # resetting temp buffer if not empty
            if (len(byteBuffer) != 0):
                sByte = byteBuffer
                # reinitialising buffer
                byteBuffer = bytearray()
            else:
                # reading next byte
                sByte = nextByte
                nextByte = sStream.read(1)
                
            # if output buffer full, write to output stream
            if (len(outBuffer) >= bufferSize):
                # writing buffer to output stream
                dStream.write(outBuffer)
                # emptying buffer
                outBuffer = bytearray()
                
        # emptying buffer into output
        dStream.write(outBuffer)
        dStream.close()

In [8]:
originalFile = "audios\Sound1.wav"
encodedFile = "audios\Sound1_enc.ex2"
decodedFile = "audios\Sound1_enc_dec2.wav"

FileEncoder(originalFile, encodedFile, 2)
FileDecoder(encodedFile, decodedFile, 2)

cmp = filecmp.cmp(originalFile, decodedFile)
print(f"File composition equality:", cmp)

originalFile = "audios\Sound2.wav"
encodedFile = "audios\Sound2_enc.ex2"
decodedFile = "audios\Sound2_enc_dec2.wav"

FileEncoder(originalFile, encodedFile, 2)
FileDecoder(encodedFile, decodedFile, 2)

cmp = filecmp.cmp(originalFile, decodedFile)
print(f"File composition equality:", cmp)

originalFile = "audios\Sound1.wav"
encodedFile = "audios\Sound1_enc.ex4"
decodedFile = "audios\Sound1_enc_dec4.wav"

FileEncoder(originalFile, encodedFile, 4)
FileDecoder(encodedFile, decodedFile, 4)

cmp = filecmp.cmp(originalFile, decodedFile)
print(f"File composition equality:", cmp)

originalFile = "audios\Sound2.wav"
encodedFile = "audios\Sound2_enc.ex4"
decodedFile = "audios\Sound2_enc_dec4.wav"

FileEncoder(originalFile, encodedFile, 4)
FileDecoder(encodedFile, decodedFile, 4)

cmp = filecmp.cmp(originalFile, decodedFile)
print(f"File composition equality:", cmp)

File composition equality: False
File composition equality: True
File composition equality: True
File composition equality: True
